In [None]:
import pandas as pd
import numpy as np
from scipy.stats import zscore, spearmanr
from tqdm import tqdm
from statsmodels.formula.api import ols

from matplotlib import pyplot as plt
%matplotlib inline
# Default figure size (width, height in inches) for every matplotlib figure in this notebook.
plt.rcParams['figure.figsize'] = [8, 4]

import warnings
# Suppresses every Python warning for the rest of the session. This also hides
# pandas deprecation / SettingWithCopy warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [None]:
# Load the raw training data and drop the first column of the CSV.
# NOTE(review): the dropped column is presumably a saved row index — confirm against raw_train.csv.
df = pd.read_csv('raw_train.csv').iloc[:,1:]

In [None]:
# Columns carried forward into the alpha calculations, in display order.
features = [
    'ts_code', 'trade_date', 'log-ret',
    'open', 'close', 'high', 'low', 'vwap',
    'turnover_rate', 'amount', 'pe', 'pb', 'total_share', 'total_mv', 'volume',
    'name', 'industry', 'list_date', 'issue_price', 'issue_amount',
    'revenue_ps', 'dt_eps', 'dt_eps_yoy', 'bps', 'bps_yoy', 'roe_dt', 'roe_yoy',
]

# vwap = amount * 10 / volume.
# NOTE(review): the factor 10 presumably reconciles the units of `amount` and `volume` — confirm with the data source.
df = df.assign(vwap=df['amount'] * 10 / df['volume']).loc[:, features]
df.columns

## Get Alpha Object

In [None]:
from alpha_191 import Alpha191

# Wrap the prepared frame in an Alpha191 instance; the cells below call its alphaNNN methods on alpha.df.
alpha = Alpha191(df)

In [None]:
# Evaluate alpha030 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_030' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha030(alpha.df)

In [None]:
# Evaluate alpha131 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_131' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha131(alpha.df)

In [None]:
# Evaluate the custom alpha_cus002 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_cus002' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha_cus002(alpha.df)

In [None]:
# Evaluate the custom alpha_cus001 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_cus001' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha_cus001(alpha.df)

In [None]:
# Evaluate alpha040 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_040' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha040(alpha.df)

In [None]:
# Evaluate alpha075 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_075' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha075(alpha.df)

In [None]:
# Evaluate alpha190 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_190' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha190(alpha.df)

In [None]:
# Evaluate alpha176 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_176' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha176(alpha.df)

In [None]:
# Evaluate alpha119 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): presumably adds an 'alpha_119' column (selected from `universe` below) — confirm in alpha_191.
universe = alpha.alpha119(alpha.df)

In [None]:
# Evaluate alpha149 on alpha.df; `universe` is rebound to the returned frame.
# NOTE(review): later cells read several alpha_* columns from this last `universe`, so each
# alphaNNN call presumably accumulates its column on the shared frame — confirm in alpha_191.
universe = alpha.alpha149(alpha.df)

## Factor Evaluation

In [None]:
# Copy `universe` and, within each ts_code, replace close with the value found 5 rows later.
tmp = universe.copy(deep=True).assign(
    close=lambda frame: frame.groupby('ts_code')['close'].shift(-5)
)
# Wide price matrix: one row per date, one column per ts_code.
all_pricing = pd.pivot(tmp.reset_index(), index='date', columns='ts_code', values='close')
all_pricing.head()

In [None]:
import alphalens as al
from scipy.stats import zscore
import project_helper as ph

def show_sample_results(factors_with_alpha, features, pricing=None, holding_time=5):
    """Evaluate the selected factors and display their Sharpe ratios and plots.

    Parameters
    ----------
    factors_with_alpha : DataFrame
        Frame containing at least the columns named in ``features``.
    features : list of str
        Columns of ``factors_with_alpha`` to evaluate.
    pricing : optional
        Pricing data handed to ``ph.build_factor_data``. When omitted, the
        notebook-level ``all_pricing`` is looked up at call time, so a rebuilt
        ``all_pricing`` is picked up instead of the object that happened to
        exist when this function was defined.
    holding_time : int, default 5
        Forwarded to ``ph.build_factor_data`` as ``holding_time``.

    Returns
    -------
    None
        Results are printed and plotted through the ``project_helper`` functions.
    """
    if pricing is None:
        # Late binding: a default of `pricing=all_pricing` would freeze a stale snapshot.
        pricing = all_pricing

    factors_with_alpha = factors_with_alpha[features]

    # Setup data for AlphaLens
    print('Cleaning Data...\n')
    factor_data = ph.build_factor_data(factors_with_alpha, pricing, holding_time=holding_time)
    print('\n-----------------------\n')

    # Calculate Factor Returns and Sharpe Ratio
    factor_returns = ph.get_factor_returns(factor_data)
    sharpe_ratio = ph.sharpe_ratio(factor_returns)

    # Show Results
    print('             Sharpe Ratios')
    print(sharpe_ratio.round(2))
    ph.plot_factor_returns(factor_returns)
    ph.plot_factor_rank_autocorrelation(factor_data)

In [None]:
# Factors evaluated in this pass.
factor_names = [
            'volume','alpha_030', 'alpha_075','alpha_190', 'alpha_176', 'alpha_149', 'alpha_119'
            ]

# Copy only the factor columns plus the two keys used to build the (date, ts_code) index.
all_factors = universe[factor_names + ['trade_date','ts_code']].copy()
# Missing factor values are treated as 0.
all_factors = all_factors.fillna(0.)
# Drop every row that holds +/-inf in any column. `axis` is passed by keyword because
# pandas 2.0 made it keyword-only; the positional form `.any(1)` raises TypeError there.
all_factors = all_factors[~all_factors.isin([np.inf, -np.inf]).any(axis=1)]
all_factors.reset_index(drop=True, inplace=True)
# trade_date is parsed as YYYYMMDD to obtain a proper datetime index level.
all_factors['date'] = pd.to_datetime(all_factors['trade_date'], format='%Y%m%d')
all_factors = all_factors.set_index(['date','ts_code']).sort_values(by=['date'])
print(all_factors.shape)

In [None]:
# Evaluate the factor set built above against the current all_pricing matrix.
show_sample_results(all_factors, factor_names, pricing=all_pricing, holding_time=5)

In [None]:
# Copy `universe` and, within each ts_code, replace close with the value found 2 rows later.
tmp = universe.copy(deep=True).assign(
    close=lambda frame: frame.groupby('ts_code')['close'].shift(-2)
)
# Wide price matrix: one row per date, one column per ts_code. This rebinds all_pricing.
all_pricing = pd.pivot(tmp.reset_index(), index='date', columns='ts_code', values='close')
all_pricing.head()

In [None]:
# Factors evaluated in this pass.
factor_names = [
            'volume','alpha_cus001', 'alpha_cus002', 'alpha_040',  'alpha_131',
            ]

# Copy only the factor columns plus the two keys used to build the (date, ts_code) index.
all_factors = universe[factor_names + ['trade_date','ts_code']].copy()
# Missing factor values are treated as 0.
all_factors = all_factors.fillna(0.)
# Drop every row that holds +/-inf in any column. `axis` is passed by keyword because
# pandas 2.0 made it keyword-only; the positional form `.any(1)` raises TypeError there.
all_factors = all_factors[~all_factors.isin([np.inf, -np.inf]).any(axis=1)]
all_factors.reset_index(drop=True, inplace=True)
# trade_date is parsed as YYYYMMDD to obtain a proper datetime index level.
all_factors['date'] = pd.to_datetime(all_factors['trade_date'], format='%Y%m%d')
all_factors = all_factors.set_index(['date','ts_code']).sort_values(by=['date'])
print(all_factors.shape)

In [None]:
# Evaluate the second factor set against the rebuilt all_pricing matrix.
# NOTE(review): all_pricing was rebuilt with shift(-2) while holding_time is still 5 — confirm this is intended.
show_sample_results(all_factors, factor_names, pricing=all_pricing, holding_time=5)

## Check Single Factor

In [None]:
# Ten rows with the smallest non-missing alpha_cus002 values.
universe.dropna(subset=['alpha_cus002']).sort_values(by=['alpha_cus002']).head(10)

In [None]:
# Distinct ts_code values present in the universe.
universe['ts_code'].unique()

In [None]:
# Factor inspected in this cell.
name = 'alpha_040'
universe_plot = universe.copy()
# Standardise the factor cross-sectionally, i.e. within each trade_date, after
# replacing missing values with 0. The earlier chain
# `groupby('trade_date')[[name]].fillna(0.).apply(zscore)` lost the grouping
# (groupby-fillna returns a plain frame), so the z-score was taken over the
# whole panel instead of per date.
universe_plot[name] = (
    universe_plot[name]
    .fillna(0.)
    .groupby(universe_plot['trade_date'])
    .transform(zscore)
)
# One stock, rows with trade_date > 20180301 (trade_date is compared as a number).
# Other tickers noted by the author: 300533.SZ, 002826.SZ.
# .copy() makes the column assignments below act on an independent frame
# rather than on a slice of universe_plot.
tmp = universe_plot.loc[
    (universe_plot.ts_code == '603538.SH') & (universe_plot.trade_date > 20180301),
    [name, 'close'],
].copy()
# Put close on the same scale as the factor: z-score over the selected window...
tmp[['close']] = tmp[['close']].apply(zscore)
# ...then align each row with the close found 5 rows later.
tmp['close'] = tmp['close'].shift(-5)

tmp.plot(title=name, grid=True)

In [None]:
# Preview the first five rows of the final universe frame.
universe.head(n=5)

In [None]:

# Distribution of alpha_119 across the whole universe.
universe.loc[:, 'alpha_119'].hist()

In [None]:
# Mean of alpha_075 over every row of the universe.
universe['alpha_075'].mean()