# Regression

In [1]:
import datetime
import time
import pandas_datareader.data as web
import pandas as pd


def time_strf(a):
    """Format a Unix timestamp as a local-time ``YYYY-MM-DD HH:MM:SS`` string.

    Parameters
    ----------
    a : int or float
        Seconds since the epoch, as accepted by ``time.localtime``.

    Returns
    -------
    str
        The timestamp rendered in the machine's local time zone.
    """
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(a))

def process_time(BCH, agg='d'):
    """Index a frame by its ``TimeStamp`` column and resample it.

    The ``TimeStamp`` values are converted with ``time_strf`` (local time)
    and parsed into datetimes, which become the index of the returned frames.
    The caller's frame is left untouched: the new index is set on the frame
    produced by ``drop``, not on the input.

    Parameters
    ----------
    BCH : pandas.DataFrame
        Must contain a ``TimeStamp`` column holding values ``time_strf``
        accepts (seconds since the epoch).
    agg : str, default 'd'
        Resampling rule handed to ``DataFrame.resample``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The datetime-indexed frame without the ``TimeStamp`` column, and the
        same frame resampled at ``agg`` keeping the first row of each bin.
    """
    timestamps = pd.to_datetime(BCH["TimeStamp"].apply(time_strf))
    # drop() returns a new frame, so re-indexing it cannot leak into the input.
    indexed = BCH.drop(columns=['TimeStamp'])
    indexed.index = timestamps
    res = indexed.resample(agg).first()
    return indexed, res




def join_corr_data(dt, start, end, dt_symbol, rsuffix_name ):
    """Left-join the Yahoo ``Close`` series of several symbols onto ``dt``.

    Each symbol is downloaded with ``web.DataReader(symbol, "yahoo", start,
    end)`` and its ``Close`` column is joined on the index as
    ``Close_<label>``. The column is renamed explicitly because the
    ``rsuffix`` argument of ``DataFrame.join`` is only applied when a column
    name clashes, which left the first joined column as plain ``Close``.

    Parameters
    ----------
    dt : pandas.DataFrame
        Base frame; it is copied, never modified.
    start, end
        Date bounds forwarded unchanged to ``web.DataReader``.
    dt_symbol : sequence of str
        Symbols to download.
    rsuffix_name : sequence of str
        Label for each symbol, matched by position. Extra labels beyond the
        number of symbols are ignored.

    Returns
    -------
    pandas.DataFrame
        Copy of ``dt`` with one ``Close_<label>`` column per symbol.

    Raises
    ------
    ValueError
        If there are fewer labels than symbols.
    """
    # Fail before any download rather than part-way through the loop.
    if len(rsuffix_name) < len(dt_symbol):
        raise ValueError(
            "rsuffix_name must provide one label per symbol: got "
            + str(len(rsuffix_name)) + " labels for "
            + str(len(dt_symbol)) + " symbols"
        )

    dtt = dt.copy()
    for symbol, label in zip(dt_symbol, rsuffix_name):
        quotes = web.DataReader(symbol, "yahoo", start, end)
        close = quotes[["Close"]].rename(columns={"Close": "Close_" + label})
        dtt = dtt.join(close, how='left')
    return dtt

def get_btc(start, end):  # lowercase column names
    """Download BTC-USD quotes from Yahoo and lowercase the column names.

    Parameters
    ----------
    start, end
        Date bounds forwarded unchanged to ``web.DataReader``.

    Returns
    -------
    pandas.DataFrame
        A copy of the downloaded frame whose column labels are lowercased.
    """
    quotes = web.DataReader("BTC-USD", "yahoo", start, end)
    lowered_labels = [label.lower() for label in quotes.columns]
    frame = quotes.copy()
    frame.columns = lowered_labels
    return frame


# Requested sample window: from 1 January 2010 up to the day the notebook runs.
start = datetime.datetime(2010, 1, 1)
end = datetime.date.today()
btc = get_btc(start, end)
#################################### 


def get_ta_indicates(btc):
    """Append tqsdk technical indicators to the BTC price table.

    Each indicator is computed from ``btc``, re-indexed onto ``btc.index`` and
    concatenated column-wise after the original columns, in the order listed
    in ``indicator_specs``.

    Parameters
    ----------
    btc : pandas.DataFrame
        Price table in the layout the ``tqsdk.ta`` functions expect
        (presumably lowercase open/high/low/close/volume columns - confirm
        against the tqsdk documentation).

    Returns
    -------
    pandas.DataFrame
        ``btc`` followed by the indicator columns.
    """
    # Only the indicators actually used are imported. ARBR (sentiment index)
    # and VRSI (volume relative strength) are not part of the output.
    from tqsdk.ta import ATR, CCI, EMA, OBV, PRICEOSC, PVT, QHLSR, ROC

    # (function, extra positional arguments forwarded unchanged to tqsdk)
    indicator_specs = [
        (ATR, (14,)),         # average true range
        (CCI, (14,)),         # commodity channel index
        (PVT, ()),
        (EMA, (7,)),          # exponentially weighted moving average
        (QHLSR, ()),          # resistance indicator
        (ROC, (7, 7)),        # rate of change
        (OBV, ()),            # on-balance volume
        (PRICEOSC, (30, 7)),  # price oscillator
    ]
    indicator_frames = [
        indicator(btc, *extra_args).set_index(btc.index)
        for indicator, extra_args in indicator_specs
    ]
    btc_techniques = pd.concat([btc, *indicator_frames], axis=1)
    return btc_techniques








btc_techniques = get_ta_indicates(btc)

# Yahoo symbols of the comparison assets and the short label used for each.
dt_symbol = ["GC=F", "CL=F", "EURUSD=X", "ETHUSD=X", "LTCUSD=X", "BCH-USD"]
rsuffix_name = ['gold', 'oil', 'fx', 'eth', 'ltc', 'bch']
dtt = join_corr_data(btc_techniques, start, end, dt_symbol, rsuffix_name)

# Remove the first five columns (by position) and every row with a missing value.
dtt = dtt.drop(columns=dtt.columns[0:5]).dropna()
dtt.count()

  from pandas.util.testing import assert_frame_equal
在使用天勤量化之前，默认您已经知晓并同意以下免责条款，如果不同意请立即停止使用：https://www.shinnytech.com/blog/disclaimer/


adj close    628
tr           628
atr          628
cci          628
pvt          628
ema          628
qhl5         628
qhl10        628
roc          628
rocma        628
obv          628
priceosc     628
Close        628
Close_oil    628
Close_fx     628
Close_eth    628
Close_ltc    628
Close_bch    628
dtype: int64

## Correlation

In [2]:
# Relabel the columns: the first becomes 'BTC', the trailing len(rsuffix_name)
# columns take the short asset labels, everything in between keeps its name.
# The slice end is computed explicitly so an empty label list keeps the middle
# columns instead of collapsing to an empty slice.
first_asset_pos = len(dtt.columns) - len(rsuffix_name)
name = ['BTC'] + list(dtt.columns[1:first_asset_pos]) + rsuffix_name
dtt.columns = name

# Pairwise correlation of BTC with every column (including itself).
for column in dtt.columns:
    corr = dtt['BTC'].corr(dtt[column])
    print('The correlation between BTC and ' + column + ' is ' + str(corr))

The correlation between BTC and BTC is 1.0
The correlation between BTC and tr is 0.6120548759142425
The correlation between BTC and atr is 0.7646336885655147
The correlation between BTC and cci is 0.05857018223482467
The correlation between BTC and pvt is 0.44625847276802444
The correlation between BTC and ema is 0.9844691418412306
The correlation between BTC and qhl5 is 0.06780160700142357
The correlation between BTC and qhl10 is 0.07344415063919969
The correlation between BTC and roc is 0.15887887632112635
The correlation between BTC and rocma is 0.19754728631770638
The correlation between BTC and obv is 0.3521329377829034
The correlation between BTC and priceosc is 0.22665049455268427
The correlation between BTC and gold is 0.2503424080812395
The correlation between BTC and oil is 0.12194810993372872
The correlation between BTC and fx is 0.10980193036819123
The correlation between BTC and eth is 0.562355682969872
The correlation between BTC and ltc is 0.7360565669858773
The correlat

## OLS Model

In [3]:
import statsmodels.formula.api as smf

# Regress BTC on the comparison assets only: the last len(rsuffix_name) names.
asset_terms = name[-len(rsuffix_name):]
ols_formula = 'BTC ~' + " + ".join(asset_terms)
model_2 = smf.ols(formula=ols_formula, data=dtt).fit()
model_2.summary()

0,1,2,3
Dep. Variable:,BTC,R-squared:,0.808
Model:,OLS,Adj. R-squared:,0.806
Method:,Least Squares,F-statistic:,436.2
Date:,"Thu, 14 May 2020",Prob (F-statistic):,6.32e-219
Time:,13:19:08,Log-Likelihood:,-5374.0
No. Observations:,628,AIC:,10760.0
Df Residuals:,621,BIC:,10790.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.313e+04,3381.001,3.882,0.000,6486.540,1.98e+04
gold,9.2562,0.617,15.005,0.000,8.045,10.468
oil,61.3106,5.975,10.262,0.000,49.578,73.044
fx,-2.207e+04,2444.368,-9.030,0.000,-2.69e+04,-1.73e+04
eth,-1.4368,0.643,-2.235,0.026,-2.699,-0.174
ltc,32.5175,2.040,15.939,0.000,28.511,36.524
bch,2.3988,0.231,10.376,0.000,1.945,2.853

0,1,2,3
Omnibus:,243.979,Durbin-Watson:,0.151
Prob(Omnibus):,0.0,Jarque-Bera (JB):,2200.239
Skew:,1.47,Prob(JB):,0.0
Kurtosis:,11.686,Cond. No.,126000.0


## Linear Regression

In [4]:
from sklearn.linear_model import LinearRegression

# Fit BTC against every other column of dtt and show the coefficients.
y = dtt["BTC"]
X = dtt.drop(columns=["BTC"])
model_3 = LinearRegression().fit(X, y)
model_3.coef_

array([-2.36788542e-02, -1.15130236e-01,  5.37076378e-01,  1.91467600e-08,
        1.02999295e+00,  1.22703225e+02,  1.41264067e+02,  2.88496598e+01,
       -7.81020989e+00, -5.74199813e-10, -1.45566508e+00, -1.10655558e+00,
       -2.18185726e+00,  1.52567981e+03, -7.03497343e-01,  1.22657134e+00,
       -1.63268631e-03])

## Variable Selection

In [5]:
from sklearn.linear_model import LassoLarsIC

# Lasso path fitted with LARS; the regularisation strength is picked by BIC.
model_bic = LassoLarsIC(criterion='bic')
model_bic.fit(X, y)
alpha_bic_ = model_bic.alpha_

# A variable is selected when its coefficient is non-zero, whatever its sign.
# Testing `> 0` would silently leave out variables kept with a negative weight.
list(X.columns[model_bic.coef_ != 0])

['ema', 'roc']

## Linear Regression After Selection

In [6]:
# Refit the linear model without the fx and eth columns.
# NOTE(review): the recorded selection output lists ['ema', 'roc'], which does
# not obviously lead to dropping fx and eth - confirm the intended feature set.
y = dtt["BTC"]
X = dtt.drop(columns=["BTC", "fx", "eth"])
model_4 = LinearRegression().fit(X, y)
model_4.coef_

array([-1.69026966e-02, -8.65172318e-02,  5.60707095e-01,  1.84019968e-08,
        1.02743759e+00,  1.32023803e+02,  1.07390356e+02,  2.88300898e+01,
       -8.28559912e+00, -5.42680803e-10,  3.40234933e-01, -1.29161582e+00,
       -2.85377190e+00,  2.98886783e-01, -1.35079489e-01])