In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import train_test_split, cross_val_score
from xgboost import XGBRegressor
import statsmodels.formula.api as smf

In [None]:
# Read 'btc-usdt.csv' once, then split it by row label: everything before
# `split_label` stays in `btc`, everything from `split_label` onwards goes
# to `valid`. `.loc` slices are label-based and include both endpoints.
_btc_all = pd.read_csv('btc-usdt.csv')
split_label = 40000
btc = pd.DataFrame(_btc_all.loc[:split_label - 1])
valid = pd.DataFrame(_btc_all.loc[split_label:])

In [None]:
# Drop the space from the two timestamp column names in both frames
# (renamed in place, so `btc` and `valid` keep their identity).
column_aliases = {'Open Time': 'Opentime', 'Close Time': 'Closetime'}
for frame in (btc, valid):
    frame.rename(columns=column_aliases, inplace=True)

In [None]:
# Parse both timestamp columns of both frames with pandas' datetime parser.
for frame in (btc, valid):
    for time_col in ('Opentime', 'Closetime'):
        frame[time_col] = pd.to_datetime(frame[time_col])
# Last expression of the cell: display the training frame.
btc

In [None]:
# Overview figure: Close of the training frame and of the validation frame,
# each plotted against its own row index so the two segments sit side by side.
fig, ax = plt.subplots(figsize=(10, 5))
# linewidth is a numeric property; pass a number rather than a string.
ax.plot(btc.index, btc.Close, label='close price', linewidth=1)
ax.plot(valid.index, valid.Close, label='valid')
ax.set_xlabel('row index')
ax.set_ylabel('Close')
ax.legend()
plt.show()


In [None]:
# Lag features: lag_k is Close shifted down by k rows (k = 1..5), and
# trade_lag is 'Number of Trades' shifted down by one row.
for k in range(1, 6):
    btc[f'lag_{k}'] = btc['Close'].shift(k)
btc['trade_lag'] = btc['Number of Trades'].shift(1)
# Shifting leaves NaN in the leading rows; dropna removes every row that has
# a NaN in any column.
btc.dropna(axis=0, inplace=True)

# Single-feature linear fit of Close on trade_lag; the printed score is
# computed on the same rows the model was fitted on.
model = LinearRegression()
trade_feature = btc[['trade_lag']]
close_target = btc[['Close']]
model.fit(trade_feature, close_target)
print(model.score(trade_feature, close_target))

# Side-by-side scatter plots of Close against lag_1 and against trade_lag.
_, (ax_lag, ax_trade) = plt.subplots(1, 2)
ax_lag.set_xlabel("lag_1", fontsize=20)
ax_lag.set_ylabel("close", fontsize=20)
ax_lag.scatter(btc.lag_1, btc.Close, s=0.5)

ax_trade.set_xlabel("trade_lag", fontsize=20)
ax_trade.scatter(btc.trade_lag, btc.Close, s=0.5)

plt.show()

In [None]:
# OLS through the statsmodels formula interface. The formula looks its
# variables up by column name in `data`, so the two series are renamed to the
# short names the formula uses: `tra` (lagged trade count) and `cls` (Close).
# As written, the formula regresses `tra` on `cls`.
data = pd.concat(
    [btc.trade_lag.rename('tra'), btc.Close.rename('cls')],
    axis=1,
)
mod = smf.ols("tra ~ cls", data).fit()
print(mod.summary())

In [None]:
# Design matrix: the five lagged closes. Target: Close (kept as a one-column
# DataFrame).
X = pd.DataFrame(btc[['lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5']])
Y = pd.DataFrame(btc['Close'])

# Fixed seed so the random split, and every metric derived from it, is the
# same on each run.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows; consider shuffle=False if a
# time-ordered hold-out is wanted.
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state=42)

model = LinearRegression()
model_1 = XGBRegressor()
model_2 = Ridge()

# cross_val_score fits clones of the estimator; model_2 itself is still
# unfitted after this call. The array of per-fold scores is the cell output.
cross_val_score(model_2, x_train, y_train)

In [None]:
# Fit the linear model on the training split.
model.fit(x_train, y_train)
x_test = pd.DataFrame(x_test)

# `pred` holds the raw predictions (default integer index); `new_pred` holds
# the same values, in the same order, labelled with the test rows' index.
pred = pd.DataFrame(model.predict(x_test), columns=['value'])
new_pred = pd.DataFrame({'value': pred['value'].to_numpy()}, index=x_test.index)

# Put features and predictions side by side and order them by row label so
# the curves below are drawn left to right.
df = pd.concat([x_test, new_pred], axis=1)
df.sort_index(inplace=True)
x = df.index
y = df['value']

# Sort the true values the same way so they line up row-for-row with `y`.
y_test.sort_index(inplace=True)
y_real = y_test.loc[:]

plt.figure(figsize=(10, 5))
plt.plot(x, y, label='pred')
plt.plot(x, y_real['Close'], label='real')
plt.legend()
plt.show()

# Root-mean-squared error on the test rows.
rmse = mean_squared_error(y, y_real) ** 0.5
print(rmse)
# NOTE(review): this R^2 is computed on the training split, unlike the RMSE
# printed just above.
print(model.score(x_train, y_train))

In [None]:
# Build the same lag features on the validation frame: lag_k is Close shifted
# by k rows (k = 1..5), trade_lag is 'Number of Trades' shifted by one row.
for k in range(1, 6):
    valid[f'lag_{k}'] = valid['Close'].shift(k)
valid['trade_lag'] = valid['Number of Trades'].shift(1)
# Rows with a NaN in any column (including the leading rows whose lags are
# undefined) are removed.
valid.dropna(inplace=True)

# Keep only the rows whose labels fall in 40000..40050 (label slice, inclusive;
# labels removed by dropna are simply absent).
valid = valid.loc[40000:40050]

lag_columns = [f'lag_{k}' for k in range(1, 6)]
pre = model.predict(valid[lag_columns])

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(valid.index, pre, label='pred')
ax.plot(valid.index, valid['Close'], label='real')
ax.legend()
plt.show()

# Root-mean-squared error over the window.
window_rmse = mean_squared_error(pre, valid['Close']) ** 0.5
print(window_rmse)

In [None]:
# Apply the model fitted on the BTC lags to the 'eth-usdt.csv' table.
eth = pd.read_csv('eth-usdt.csv')

# Same feature definitions as for the btc/valid frames: lag_k is Close shifted
# by k rows (k = 1..5) and trade_lag is 'Number of Trades' shifted by one row.
for k in range(1, 6):
    eth[f'lag_{k}'] = eth['Close'].shift(k)
eth['trade_lag'] = eth['Number of Trades'].shift(1)
# Remove rows with a NaN in any column (this includes the leading rows whose
# lags are undefined).
eth.dropna(inplace=True)

eth_x = eth[[f'lag_{k}' for k in range(1, 6)]]
eth_y = eth['Close']

# Both metrics come from the fitted LinearRegression `model`. `model_2` is only
# ever handed to cross_val_score, which fits clones and leaves the original
# unfitted, so it cannot be used for prediction here.
p = np.array(model.predict(eth_x))
print(mean_squared_error(eth_y, p) ** 0.5)
print(model.score(eth_x, eth_y))

In [None]:
# `eth_y` carries the row labels that survived dropna, whereas `p` is a plain
# positional array. Give the predictions the same index so that one label
# window selects matching rows from both series.
pred_series = pd.Series(np.ravel(p), index=eth_y.index)
window = slice(6000, 6050)  # label slice, inclusive on both ends

real_window = eth_y.loc[window]
pred_window = pred_series.loc[window]

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(real_window.index, real_window, label='real')
ax.plot(pred_window.index, pred_window, label='pred')
ax.legend()
plt.show()