In [27]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def read_file(filename):
    """Load a CSV file into a DataFrame indexed by its first column.

    Parameters
    ----------
    filename : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file contents with column 0 used as the index; the index is
        parsed as dates (``parse_dates=True``).
    """
    # `infer_datetime_format` is deliberately not passed: pandas deprecated it
    # in 2.0 (consistent format inference is now the default), so supplying it
    # only triggers a warning on every load. On older pandas the argument was
    # a parsing speed-up, so leaving it out does not change the parsed values.
    return pd.read_csv(filename, index_col=0, parse_dates=True)

def get_data(timeframe):
    """Read the btcusdt CSV that belongs to the given timeframe.

    The path is built as ``../../data/btcusdt_<timeframe>_total.csv`` and
    handed to ``read_file``; whatever ``read_file`` returns is returned as is.
    ``timeframe`` must be a string (e.g. ``"15m"``).
    """
    path_parts = ("../../data/btcusdt_", timeframe, "_total.csv")
    csv_path = "".join(path_parts)
    return read_file(csv_path)

time_frame = "15m"
# Load data
df = get_data(time_frame)

# --- Returns -----------------------------------------------------------------
# NOTE: despite the column names these are per-row figures (one value per row
# of the loaded file), scaled by 1000.
df['Daily_Return'] = df['close'].pct_change() * 1000
# Regression target: forward two-row return, close[t+2] / close[t] - 1.
# The final two rows have no future close, so they are NaN and get removed by
# the dropna() at the end of this cell.
df['Next_Day_Return'] = df['close'].pct_change(periods=2).shift(-2) * 1000

# --- RSI (simple rolling-mean variant) ---------------------------------------
window = 14
delta = df['close'].diff(1)
gain = delta.where(delta > 0, 0)    # upward moves, 0 elsewhere
loss = -delta.where(delta < 0, 0)   # downward moves as positive numbers
avg_gain = gain.rolling(window=window).mean()
avg_loss = loss.rolling(window=window).mean()
# avg_loss == 0 gives rs == inf and therefore RSI == 100; 0/0 gives NaN, which
# is removed by the dropna() below.
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# --- MACD-style signal line ---------------------------------------------------
# Simple moving averages are used here (the textbook MACD uses EMAs). Only
# 'Signal_Line' survives into df_; the intermediates are dropped further down.
short_window = 12
long_window = 26
signal_window = 9
df['Short_MA'] = df['close'].rolling(window=short_window).mean()
df['Long_MA'] = df['close'].rolling(window=long_window).mean()
df['MACD'] = df['Short_MA'] - df['Long_MA']
df['Signal_Line'] = df['MACD'].rolling(window=signal_window).mean()

# --- Rate of change over n rows, in percent ----------------------------------
n = 5
df['ROC'] = df['close'].pct_change(n) * 100

# --- On-balance volume --------------------------------------------------------
# Volume is added when the close rises, subtracted when it falls and ignored
# when it is unchanged. The previous np.where(close > prev, vol, -vol) form
# subtracted volume on unchanged closes and on the very first row (where the
# comparison against NaN is False), which skews the running total.
close_direction = np.sign(df['close'].diff()).fillna(0)
df['OBV'] = (close_direction * df['volume']).cumsum()

# --- Assemble the modelling table --------------------------------------------
df_ = df.drop(columns=['high', 'low', 'MACD', 'Short_MA', 'Long_MA'])
# Skip the first 50 rows as indicator warm-up. Positional slicing is used so
# the trim does not depend on index labels being unique.
df_ = df_.iloc[50:]
# Remove any remaining rows with NaN (e.g. the two rows without a target), and
# take an explicit copy so later column assignments on df_ are unambiguous.
df_ = df_.dropna().copy()
df_.to_csv("../logs/junk" + time_frame + ".csv")

In [28]:
from sklearn.linear_model import LinearRegression, ElasticNet, Ridge
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
# Features are every column of df_ except the target; the target is the
# forward return column.
X = df_.drop(columns=['Next_Day_Return'])
y = df_['Next_Day_Return']

model = LinearRegression()
lookback = 50000     # number of rows in each training window
pred_window = 3000   # number of rows predicted before the model is refit

# Work on a copy so df_ itself is never modified. Previously `data` was an
# alias of df_ and the in-place edits below leaked back into it, so re-running
# this cell trained on earlier predictions/signals and on a truncated frame.
data = df_.copy()

# --- Walk-forward fit / predict ----------------------------------------------
# Float buffer for the predictions (rows before `lookback` stay at 0.0 and are
# discarded at the end). Writing into a float array avoids assigning floats
# into an integer column, which pandas has deprecated.
predicted = np.zeros(len(X), dtype=float)
# Iterate up to len(X): the slices clamp at the end of the data, so the final
# (possibly shorter) window is predicted too. The earlier upper bound of
# len(X) - pred_window left the last 1..pred_window rows at the placeholder 0.
# If len(X) <= lookback the loop does not run and the output file is empty.
for i in range(lookback, len(X), pred_window):
    # The target looks two rows ahead, so the last 3 rows before i are kept
    # out of training to stop targets reaching into the prediction window.
    train_rows = slice(i - lookback, i - 3)
    test_rows = slice(i, i + pred_window)
    model.fit(X.iloc[train_rows], y.iloc[train_rows])
    y_pred = model.predict(X.iloc[test_rows])
    predicted[test_rows] = y_pred
data['Predicted_Return'] = predicted

# Direction of the prediction: +1 long, -1 short, 0 flat.
data['indicator'] = np.where(data['Predicted_Return'] > 0, 1,
                             np.where(data['Predicted_Return'] < 0, -1, 0))

# --- Turn the indicator into trade signals -----------------------------------
# `tot` is the current position (-1 short, 0 flat, +1 long). A signal of +1 is
# a buy and -1 a sell; moving from long to short (or back) takes two rows:
# one to close the open position, the next to open the opposite one.
# The loop runs over a plain array and writes by position, which is much
# faster than iterrows()/.at and does not rely on unique index labels.
signals = np.zeros(len(data), dtype=float)
tot = 0
for pos, indicator in enumerate(data['indicator'].to_numpy()):
    if indicator == 1:
        if tot in (0, -1):
            # enter long position/exit short position
            tot += 1
            signals[pos] = 1
    elif indicator == -1:
        if tot in (0, 1):
            # enter short position/exit long position
            tot -= 1
            signals[pos] = -1
    else:
        if tot == 1:
            # exit long position
            signals[pos] = -1
        elif tot == -1:
            # exit short position
            signals[pos] = 1
        tot = 0
data['signal'] = signals

# The first `lookback` rows were only ever training data; keep the rest.
data = data.iloc[lookback:].copy()
data.to_csv("../logs/lin_reg_window_train_" + time_frame + ".csv")