In [1]:
import numpy as np
import pandas as pd
from preprocessing.wrangling import get_indi_df, get_labels, slide_and_flatten
from preprocessing.extract_features import get_all_ta_features, get_wavelet_coeffs
from evaluation.eval import sliding_window_cv_regression, batch_test_swcv_regression
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, r2_score
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from numpy.lib.stride_tricks import sliding_window_view
from xgboost import XGBRegressor
from sklearn.pipeline import make_pipeline

In [2]:
# Load the ASHOKLEY.NS OHLCV history from 2017-01-01 onward and append the
# technical-analysis features in one step.
df = get_all_ta_features(
    get_indi_df(
        "ASHOKLEY.NS",
        ohlcvfile="data_collection/ohlcv_data/ohlcv_auto.csv",
        start_date="2017-01-01",
    )
)

# 'Date' is removed from the feature matrix.
drop_columns = ['Date']
df = df.drop(columns=drop_columns)

# Labels are derived from the closing price series.
move_dir_target, cls_target = get_labels(df['Close'])

# Drop the final row from both the features and the close target so they stay
# the same length.
# NOTE(review): presumably the last row has no next-step label — confirm in get_labels.
# move_dir_target is left untrimmed, exactly as before.
df, cls_target = df.iloc[:-1], cls_target.iloc[:-1]

  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


In [3]:
# Flattened sliding-window feature frames, one per window length. A window of
# length w is indexed by the row it ends on, hence df.index[w - 1:].
_flat_by_window = {
    w: pd.DataFrame(slide_and_flatten(df, window_len=w), index=df.index[w - 1:])
    for w in (10, 30, 60)
}
df10, df30, df60 = (_flat_by_window[w] for w in (10, 30, 60))

# Level-2 wavelet coefficients of the closing price over the same window
# lengths, re-using the index of the matching flattened frame.
_wavelet_by_window = {
    w: pd.DataFrame.from_records(
        get_wavelet_coeffs(df['Close'], len_window=w, decomp_level=2),
        index=_flat_by_window[w].index,
    )
    for w in (10, 30, 60)
}
df10_wavelet, df30_wavelet, df60_wavelet = (
    _wavelet_by_window[w] for w in (10, 30, 60)
)

In [30]:
# Append the wavelet coefficients to each flattened window frame by joining on
# the shared row index (inner join, which is the merge default).
# NOTE(review): this cell rebinds df10/df30/df60, so running it twice merges
# the wavelet columns in a second time — run it once per kernel session.
df10 = pd.merge(df10, df10_wavelet, how="inner", left_index=True, right_index=True)
df30 = pd.merge(df30, df30_wavelet, how="inner", left_index=True, right_index=True)
df60 = pd.merge(df60, df60_wavelet, how="inner", left_index=True, right_index=True)

In [32]:
# Seed for the random forests. Without a fixed random_state every run of the
# notebook grows different trees, so the scores written to the results file
# could not be reproduced.
RF_RANDOM_STATE = 42

# Robust scaling followed by a 100-tree random forest regressor.
pipe1 = make_pipeline(
    RobustScaler(),
    RandomForestRegressor(n_estimators=100, random_state=RF_RANDOM_STATE),
)

# Same preprocessing with a 200-tree forest.
pipe2 = make_pipeline(
    RobustScaler(),
    RandomForestRegressor(n_estimators=200, random_state=RF_RANDOM_STATE),
)

In [33]:
# Regression target: the close label minus the current close, i.e. a price
# difference rather than a price level.
_close = df['Close']
y = cls_target - _close

# The same target restricted to the rows covered by the 10-, 30- and 60-step
# window frames, whose first usable rows are at positions 9, 29 and 59.
y10, y30, y60 = (
    cls_target.iloc[start:] - _close.iloc[start:]
    for start in (9, 29, 59)
)

def add_closing_price(y, cls_price):
    """Add a closing price back onto a price-difference value.

    Parameters
    ----------
    y : value(s) to shift; anything supporting ``y + cls_price``.
    cls_price : closing price(s) to add.

    Returns
    -------
    The result of ``y + cls_price``.
    """
    restored = y + cls_price
    return restored

In [34]:
# Sliding-window CV sizes. They are named once so the training/test lengths
# passed to the evaluator and the slice bounds below cannot drift apart.
_n_tr, _n_ts = 120, 1
_cls_price_stop = len(df) - (_n_tr + _n_ts)

batch_test_swcv_regression(
    list_X=[df30, df60],
    list_y=[y30, y60],
    list_pipe=[pipe1, pipe2],
    list_n_tr=[_n_tr],
    list_n_ts=[_n_ts],
    scorers=[mean_squared_error, mean_absolute_percentage_error, r2_score],
    savefile='rfg_tests.csv',
    # Labels must name the matrices in list_X, in order. The first entry used
    # to read "df10_wavelet" although the matrix under test is df30.
    comment_X=["df30_wavelet", "df60_wavelet"],
    # One post-processor per X: add the closing price back onto the predicted
    # price difference. The slices start at 29 / 59, the first rows covered by
    # the 30- and 60-step window frames.
    # NOTE(review): these slices take the FIRST len(X)-(n_tr+n_ts) closes;
    # confirm in batch_test_swcv_regression that predictions align with these
    # rows rather than with rows offset by n_tr.
    list_post_processors=[
        (add_closing_price, {'cls_price': df['Close'].iloc[29:_cls_price_stop]}),
        (add_closing_price, {'cls_price': df['Close'].iloc[59:_cls_price_stop]}),
    ],
)

# cls_price should have length len(X)-(n_tr+n_ts)