In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from catboost import CatBoostRegressor

In [3]:
df = pd.read_csv("price_prediction_data.csv")
df["Date"] = pd.to_datetime(df["Date"])

In [4]:
df.head()

Unnamed: 0,Date,A,B,C
0,2013-05-01,248.229996,106.25,904.093817
1,2013-05-02,252.550003,98.099998,901.763325
2,2013-05-03,258.049988,112.900002,996.998207
3,2013-05-06,255.720001,109.599998,894.422647
4,2013-05-07,257.730011,113.199997,882.606339


In [5]:
df_a = df[["Date", "A"]].rename(columns={"Date": "date", "A": "close"})
df_b = df[["Date", "B"]].rename(columns={"Date": "date", "B": "close"})
df_c = df[["Date", "C"]].rename(columns={"Date": "date", "C": "close"})

In [7]:
from copy import deepcopy as dc

def prepare_df_for_lstm(df, n_steps):
  cdf = dc(df)

  for i in range(n_steps - 1, 0, -1):
    cdf[f'close(t-{i})'] = cdf['close'].shift(i)
  
  cdf['moving_target'] = cdf['close'] - cdf['close(t-1)']
  cdf['target'] = cdf['close']

  cdf.dropna(inplace=True)
  
  cdf = cdf.drop(columns=["close"])
  
  return cdf

lookback = 7
shifted_df_a = prepare_df_for_lstm(df_a, lookback)
shifted_df_b = prepare_df_for_lstm(df_b, lookback)
shifted_df_c = prepare_df_for_lstm(df_c, lookback)
  

In [12]:
stacked_df = pd.concat([shifted_df_a, shifted_df_b, shifted_df_c], axis=0)
stacked_df.head()

Unnamed: 0,date,close(t-6),close(t-5),close(t-4),close(t-3),close(t-2),close(t-1),moving_target,target
6,2013-05-09,248.229996,252.550003,258.049988,255.720001,257.730011,258.679993,1.480011,260.160004
7,2013-05-10,252.550003,258.049988,255.720001,257.730011,258.679993,260.160004,3.470001,263.630005
8,2013-05-13,258.049988,255.720001,257.730011,258.679993,260.160004,263.630005,0.880005,264.51001
9,2013-05-14,255.720001,257.730011,258.679993,260.160004,263.630005,264.51001,3.819977,268.329987
10,2013-05-15,257.730011,258.679993,260.160004,263.630005,264.51001,268.329987,-1.769989,266.559998


In [14]:
# Define custom MAPE metric
def MAPE(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [26]:
from sklearn.model_selection import train_test_split

X = stacked_df.drop(columns=["date","moving_target", "target"])
y = stacked_df["moving_target"]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=34)  # Split based on time

# Step 2: Feature Engineering (if needed)

# Step 3: Model Training
model = CatBoostRegressor(iterations=10000, learning_rate=0.0001, depth=6, loss_function='MAPE')
model.fit(X_train, y_train, eval_set=(X_val, y_val), verbose=100)

# Step 4: Model Evaluation
y_pred = model.predict(X_val)
mape = MAPE(y_val, y_pred)
print(f'MAPE on validation set: {mape}')

0:	learn: 0.9618220	test: 0.9650347	best: 0.9650347 (0)	total: 1.45ms	remaining: 14.5s
100:	learn: 0.9617612	test: 0.9650162	best: 0.9650143 (70)	total: 125ms	remaining: 12.3s
200:	learn: 0.9616999	test: 0.9650260	best: 0.9650143 (70)	total: 244ms	remaining: 11.9s
300:	learn: 0.9616375	test: 0.9650347	best: 0.9650143 (70)	total: 361ms	remaining: 11.6s
400:	learn: 0.9615771	test: 0.9650353	best: 0.9650143 (70)	total: 479ms	remaining: 11.5s
500:	learn: 0.9615167	test: 0.9650168	best: 0.9650120 (480)	total: 597ms	remaining: 11.3s
600:	learn: 0.9614547	test: 0.9650244	best: 0.9650120 (480)	total: 719ms	remaining: 11.2s
700:	learn: 0.9613963	test: 0.9649873	best: 0.9649814 (645)	total: 838ms	remaining: 11.1s
800:	learn: 0.9613378	test: 0.9649756	best: 0.9649755 (773)	total: 957ms	remaining: 11s
900:	learn: 0.9612766	test: 0.9649723	best: 0.9649706 (892)	total: 1.07s	remaining: 10.9s
1000:	learn: 0.9612163	test: 0.9649770	best: 0.9649705 (916)	total: 1.19s	remaining: 10.7s
1100:	learn: 0.961

In [27]:
y_val

190   -248.715355
246     12.990021
395     -5.529999
584     61.906316
663     -8.669983
          ...    
320      3.049988
533     -3.049988
933   -140.244832
385      3.219970
943   -255.285280
Name: moving_target, Length: 752, dtype: float64

In [28]:
y_pred

array([ 0.16742117,  0.15500691,  0.14155712,  0.17127781,  0.15209748,
        0.15673974,  0.13966694,  0.13186238,  0.1589244 ,  0.15395174,
        0.176328  ,  0.15442011,  0.17046897,  0.16521401,  0.17479162,
        0.16403467,  0.16409258,  0.12787611,  0.17638902,  0.15170798,
        0.17585317,  0.15339252,  0.17558259,  0.15237471,  0.1677269 ,
        0.1502027 ,  0.15762494,  0.17607574,  0.13826959,  0.17613588,
        0.17466356,  0.17037161,  0.13109269,  0.16573933,  0.14762382,
        0.14237635,  0.15203631,  0.1434457 ,  0.17635755,  0.15065573,
        0.1396302 ,  0.1765279 ,  0.15509127,  0.16932517,  0.13846364,
        0.15447624,  0.16138525,  0.17009577,  0.15807597,  0.1435843 ,
        0.15369644,  0.16310579,  0.15398537,  0.154003  ,  0.15560528,
        0.14302665,  0.17640265,  0.1334849 ,  0.14679106,  0.15456814,
        0.15735926,  0.14992978,  0.15461339,  0.17696412,  0.12691377,
        0.1583676 ,  0.16956709,  0.17222242,  0.14943909,  0.16