In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

import pandas as pd
import numpy as np
import os
import xgboost as xgb
import matplotlib.pyplot as plt

from ta import add_all_ta_features
from ta.utils import dropna
from pathlib import Path
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import cross_val_score, GridSearchCV, KFold, RandomizedSearchCV, train_test_split
from sklearn.metrics import auc, accuracy_score, confusion_matrix, mean_squared_error

from scipy.stats import uniform, randint

In [None]:
# Root directory holding every dataset used by this notebook. It can be
# overridden with the TURBO_DATASETS_DIR environment variable so the notebook
# runs on another machine without editing; the default is the original location.
turbo_path = os.environ.get("TURBO_DATASETS_DIR", "C:/_repos/hackaton-turbo/datasets/")
# Feature CSVs are read from "<featured_path>/<market>/<file>.csv" by later cells.
# Derived from turbo_path so the two locations cannot drift apart.
featured_path = os.path.join(turbo_path, "featured/")

In [None]:
def percentages_moves(df, column_old, column_new):
    """Add ``column_new`` with the row-over-row percentage move of ``column_old``.

    Each value is ``100 - current * 100 / previous``; with this formula a rise
    in ``column_old`` gives a negative number and a fall gives a positive one.
    The first row has no predecessor and is set to 0.

    ``df`` is modified in place and nothing is returned. An empty frame is left
    untouched.

    The computation is positional (``shift``), so it works for any index and
    does not depend on the frame carrying default 0..n-1 labels.
    """
    if len(df) == 0:
        return
    prices = df[column_old]
    moves = 100 - prices * 100 / prices.shift(1)
    # Row 0 has nothing to compare against; define its move as 0.
    moves.iloc[0] = 0
    df[column_new] = moves
            
            
def future_price(df, column_old, column_new):
    """Add ``column_new`` holding the next row's value of ``column_old``.

    The last row has no successor, so it receives its own ``column_old`` value
    as a placeholder.

    ``df`` is modified in place and nothing is returned. An empty frame is left
    untouched.

    The computation is positional (``shift``), so it works for any index and
    does not depend on the frame carrying default 0..n-1 labels.
    """
    if len(df) == 0:
        return
    current = df[column_old]
    following = current.shift(-1)
    # No next row exists for the final entry: fall back to its own value.
    following.iloc[-1] = current.iloc[-1]
    df[column_new] = following
            
def fill_na(df, column):
    """Fill missing values of ``column`` in place.

    * A missing first value is replaced by the mean of the column's
      non-missing values.
    * Every later missing value takes the closest preceding value
      (forward fill).

    A first value that is already present is kept as is; only NaN is replaced.
    ``df`` is modified in place and nothing is returned. An empty frame is left
    untouched.
    """
    if len(df) == 0:
        return
    values = df[column].copy()
    if np.isnan(values.iloc[0]):
        # Nothing precedes row 0, so seed it with the column mean.
        values.iloc[0] = values.mean()
    df[column] = values.ffill()

In [None]:
# Load the correlation table. Later cells read its JSE_STOCK, TARGET_MARKET and
# TARGET_STOCK columns to decide which foreign series to join onto each JSE stock.
# The path is built from the shared turbo_path root instead of repeating it.
top_df = pd.read_csv(
    os.path.join(
        turbo_path,
        "jse-percent-correlation",
        "adjusted_price_percent_correlation_top_10.csv",
    )
)
top_df

In [None]:
# create percentage moves and future prices
# Every CSV under "<featured_path>/<market>/" is enriched and written back in place:
#   'Adj Close Percent' - row-over-row move of 'Adj Close' (see percentages_moves)
#   'Future Price'      - next row's 'Adj Close' (see future_price)
#   'DayOfWeek'         - weekday number parsed from 'Date' (Monday = 0)
stocks_markets = ['jse']
for sm in stocks_markets:
    print(sm)
    sm_path = os.path.join(featured_path, sm)
    # sorted() makes the processing order reproducible (os.listdir order is
    # arbitrary); entries that are not CSV files are skipped so a stray file
    # or folder does not abort the run inside read_csv.
    for f in sorted(os.listdir(sm_path)):
        if not f.lower().endswith('.csv'):
            continue
        print(f)
        f_path = os.path.join(sm_path, f)
        df = pd.read_csv(f_path)
        percentages_moves(df, 'Adj Close', 'Adj Close Percent')
        future_price(df, 'Adj Close', 'Future Price')
        df['DayOfWeek'] = pd.to_datetime(df['Date']).dt.dayofweek
        df.to_csv(f_path, index=False)

In [None]:
# add foreign markets features
# For every JSE stock file, left-join the 'Adj Close Percent' series of each
# stock listed for it in top_df, then write the widened frame back to the same CSV.
jse_market_path = sm_path = os.path.join(featured_path, 'jse')


def add_features(row_tuple, js_df):
    """Return ``js_df`` with one foreign stock's percentage moves joined on 'Date'.

    row_tuple: an ``(index, row)`` pair as produced by ``DataFrame.iterrows``;
        the row supplies 'TARGET_MARKET' and 'TARGET_STOCK'.
    js_df: frame of the JSE stock being enriched; must contain a 'Date' column.

    The joined column is named '<market>_<stock>_Close_percent'. Dates that are
    absent from the foreign file produce NaN (left join). The input frame is
    not modified; a new frame is returned.
    """
    row = row_tuple[1]
    sm = row['TARGET_MARKET']
    tst = row['TARGET_STOCK']
    feature_col = f'{sm}_{tst}_Close_percent'
    f_sm_path = os.path.join(featured_path, sm, f'{tst}.csv')
    f_sm_df = pd.read_csv(f_sm_path, usecols=['Date', 'Adj Close Percent'])
    f_sm_df = f_sm_df.rename(columns={'Adj Close Percent': feature_col})
    # The enriched frame is saved over its source file, so on a re-run the
    # column already exists. Drop the stale copy first; otherwise merge would
    # emit '<name>_x' / '<name>_y' duplicates.
    js_df = js_df.drop(columns=[feature_col], errors='ignore')
    return js_df.merge(f_sm_df, on='Date', how='left')


# sorted() gives a reproducible order; non-CSV directory entries are skipped.
for jse in sorted(os.listdir(jse_market_path)):
    if not jse.lower().endswith('.csv'):
        continue
    jse_path = os.path.join(jse_market_path, jse)
    jse_name = Path(jse_path).stem

    js_df = pd.read_csv(jse_path)

    # Rows of the correlation table that belong to this JSE stock (matched on
    # the file name without extension).
    jse_top_corr_df = top_df[top_df['JSE_STOCK'] == jse_name]

    for row in jse_top_corr_df.iterrows():
        js_df = add_features(row, js_df)

    js_df.to_csv(jse_path, index=False)

In [None]:
# Fill the gaps in the joined foreign-market columns of every JSE stock file
# and write the result back to the same CSV.
# sorted() gives a reproducible order; non-CSV directory entries are skipped.
for jse in sorted(os.listdir(jse_market_path)):
    if not jse.lower().endswith('.csv'):
        continue
    jse_path = os.path.join(jse_market_path, jse)
    jse_name = Path(jse_path).stem

    js_df = pd.read_csv(jse_path)
    # Pick the joined columns by the '_Close_percent' suffix they were given
    # when added, instead of assuming they are exactly the last 10 columns:
    # with fewer joined features the positional slice would also rewrite
    # unrelated columns.
    foreign_cols = [c for c in js_df.columns if c.endswith('_Close_percent')]
    for c in foreign_cols:
        # Seed row 0 from row 1 before filling (needs at least two rows).
        if len(js_df) > 1:
            js_df.loc[0, c] = js_df.loc[1, c]
        fill_na(js_df, c)
    js_df.to_csv(jse_path, index=False)

In [None]:
# create and save model
# Fit an XGBoost regressor that predicts 'Future Price' for one JSE stock and
# report RMSE on the most recent rows, held out chronologically (no shuffling).
df = pd.read_csv(os.path.join(featured_path, "jse", "ABG.JO_2021-09-15_2023-09-15.csv"))

# The final row's 'Future Price' is only a placeholder: future_price() copies the
# row's own 'Adj Close' there because no next row exists. Exclude it so it is not
# scored (or trained on) as if it were a real label.
model_df = df.iloc[:-1]

drop_features = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Future Price']
X = model_df.drop(labels=drop_features, axis=1)
y = model_df['Future Price']

# Hold out the last 1/30 of the rows as the test set; keep at least one row so
# the evaluation below never runs on an empty split.
test_limit = max(1, len(model_df) // 30)
train_limit = len(model_df) - test_limit

X_train = X.iloc[0:train_limit, :]
X_test = X.iloc[train_limit:, :]
y_train = y.iloc[0:train_limit]
y_test = y.iloc[train_limit:]

xgb_model = xgb.XGBRegressor(objective="reg:squarederror")
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("RMSE: %f" % (rmse))

# Optional follow-ups, left disabled: feature-importance plot and model export.
#plt.rcParams['figure.figsize'] = (5, 50)

#xgb.plot_importance(xgb_model)
#xgb_model.save_model("model.json")