In [8]:
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Suppress every warning raised from this point on.
warnings.filterwarnings('ignore')

data = pd.read_csv('../CSV/FullMain.csv')

# How many rows back each lagged feature looks.
LAG = 14
# Lagged feature name -> raw column it is shifted from.
LAG_FEATURES = {
    'prev_open': '<OPEN>',
    'prev_high': '<HIGH>',
    'prev_close': '<CLOSE>',
    'prev_low': '<LOW>',
    'prev_vol': '<VOL>',
}

# ticker -> {'mean_absolute_error': ..., 'r2_score': ...}
results = {}

tickers = data['<TICKER>'].unique()

for ticker in tickers:

    # Work on an explicit copy: adding columns to a boolean-filtered slice of
    # `data` is the chained-assignment pattern pandas warns about.
    ticker_data = data[data['<TICKER>'] == ticker].copy()
    for feature, source in LAG_FEATURES.items():
        ticker_data[feature] = ticker_data[source].shift(LAG)
    ticker_data = ticker_data.rename(columns={'<OPEN>': 'OPEN', '<CLOSE>': 'CLOSE'})
    # The first LAG rows have no lagged values (NaN after the shift); drop them.
    ticker_data = ticker_data.iloc[LAG:].reset_index(drop=True)

    # Predict CLOSE from the lagged features plus the same-row OPEN.
    X = ticker_data[[*LAG_FEATURES, 'OPEN']]
    y = ticker_data['CLOSE']

    # NOTE(review): train_test_split shuffles rows by default. If FullMain.csv
    # is in chronological order, test rows are interleaved with training rows,
    # which would leak future information and inflate the scores. Consider
    # shuffle=False. Left unchanged so the recorded results stay reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = XGBRegressor()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[ticker] = {'mean_absolute_error': mae, 'r2_score': r2}

results

{'AFLT': {'mean_absolute_error': 0.1830584196075988,
  'r2_score': 0.9995561014399683},
 'ALRS': {'mean_absolute_error': 0.4428067733450844,
  'r2_score': 0.9991856093339897},
 'GAZP': {'mean_absolute_error': 1.0761971041721061,
  'r2_score': 0.9989444379945562},
 'GMKN': {'mean_absolute_error': 78.98456453848007,
  'r2_score': 0.9987802317396153},
 'HYDR': {'mean_absolute_error': 0.0024685250814898765,
  'r2_score': 0.9898733269897054},
 'IRAO': {'mean_absolute_error': 0.015337971927574183,
  'r2_score': 0.999044078462003},
 'LKOH': {'mean_absolute_error': 21.642807830575194,
  'r2_score': 0.9983903179540801},
 'MGNT': {'mean_absolute_error': 20.415010820011247,
  'r2_score': 0.9957202325916908},
 'MOEX': {'mean_absolute_error': 0.5106795127103749,
  'r2_score': 0.9993769298242466},
 'MTLR': {'mean_absolute_error': 0.9814610890127456,
  'r2_score': 0.9970924047775135},
 'MTSS': {'mean_absolute_error': 0.8559207200391131,
  'r2_score': 0.9986346365723576},
 'NLMK': {'mean_absolute_erro

In [23]:
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, r2_score

# Suppress every warning raised from this point on.
warnings.filterwarnings('ignore')

data = pd.read_csv('../CSV/FullMain.csv')

# How many rows back each lagged feature looks.
LAG = 14
# Lagged feature name -> raw column it is shifted from.
LAG_FEATURES = {
    'prev_open': '<OPEN>',
    'prev_high': '<HIGH>',
    'prev_close': '<CLOSE>',
    'prev_low': '<LOW>',
    'prev_vol': '<VOL>',
}

# Hyper-parameter grid searched for every ticker (4 * 3 * 3 = 36 candidates).
# It does not depend on the ticker, so it is built once outside the loop.
params = {
    'learning_rate': [0.01, 0.03, 0.05, 0.07],
    'max_depth': [4, 5, 6],
    'n_estimators': [150, 200, 250]
}

# ticker -> {'best_params': ..., 'mean_absolute_error': ..., 'r2_score': ...}
results = {}

tickers = data['<TICKER>'].unique()

for ticker in tickers:

    # Work on an explicit copy: adding columns to a boolean-filtered slice of
    # `data` is the chained-assignment pattern pandas warns about.
    ticker_data = data[data['<TICKER>'] == ticker].copy()
    for feature, source in LAG_FEATURES.items():
        ticker_data[feature] = ticker_data[source].shift(LAG)
    ticker_data = ticker_data.rename(columns={'<OPEN>': 'OPEN', '<CLOSE>': 'CLOSE'})
    # The first LAG rows have no lagged values (NaN after the shift); drop them.
    ticker_data = ticker_data.iloc[LAG:].reset_index(drop=True)

    # Predict CLOSE from the lagged features plus the same-row OPEN.
    X = ticker_data[[*LAG_FEATURES, 'OPEN']]
    y = ticker_data['CLOSE']

    # NOTE(review): train_test_split shuffles rows by default. If FullMain.csv
    # is in chronological order, test rows are interleaved with training rows,
    # which would leak future information. Consider shuffle=False here and
    # TimeSeriesSplit for `cv`.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    xgb_model = xgb.XGBRegressor()

    grid_search = GridSearchCV(estimator=xgb_model, param_grid=params, scoring='neg_mean_absolute_error', cv=3, verbose=1)

    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_

    print(f"Best parameters for {ticker}: {best_params}")

    # With the default refit=True the search object has already retrained the
    # best candidate on the full training split, so it can predict directly.
    y_pred = grid_search.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Keep the tuned parameters and the hold-out scores for later inspection.
    results[ticker] = {'best_params': best_params, 'mean_absolute_error': mae, 'r2_score': r2}

Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters for AFLT: {'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 200}
Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters for ALRS: {'learning_rate': 0.03, 'max_depth': 4, 'n_estimators': 250}
Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters for GAZP: {'learning_rate': 0.03, 'max_depth': 6, 'n_estimators': 250}
Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters for GMKN: {'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 200}
Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters for HYDR: {'learning_rate': 0.03, 'max_depth': 4, 'n_estimators': 250}
Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters for IRAO: {'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 200}
Fitting 3 folds for each of 36 candidates, totalling 108 fits
Best parameters for LKOH: {'learning_rate': 

In [34]:
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Suppress every warning raised from this point on.
warnings.filterwarnings('ignore')

data = pd.read_csv('../CSV/FullMain.csv')

# How many rows back each lagged feature looks.
LAG = 14
# Lagged feature name -> raw column it is shifted from.
LAG_FEATURES = {
    'prev_open': '<OPEN>',
    'prev_high': '<HIGH>',
    'prev_close': '<CLOSE>',
    'prev_low': '<LOW>',
    'prev_vol': '<VOL>',
}

# One fixed hyper-parameter set applied to every ticker; it does not depend on
# the ticker, so it is defined once outside the loop.
params = {'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 250}

# ticker -> {'mean_absolute_error': ..., 'r2_score': ...}
results = {}

tickers = data['<TICKER>'].unique()

for ticker in tickers:

    # Work on an explicit copy: adding columns to a boolean-filtered slice of
    # `data` is the chained-assignment pattern pandas warns about.
    ticker_data = data[data['<TICKER>'] == ticker].copy()
    for feature, source in LAG_FEATURES.items():
        ticker_data[feature] = ticker_data[source].shift(LAG)
    ticker_data = ticker_data.rename(columns={'<OPEN>': 'OPEN', '<CLOSE>': 'CLOSE'})
    # The first LAG rows have no lagged values (NaN after the shift); drop them.
    ticker_data = ticker_data.iloc[LAG:].reset_index(drop=True)

    # Predict CLOSE from the lagged features plus the same-row OPEN.
    X = ticker_data[[*LAG_FEATURES, 'OPEN']]
    y = ticker_data['CLOSE']

    # NOTE(review): train_test_split shuffles rows by default. If FullMain.csv
    # is in chronological order, test rows are interleaved with training rows,
    # which would leak future information and inflate the scores. Consider
    # shuffle=False. Left unchanged so the recorded results stay reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = XGBRegressor(**params)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[ticker] = {'mean_absolute_error': mae, 'r2_score': r2}

results

{'AFLT': {'mean_absolute_error': 0.16177808433081617,
  'r2_score': 0.9996642622569858},
 'ALRS': {'mean_absolute_error': 0.40707213129666286,
  'r2_score': 0.9991896888791142},
 'GAZP': {'mean_absolute_error': 0.9866662668262471,
  'r2_score': 0.9991144090287288},
 'GMKN': {'mean_absolute_error': 70.71710503193285,
  'r2_score': 0.9989869564671904},
 'HYDR': {'mean_absolute_error': 0.00224232900743313,
  'r2_score': 0.990708379167234},
 'IRAO': {'mean_absolute_error': 0.014462619160012298,
  'r2_score': 0.9989477406714792},
 'LKOH': {'mean_absolute_error': 20.149583674335236,
  'r2_score': 0.9981127689681838},
 'MGNT': {'mean_absolute_error': 19.143097943695775,
  'r2_score': 0.9964831576880735},
 'MOEX': {'mean_absolute_error': 0.45311555073009957,
  'r2_score': 0.9994522823000497},
 'MTLR': {'mean_absolute_error': 0.8774450719431917,
  'r2_score': 0.9974901253207968},
 'MTSS': {'mean_absolute_error': 0.7581437941994951,
  'r2_score': 0.9989349767478908},
 'NLMK': {'mean_absolute_err