In [241]:
import numpy as np
import yfinance as yf  
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit  

In [143]:
#опишем функции



def compute_rsi(data, window=14):  
    """Вычисляет индекс относительной силы (RSI) для заданной серии цен."""  
    delta = data.diff()   # Разница между текущим и предшествующим значением  
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()  # Средние приросты  
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()  # Средние убытки  

    rs = gain / loss  # Соотношение приростов к убыткам  
    rsi = 100 - (100 / (1 + rs))  # Формула RSI  
    return rsi  

def fitch_gen(data):
    """Модифицирует таблицу под наши нужды"""
    data.reset_index(inplace=True)

    data['Price Change'] = data['Close'].diff()  
    data['Percentage Change'] = data['Close'].pct_change()  

    #добавим скользящие средние
    data['SMA_5'] = data['Close'].shift(1).rolling(window=5).mean()  
    data['SMA_20'] = data['Close'].shift(1).rolling(window=20).mean()  

    #и индекс относительной силы
    data['RSI'] = compute_rsi(data['Close'], window=14) 

    

    # Лаговые признаки  
    data['Prev Close'] = data['Close'].shift(1)  
    data['Prev Volume'] = data['Volume'].shift(1)

    #переведем информацию о дате в подходящий для модели формат
    data['day_of_week'] = data['Date'].dt.dayofweek
    data['month'] = data['Date'].dt.month
    data['year'] = data['Date'].dt.year

    #для защиты от подглядывания удалим лишние колонки
    data.drop(['Volume', 'Low', 'High', 'Date'], axis=1, inplace=True)
    data.dropna(inplace=True)
    return data

def remove_outliers(df, column, percentile=0.5):  
    """удаляет выбросы с заданым порогом"""
    lower_bound = df[column].quantile(percentile / 100)  
    upper_bound = df[column].quantile(1 - (percentile / 100))  
    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)] 

**Apple**

In [226]:
#выгружаем датасет
data_appl = yf.download('AAPL', start='2015-01-01', end='2023-12-31')  

[*********************100%***********************]  1 of 1 completed


In [227]:
data_appl_red = fitch_gen(data_appl)

In [228]:
data_appl_red

Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
20,29.512501,29.657499,26.447296,0.367498,0.012547,28.680500,27.660375,66.039668,29.290001,334982000.0,0,2,2015
21,29.625000,29.662500,26.451754,0.005001,0.000169,28.957000,27.776625,64.899258,29.657499,250956400.0,1,2,2015
22,29.625000,29.889999,26.654629,0.227499,0.007670,29.432500,27.931625,66.956211,29.662500,207662800.0,2,2,2015
23,30.004999,29.985001,26.844877,0.095001,0.003178,29.645000,28.097875,75.057301,29.889999,280598800.0,3,2,2015
24,30.004999,29.732500,26.618820,-0.252501,-0.008421,29.697000,28.250250,74.544769,29.985001,168984800.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2259,195.179993,193.600006,192.868134,-1.079987,-0.005547,195.982001,193.487000,59.246142,194.679993,46482500.0,4,12,2023
2260,193.610001,193.050003,192.320221,-0.550003,-0.002841,195.188000,193.668501,49.031934,193.600006,37122800.0,1,12,2023
2261,192.490005,193.149994,192.419830,0.099991,0.000518,194.620001,193.831501,52.291513,193.050003,28919300.0,2,12,2023
2262,194.139999,193.580002,192.848221,0.430008,0.002226,193.862000,193.969001,47.920430,193.149994,48087700.0,3,12,2023


In [229]:
for col in data_appl_red.columns:
    data_appl_red = remove_outliers(data_appl_red, col)
data_appl_red


Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
20,29.512501,29.657499,26.447296,0.367498,0.012547,28.680500,27.660375,66.039668,29.290001,334982000.0,0,2,2015
21,29.625000,29.662500,26.451754,0.005001,0.000169,28.957000,27.776625,64.899258,29.657499,250956400.0,1,2,2015
22,29.625000,29.889999,26.654629,0.227499,0.007670,29.432500,27.931625,66.956211,29.662500,207662800.0,2,2,2015
23,30.004999,29.985001,26.844877,0.095001,0.003178,29.645000,28.097875,75.057301,29.889999,280598800.0,3,2,2015
24,30.004999,29.732500,26.618820,-0.252501,-0.008421,29.697000,28.250250,74.544769,29.985001,168984800.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2228,182.350006,182.889999,181.958908,1.069992,0.005885,177.848001,175.118002,63.494342,181.820007,70530000.0,2,11,2023
2229,182.960007,182.410004,181.481354,-0.479996,-0.002625,179.632001,175.272501,68.737692,182.889999,49340300.0,3,11,2023
2230,183.970001,186.399994,185.695358,3.989990,0.021874,180.600000,175.357501,72.866869,182.410004,53763500.0,4,11,2023
2231,185.820007,184.800003,184.101395,-1.599991,-0.008584,182.550000,175.735001,68.647403,186.399994,66133400.0,0,11,2023


In [230]:
from catboost import CatBoostRegressor, cv, Pool
from sklearn.model_selection import train_test_split  

In [231]:
data_appl_red_X = data_appl_red.drop(columns=['Close'])  
data_appl_red_y = data_appl_red['Close'] 

In [232]:
categorical_features = ['day_of_week', 'month', 'year']  

In [233]:
X_train_appl, X_test_appl, y_train_appl, y_test_appl = train_test_split(data_appl_red_X, data_appl_red_y, test_size=0.2, random_state=42) 

In [234]:
train_pool = Pool(X_train_appl, y_train_appl, cat_features=categorical_features) 
params = {  
    'iterations': 1000,  
    'learning_rate': 0.1,  
    'depth': 6,  
    'loss_function': 'RMSE',  
    'verbose': 100  
}

In [235]:
cv_results = cv(train_pool, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

Training on fold [0/3]
0:	learn: 94.1156434	test: 92.4876722	best: 92.4876722 (0)	total: 15.5ms	remaining: 15.5s




100:	learn: 2.2313252	test: 11.1314609	best: 11.1314609 (100)	total: 939ms	remaining: 8.36s
200:	learn: 0.7183107	test: 10.9023938	best: 10.8924653 (191)	total: 1.92s	remaining: 7.65s
300:	learn: 0.2807098	test: 10.8648559	best: 10.8622583 (292)	total: 2.87s	remaining: 6.66s
400:	learn: 0.1274140	test: 10.8428870	best: 10.8422398 (391)	total: 3.84s	remaining: 5.74s
500:	learn: 0.0584791	test: 10.8319369	best: 10.8318432 (499)	total: 4.82s	remaining: 4.8s
600:	learn: 0.0285698	test: 10.8276350	best: 10.8273922 (594)	total: 5.86s	remaining: 3.89s
700:	learn: 0.0144105	test: 10.8264520	best: 10.8264152 (692)	total: 6.95s	remaining: 2.96s
800:	learn: 0.0077959	test: 10.8258897	best: 10.8258200 (791)	total: 7.92s	remaining: 1.97s
900:	learn: 0.0037513	test: 10.8256269	best: 10.8256269 (900)	total: 8.93s	remaining: 982ms
999:	learn: 0.0018941	test: 10.8256487	best: 10.8256186 (910)	total: 9.92s	remaining: 0us

bestTest = 10.82561858
bestIteration = 910

Training on fold [1/3]
0:	learn: 88.73



100:	learn: 1.5243153	test: 1.6072386	best: 1.6056047 (91)	total: 1.11s	remaining: 9.87s
200:	learn: 1.0204976	test: 1.4157494	best: 1.4149009 (199)	total: 2.19s	remaining: 8.71s
300:	learn: 0.7791683	test: 1.3519937	best: 1.3519937 (300)	total: 3.28s	remaining: 7.62s
400:	learn: 0.6421421	test: 1.3138030	best: 1.3135803 (399)	total: 4.49s	remaining: 6.71s
500:	learn: 0.5371552	test: 1.2751083	best: 1.2745159 (499)	total: 5.93s	remaining: 5.9s
600:	learn: 0.4604087	test: 1.2331946	best: 1.2323248 (594)	total: 7.14s	remaining: 4.74s
700:	learn: 0.4010240	test: 1.2170610	best: 1.2163907 (697)	total: 8.23s	remaining: 3.51s
800:	learn: 0.3611266	test: 1.2093790	best: 1.2078847 (794)	total: 9.33s	remaining: 2.32s
900:	learn: 0.3220034	test: 1.2046723	best: 1.2046723 (900)	total: 10.8s	remaining: 1.18s
999:	learn: 0.2904899	test: 1.1953007	best: 1.1948023 (996)	total: 11.8s	remaining: 0us

bestTest = 1.194802327
bestIteration = 996

Training on fold [2/3]
0:	learn: 88.7028345	test: 129.04616



100:	learn: 1.3775230	test: 2.0027834	best: 2.0027834 (100)	total: 1.07s	remaining: 9.52s
200:	learn: 0.9377584	test: 1.3835582	best: 1.3835582 (200)	total: 2.15s	remaining: 8.55s
300:	learn: 0.7363181	test: 1.3009977	best: 1.2255984 (281)	total: 3.23s	remaining: 7.49s
400:	learn: 0.6077514	test: 1.3447824	best: 1.2255984 (281)	total: 4.39s	remaining: 6.56s
500:	learn: 0.5113305	test: 1.2698971	best: 1.2255984 (281)	total: 5.49s	remaining: 5.47s
600:	learn: 0.4466632	test: 1.2119872	best: 1.2119872 (600)	total: 6.59s	remaining: 4.38s
700:	learn: 0.3934170	test: 1.1423572	best: 1.1405963 (699)	total: 7.68s	remaining: 3.28s
800:	learn: 0.3462813	test: 1.1381267	best: 1.1348234 (726)	total: 8.92s	remaining: 2.21s
900:	learn: 0.3114088	test: 1.1382241	best: 1.1346629 (837)	total: 10.1s	remaining: 1.11s
999:	learn: 0.2774545	test: 1.1313322	best: 1.1307932 (991)	total: 11.3s	remaining: 0us

bestTest = 1.130793216
bestIteration = 991



In [236]:
best_iterations = cv_results['iterations'].values[-1]  
model_apple = CatBoostRegressor(iterations=best_iterations, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_apple.fit(X_train_appl, y_train_appl)

0:	learn: 47.9922542	total: 1.85ms	remaining: 1.84s
1:	learn: 43.5208484	total: 3.07ms	remaining: 1.53s
2:	learn: 39.5117407	total: 4.02ms	remaining: 1.33s
3:	learn: 35.9473013	total: 4.88ms	remaining: 1.21s
4:	learn: 32.5738457	total: 5.73ms	remaining: 1.14s
5:	learn: 29.5533003	total: 7.16ms	remaining: 1.19s
6:	learn: 26.8059480	total: 8.37ms	remaining: 1.19s
7:	learn: 24.4081619	total: 9.35ms	remaining: 1.16s
8:	learn: 22.1869482	total: 10.2ms	remaining: 1.12s
9:	learn: 20.2162653	total: 11.1ms	remaining: 1.1s
10:	learn: 18.3719606	total: 12.1ms	remaining: 1.08s
11:	learn: 16.6964228	total: 13.5ms	remaining: 1.11s
12:	learn: 15.1841767	total: 14.5ms	remaining: 1.1s
13:	learn: 13.8309878	total: 15.5ms	remaining: 1.09s
14:	learn: 12.5612301	total: 16.6ms	remaining: 1.09s
15:	learn: 11.4217481	total: 17.6ms	remaining: 1.08s
16:	learn: 10.4311764	total: 18.7ms	remaining: 1.08s
17:	learn: 9.5372357	total: 19.8ms	remaining: 1.08s
18:	learn: 8.6899903	total: 20.7ms	remaining: 1.07s
19:	lea

<catboost.core.CatBoostRegressor at 0x18fe861d4c0>

In [266]:
#train_preds = model.predict(X_train)
test_preds = model_apple.predict(X_test_appl)

In [293]:
#train_error = np.mean((train_preds - y_train.values.ravel())**2)
test_error = np.mean((test_preds - y_test_appl.values.ravel())**2)


In [294]:
#print(f"Качество на трейне: {train_error.round(3)}")
print(f"Качество на тесте: {test_error.round(3)}")

Качество на тесте: 0.853


In [264]:
final_data_appl = yf.download('AAPL', start='2024-01-01', end='2024-12-31')  # Пример  
test_data_appl_new = fitch_gen(final_data_appl)

X_final_appl = test_data_appl_new.drop(columns=['Close']) 
y_final_appl = test_data_appl_new['Close'] 

 

[*********************100%***********************]  1 of 1 completed


**Nvidia**

In [188]:
data_nvda = yf.download('NVDA', start='2015-01-01', end='2023-12-31')  

[*********************100%***********************]  1 of 1 completed


In [189]:
data_nvda_new = fitch_gen(data_nvda)

In [190]:
data_nvda_new

Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
20,0.483000,0.490500,0.470935,0.010500,0.021875,0.492700,0.496150,49.172581,0.480000,298200000.0,0,2,2015
21,0.494000,0.502750,0.482697,0.012250,0.024974,0.487700,0.495513,54.797435,0.490500,259604000.0,1,2,2015
22,0.501250,0.504000,0.483897,0.001250,0.002486,0.490100,0.495913,54.506441,0.502750,183468000.0,2,2,2015
23,0.504500,0.512250,0.491818,0.008250,0.016369,0.494350,0.497125,59.175249,0.504000,181516000.0,3,2,2015
24,0.511750,0.510000,0.489658,-0.002250,-0.004392,0.497900,0.498813,54.803479,0.512250,140448000.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2259,49.195000,48.830002,48.819527,-0.160000,-0.003266,49.134400,47.679450,64.490223,48.990002,300425000.0,4,12,2023
2260,48.967999,49.278999,49.268425,0.448997,0.009195,49.122401,47.732150,62.503437,48.830002,252507000.0,1,12,2023
2261,49.511002,49.417000,49.406395,0.138000,0.002800,48.962801,47.784000,69.719871,49.278999,244200000.0,2,12,2023
2262,49.643002,49.521999,49.511375,0.105000,0.002125,48.925401,47.863800,66.371970,49.417000,233648000.0,3,12,2023


In [198]:
for col in data_nvda_new.columns:
    data_nvda_new = remove_outliers(data_nvda_new, col)

In [199]:
data_nvda_new

Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
38,0.553500,0.551500,0.531541,-0.003500,-0.006306,0.55575,0.530425,73.850639,0.555000,152864000.0,4,2,2015
39,0.550000,0.565000,0.544553,0.013500,0.024479,0.55435,0.533275,77.750028,0.551500,157492000.0,0,3,2015
40,0.560750,0.554500,0.534432,-0.010500,-0.018584,0.55655,0.537525,65.974034,0.565000,301512000.0,1,3,2015
41,0.552500,0.560750,0.540457,0.006250,0.011271,0.55590,0.540725,70.454555,0.554500,160344000.0,2,3,2015
42,0.560750,0.571500,0.550817,0.010750,0.019171,0.55735,0.543625,59.655181,0.560750,226840000.0,3,3,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2223,40.883999,42.325001,42.312202,1.545002,0.037886,40.90920,43.583750,34.802636,40.779999,517969000.0,2,11,2023
2224,43.327999,43.506001,43.492844,1.181000,0.027903,41.01840,43.497950,43.438283,42.325001,437593000.0,3,11,2023
2225,44.020000,45.005001,44.991390,1.499001,0.034455,41.65440,43.438850,46.542316,43.506001,409172000.0,4,11,2023
2226,45.285000,45.750999,45.737164,0.745998,0.016576,42.55540,43.401000,56.316635,45.005001,424610000.0,0,11,2023


In [200]:
data_nvda_new_X = data_nvda_new.drop(columns=['Close'])  
data_nvda_new_y = data_nvda_new['Close']

In [195]:
X_train_nvda, X_test_nvda, y_train_nvda, y_test_nvda = train_test_split(data_nvda_new_X, data_nvda_new_y, test_size=0.2, random_state=42) 

In [196]:
train_pool_nvda = Pool(X_train_nvda, y_train_nvda, cat_features=categorical_features)

In [201]:
cv_results_nvda = cv(train_pool_nvda, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

best_iterations_nvda = cv_results_nvda['iterations'].values[-1]  
model_nvda = CatBoostRegressor(iterations=best_iterations_nvda, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_nvda.fit(X_train_nvda, y_train_nvda)

test_preds_nvda= model_nvda.predict(X_test_nvda)
test_error_nvda = np.mean((test_preds_nvda - y_test_nvda.values.ravel())**2)
print(f"Качество на тесте: {test_error_nvda.round(3)}")

Training on fold [0/3]
0:	learn: 5.5902132	test: 16.3515950	best: 16.3515950 (0)	total: 10.9ms	remaining: 10.9s




100:	learn: 0.3737455	test: 9.9012005	best: 9.9012005 (100)	total: 895ms	remaining: 7.96s
200:	learn: 0.0734556	test: 9.7068525	best: 9.7068525 (200)	total: 1.95s	remaining: 7.74s
300:	learn: 0.0319042	test: 9.6914612	best: 9.6914612 (300)	total: 3.04s	remaining: 7.07s
400:	learn: 0.0130743	test: 9.6916692	best: 9.6913289 (302)	total: 4.05s	remaining: 6.05s
500:	learn: 0.0057792	test: 9.6910419	best: 9.6909826 (497)	total: 5s	remaining: 4.98s
600:	learn: 0.0027098	test: 9.6908465	best: 9.6908317 (550)	total: 5.98s	remaining: 3.97s
700:	learn: 0.0012521	test: 9.6908827	best: 9.6908279 (601)	total: 6.94s	remaining: 2.96s
800:	learn: 0.0005844	test: 9.6908754	best: 9.6908279 (601)	total: 7.88s	remaining: 1.96s
900:	learn: 0.0002672	test: 9.6908705	best: 9.6908279 (601)	total: 8.88s	remaining: 976ms
999:	learn: 0.0001180	test: 9.6908675	best: 9.6908279 (601)	total: 9.82s	remaining: 0us

bestTest = 9.690827903
bestIteration = 601

Training on fold [1/3]
0:	learn: 14.9909430	test: 5.5987881	



100:	learn: 0.3078912	test: 0.1741743	best: 0.1739624 (99)	total: 1.11s	remaining: 9.88s
200:	learn: 0.2047383	test: 0.1254741	best: 0.1254741 (200)	total: 2.28s	remaining: 9.06s
300:	learn: 0.1581781	test: 0.1025881	best: 0.1023534 (297)	total: 3.47s	remaining: 8.06s
400:	learn: 0.1286099	test: 0.0933961	best: 0.0933961 (400)	total: 4.62s	remaining: 6.9s
500:	learn: 0.1053682	test: 0.0844409	best: 0.0843496 (493)	total: 5.87s	remaining: 5.84s
600:	learn: 0.0910230	test: 0.0775704	best: 0.0775704 (600)	total: 6.94s	remaining: 4.61s
700:	learn: 0.0793582	test: 0.0777192	best: 0.0767568 (667)	total: 8.14s	remaining: 3.47s
800:	learn: 0.0711504	test: 0.0760360	best: 0.0758006 (788)	total: 9.36s	remaining: 2.33s
900:	learn: 0.0628601	test: 0.0729824	best: 0.0729824 (900)	total: 10.6s	remaining: 1.16s
999:	learn: 0.0567447	test: 0.0723380	best: 0.0723205 (966)	total: 11.6s	remaining: 0us

bestTest = 0.07232047383
bestIteration = 966

Training on fold [2/3]
0:	learn: 14.8912617	test: 0.40479



100:	learn: 0.3340622	test: 0.0406387	best: 0.0406357 (97)	total: 1.09s	remaining: 9.68s
200:	learn: 0.2231764	test: 0.0291348	best: 0.0290527 (198)	total: 2.19s	remaining: 8.71s
300:	learn: 0.1669315	test: 0.0250856	best: 0.0232558 (260)	total: 3.27s	remaining: 7.6s
400:	learn: 0.1306181	test: 0.0265794	best: 0.0232558 (260)	total: 4.35s	remaining: 6.5s
500:	learn: 0.1093404	test: 0.0270347	best: 0.0232558 (260)	total: 5.42s	remaining: 5.4s
600:	learn: 0.0923377	test: 0.0300005	best: 0.0232558 (260)	total: 6.48s	remaining: 4.3s
700:	learn: 0.0800498	test: 0.0299961	best: 0.0232558 (260)	total: 7.51s	remaining: 3.2s
800:	learn: 0.0704106	test: 0.0310152	best: 0.0232558 (260)	total: 8.62s	remaining: 2.14s
900:	learn: 0.0629307	test: 0.0297819	best: 0.0232558 (260)	total: 9.68s	remaining: 1.06s
999:	learn: 0.0564738	test: 0.0301582	best: 0.0232558 (260)	total: 10.7s	remaining: 0us

bestTest = 0.02325577256
bestIteration = 260

0:	learn: 10.8341232	total: 1.15ms	remaining: 1.15s
1:	learn:

**Google**

In [213]:
data_ggl = yf.download('GOOGL', start='2015-01-01', end='2023-12-31')  

[*********************100%***********************]  1 of 1 completed


In [214]:
data_ggl_new = fitch_gen(data_ggl)

In [215]:
data_ggl_new

Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
20,26.716000,26.610001,26.544273,-0.267500,-0.009953,26.211200,25.795975,66.007656,26.877501,121108000.0,0,2,2015
21,26.497000,26.665001,26.599138,0.055000,0.002067,26.166000,25.802600,64.841697,26.610001,75378000.0,1,2,2015
22,26.657000,26.305000,26.240026,-0.360001,-0.013501,26.287101,25.837200,59.236201,26.665001,47062000.0,2,2,2015
23,26.396500,26.491501,26.426065,0.186501,0.007090,26.423801,25.885850,61.630628,26.305000,33896000.0,3,2,2015
24,26.550501,26.694000,26.628065,0.202499,0.007644,26.589801,25.947550,60.782680,26.491501,33176000.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2259,140.770004,141.490005,141.140518,1.070007,0.007620,136.762000,134.300501,75.206283,140.419998,27488300.0,4,12,2023
2260,141.589996,141.520004,141.170441,0.029999,0.000212,138.539999,134.540501,73.348125,141.490005,26514600.0,1,12,2023
2261,141.589996,140.369995,140.023270,-1.150009,-0.008126,139.684000,134.796001,72.767252,141.520004,16780300.0,2,12,2023
2262,140.779999,140.229996,139.883621,-0.139999,-0.000997,140.428000,134.954501,60.338351,140.369995,19628600.0,3,12,2023


In [208]:
for col in data_ggl_new.columns:
    data_ggl_new = remove_outliers(data_ggl_new, col)

In [216]:
data_ggl_new_x = data_ggl_new.drop(columns=['Close'])  
data_ggl_new_y = data_ggl_new['Close']

In [217]:
X_train_ggl, X_test_ggl, y_train_ggl, y_test_ggl = train_test_split(data_ggl_new_x, data_ggl_new_y, test_size=0.2, random_state=42) 

train_pool_ggl = Pool(X_train_ggl, y_train_ggl, cat_features=categorical_features)

cv_results_ggl = cv(train_pool_ggl, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

best_iterations_ggl = cv_results_ggl['iterations'].values[-1]  
model_ggl = CatBoostRegressor(iterations=best_iterations_ggl, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_ggl.fit(X_train_ggl, y_train_ggl)

test_preds_ggl= model_ggl.predict(X_test_ggl)
test_error_ggl = np.mean((test_preds_ggl - y_test_ggl.values.ravel())**2)
print(f"Качество на тесте: {test_error_ggl.round(3)}")

Training on fold [0/3]
0:	learn: 81.8341127	test: 79.1665591	best: 79.1665591 (0)	total: 10.7ms	remaining: 10.7s




100:	learn: 2.3004860	test: 21.7995557	best: 21.7995557 (100)	total: 885ms	remaining: 7.88s
200:	learn: 0.3106587	test: 21.1033940	best: 21.1033940 (200)	total: 1.79s	remaining: 7.11s
300:	learn: 0.0613306	test: 21.0568077	best: 21.0568077 (300)	total: 2.73s	remaining: 6.33s
400:	learn: 0.0185509	test: 21.0527167	best: 21.0526925 (396)	total: 3.65s	remaining: 5.45s
500:	learn: 0.0045647	test: 21.0515979	best: 21.0515979 (500)	total: 4.56s	remaining: 4.54s
600:	learn: 0.0011292	test: 21.0512975	best: 21.0512593 (581)	total: 5.44s	remaining: 3.61s
700:	learn: 0.0004022	test: 21.0513573	best: 21.0512593 (581)	total: 6.28s	remaining: 2.68s
800:	learn: 0.0001535	test: 21.0513788	best: 21.0512593 (581)	total: 7.15s	remaining: 1.78s
900:	learn: 0.0000697	test: 21.0513827	best: 21.0512593 (581)	total: 8.03s	remaining: 883ms
999:	learn: 0.0000380	test: 21.0513830	best: 21.0512593 (581)	total: 8.9s	remaining: 0us

bestTest = 21.05125932
bestIteration = 581

Training on fold [1/3]
0:	learn: 74.45



100:	learn: 1.1478568	test: 0.9602928	best: 0.9602928 (100)	total: 1.07s	remaining: 9.53s
200:	learn: 0.8002597	test: 0.7093293	best: 0.7044961 (198)	total: 2.14s	remaining: 8.53s
300:	learn: 0.6384477	test: 0.6460566	best: 0.6425721 (284)	total: 3.27s	remaining: 7.58s
400:	learn: 0.5300225	test: 0.6381838	best: 0.6353190 (332)	total: 4.37s	remaining: 6.53s
500:	learn: 0.4514993	test: 0.6247506	best: 0.6173419 (496)	total: 5.47s	remaining: 5.45s
600:	learn: 0.3885390	test: 0.6157650	best: 0.6066839 (538)	total: 6.58s	remaining: 4.37s
700:	learn: 0.3448443	test: 0.6091754	best: 0.6066839 (538)	total: 7.66s	remaining: 3.27s
800:	learn: 0.3078444	test: 0.6096710	best: 0.6035257 (753)	total: 8.75s	remaining: 2.17s
900:	learn: 0.2788742	test: 0.5929139	best: 0.5909779 (890)	total: 9.87s	remaining: 1.08s
999:	learn: 0.2527686	test: 0.5862190	best: 0.5853599 (987)	total: 11s	remaining: 0us

bestTest = 0.585359878
bestIteration = 987

Training on fold [2/3]
0:	learn: 74.4095990	test: 96.025900



100:	learn: 1.2692260	test: 1.7173539	best: 0.9235243 (59)	total: 1.05s	remaining: 9.39s
200:	learn: 0.8241261	test: 2.2696066	best: 0.9235243 (59)	total: 2.13s	remaining: 8.48s
300:	learn: 0.6514962	test: 2.5397622	best: 0.9235243 (59)	total: 3.21s	remaining: 7.46s
400:	learn: 0.5393108	test: 2.7283620	best: 0.9235243 (59)	total: 4.29s	remaining: 6.41s
500:	learn: 0.4676477	test: 2.8024176	best: 0.9235243 (59)	total: 5.38s	remaining: 5.36s
600:	learn: 0.4073459	test: 2.8189898	best: 0.9235243 (59)	total: 6.45s	remaining: 4.28s
700:	learn: 0.3616513	test: 2.8298009	best: 0.9235243 (59)	total: 7.59s	remaining: 3.24s
800:	learn: 0.3261251	test: 2.8842054	best: 0.9235243 (59)	total: 8.67s	remaining: 2.15s
900:	learn: 0.2976602	test: 2.9042718	best: 0.9235243 (59)	total: 9.75s	remaining: 1.07s
999:	learn: 0.2729596	test: 2.8829069	best: 0.9235243 (59)	total: 10.8s	remaining: 0us

bestTest = 0.9235242997
bestIteration = 59

0:	learn: 32.3930180	total: 1.93ms	remaining: 1.93s
1:	learn: 29.41

**Coca-Cola Company**

In [243]:
data_ko = yf.download('KO', start='2015-01-01', end='2023-12-31')  

[*********************100%***********************]  1 of 1 completed


In [244]:
data_ko_new = fitch_gen(data_ko)

In [245]:
for col in data_ko_new.columns:
    data_ko_new = remove_outliers(data_ko_new, col)

In [246]:
data_ko_new_x = data_ko_new.drop(columns=['Close'])  
data_ko_new_y = data_ko_new['Close']

In [248]:
tscv = TimeSeriesSplit(n_splits=5)  # Количество сплитов можно варьировать  

test_errors = []  

for train_index, test_index in tscv.split(data_ko_new_x):  
    X_train, X_test = data_ko_new_x.values[train_index], data_ko_new_x.values[test_index]  
    y_train, y_test = data_ko_new_y.values[train_index], data_ko_new_y.values[test_index]  

    # Создание пула для CatBoost  
    train_pool = Pool(X_train, y_train)   

    # Обучение модели  
    model_ko = CatBoostRegressor(**params)  
    model_ko.fit(X_train, y_train, verbose=0)  

    # Предсказание на тестовом наборе  
    test_preds = model_ko.predict(X_test)  

    # Вычисление ошибки  
    test_error = np.mean((test_preds - y_test) ** 2)  # MSE  
    test_errors.append(test_error)  

# Вывод среднеквадратичной ошибки для каждой итерации  
for i, error in enumerate(test_errors):  
    print(f"Кросс-валидация {i + 1}: Качество на тесте: {error.round(3)}")  

# Вывод среднеарифметической ошибки по всем сплитам  
print(f"Среднее качество по всем сплитам: {np.mean(test_errors).round(3)}")  

Кросс-валидация 1: Качество на тесте: 0.242
Кросс-валидация 2: Качество на тесте: 1.431
Кросс-валидация 3: Качество на тесте: 8.993
Кросс-валидация 4: Качество на тесте: 1.968
Кросс-валидация 5: Качество на тесте: 0.817
Среднее качество по всем сплитам: 2.69


In [255]:
final_model_ko = CatBoostRegressor(**params)  
final_model_ko.fit(data_ko_new_x, data_ko_new_y, verbose=0) 


<catboost.core.CatBoostRegressor at 0x18fe86522a0>

In [256]:
test_data_ko = yf.download('KO', start='2024-01-01', end='2024-12-31')  # Пример  
test_data_ko_new = fitch_gen(test_data_ko)  
#test_data_ko_new = remove_outliers(test_data_ko_new)  

# Подготовка данных  
test_x = test_data_ko_new.drop(columns=['Close']).values  
test_y = test_data_ko_new['Close'].values  

# Прогноз на тестовом наборе данных  
test_preds = final_model_ko.predict(test_x)  

# Оценка качества  
test_error = np.mean((test_preds - test_y) ** 2)  # MSE  
print(f"Качество на тестовом наборе: {test_error.round(3)}")  

[*********************100%***********************]  1 of 1 completed

Качество на тестовом наборе: 14.123





In [223]:
X_train_ko, X_test_ko, y_train_ko, y_test_ko = train_test_split(data_ko_new_x, data_ko_new_y, test_size=0.2, random_state=42)  

train_pool_ko = Pool(X_train_ko, y_train_ko, cat_features=categorical_features)  

cv_results_ko = cv(train_pool_ko, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

best_iterations_ko = cv_results_ko['iterations'].values[-1]  
model_ko = CatBoostRegressor(iterations=best_iterations_ko, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_ko.fit(X_train_ko, y_train_ko)  

test_preds_ko = model_ko.predict(X_test_ko)  
test_error_ko = np.mean((test_preds_ko - y_test_ko.values.ravel())**2)  
print(f"Качество на тесте: {test_error_ko.round(3)}")  

Training on fold [0/3]
0:	learn: 44.4308205	test: 46.9435240	best: 46.9435240 (0)	total: 10.9ms	remaining: 10.9s




100:	learn: 0.6791284	test: 1.1996900	best: 1.1996900 (100)	total: 973ms	remaining: 8.66s
200:	learn: 0.3807533	test: 1.0092790	best: 1.0089325 (199)	total: 2s	remaining: 7.95s
300:	learn: 0.2788370	test: 0.9692718	best: 0.9692563 (299)	total: 3.06s	remaining: 7.11s
400:	learn: 0.2203160	test: 0.9553533	best: 0.9553533 (400)	total: 4.09s	remaining: 6.1s
500:	learn: 0.1769189	test: 0.9481178	best: 0.9479766 (488)	total: 5.14s	remaining: 5.12s
600:	learn: 0.1434976	test: 0.9423768	best: 0.9422845 (599)	total: 6.18s	remaining: 4.11s
700:	learn: 0.1185522	test: 0.9400396	best: 0.9400099 (692)	total: 7.21s	remaining: 3.07s
800:	learn: 0.0987275	test: 0.9387354	best: 0.9384977 (779)	total: 8.23s	remaining: 2.04s
900:	learn: 0.0838453	test: 0.9370177	best: 0.9369811 (896)	total: 9.28s	remaining: 1.02s
999:	learn: 0.0705699	test: 0.9366339	best: 0.9365408 (978)	total: 10.3s	remaining: 0us

bestTest = 0.9365407992
bestIteration = 978

Training on fold [1/3]
0:	learn: 46.1415498	test: 44.1284467



100:	learn: 0.5273644	test: 0.6557337	best: 0.6557337 (100)	total: 1.01s	remaining: 9.04s
200:	learn: 0.3486094	test: 0.5284145	best: 0.5284145 (200)	total: 2.1s	remaining: 8.33s
300:	learn: 0.2782697	test: 0.4939203	best: 0.4935514 (299)	total: 3.19s	remaining: 7.41s
400:	learn: 0.2289123	test: 0.4750238	best: 0.4750238 (400)	total: 4.27s	remaining: 6.38s
500:	learn: 0.1911560	test: 0.4610976	best: 0.4610976 (500)	total: 5.39s	remaining: 5.37s
600:	learn: 0.1664313	test: 0.4562348	best: 0.4560782 (596)	total: 6.47s	remaining: 4.29s
700:	learn: 0.1466100	test: 0.4525647	best: 0.4525422 (699)	total: 7.57s	remaining: 3.23s
800:	learn: 0.1309949	test: 0.4509238	best: 0.4509238 (800)	total: 8.66s	remaining: 2.15s
900:	learn: 0.1185338	test: 0.4497562	best: 0.4497562 (900)	total: 9.76s	remaining: 1.07s
999:	learn: 0.1069527	test: 0.4480612	best: 0.4479063 (995)	total: 10.9s	remaining: 0us

bestTest = 0.4479063439
bestIteration = 995

Training on fold [2/3]
0:	learn: 45.7754331	test: 43.2437



100:	learn: 0.5306051	test: 0.5692559	best: 0.5692559 (100)	total: 1.05s	remaining: 9.37s
200:	learn: 0.3408086	test: 0.4122707	best: 0.4122707 (200)	total: 2.13s	remaining: 8.46s
300:	learn: 0.2712587	test: 0.3734448	best: 0.3731919 (297)	total: 3.19s	remaining: 7.42s
400:	learn: 0.2175219	test: 0.3538905	best: 0.3538905 (400)	total: 4.29s	remaining: 6.4s
500:	learn: 0.1861479	test: 0.3411162	best: 0.3410215 (498)	total: 5.37s	remaining: 5.35s
600:	learn: 0.1602103	test: 0.3319397	best: 0.3319081 (597)	total: 6.47s	remaining: 4.3s
700:	learn: 0.1418679	test: 0.3286335	best: 0.3282838 (680)	total: 7.55s	remaining: 3.22s
800:	learn: 0.1264584	test: 0.3253684	best: 0.3253684 (800)	total: 8.64s	remaining: 2.15s
900:	learn: 0.1138168	test: 0.3218952	best: 0.3217590 (899)	total: 9.73s	remaining: 1.07s
999:	learn: 0.1023907	test: 0.3204769	best: 0.3199737 (979)	total: 10.8s	remaining: 0us

bestTest = 0.3199736597
bestIteration = 979

0:	learn: 6.3300236	total: 2.26ms	remaining: 2.25s
1:	lear

**Большая модель**

In [261]:

full_x = pd.concat([data_ko_new_x, data_ggl_new_x, data_nvda_new_X, data_appl_red_X], ignore_index=True)

full_y = pd.concat([data_ko_new_y, data_ggl_new_y, data_nvda_new_y, data_appl_red_y], ignore_index=True)

In [262]:
full_x

Unnamed: 0,Open,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
0,41.209999,30.491459,0.420002,0.010202,42.115999,42.660000,39.603966,41.169998,19193700.0,0,2,2015
1,41.750000,30.520779,0.040001,0.000962,41.833999,42.632500,40.157484,41.590000,15197500.0,1,2,2015
2,41.770000,30.557434,0.049999,0.001201,41.681999,42.607000,41.304340,41.630001,13193100.0,2,2,2015
3,41.799999,30.638081,0.110001,0.002639,41.634000,42.568000,44.088177,41.680000,16400000.0,3,2,2015
4,41.630001,30.388811,-0.340000,-0.008136,41.572000,42.508000,39.575317,41.790001,11078500.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...
8303,182.350006,181.958908,1.069992,0.005885,177.848001,175.118002,63.494342,181.820007,70530000.0,2,11,2023
8304,182.960007,181.481354,-0.479996,-0.002625,179.632001,175.272501,68.737692,182.889999,49340300.0,3,11,2023
8305,183.970001,185.695358,3.989990,0.021874,180.600000,175.357501,72.866869,182.410004,53763500.0,4,11,2023
8306,185.820007,184.101395,-1.599991,-0.008584,182.550000,175.735001,68.647403,186.399994,66133400.0,0,11,2023


In [263]:
X_train_full, X_test_full, y_train_full, y_test_full = train_test_split(full_x, full_y, test_size=0.2, random_state=42) 

train_pool_full = Pool(X_train_full, y_train_full, cat_features=categorical_features)

cv_results_full = cv(train_pool_full, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

best_iterations_full = cv_results_full['iterations'].values[-1]  
model_full = CatBoostRegressor(iterations=best_iterations_full, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_full.fit(X_train_full, y_train_full)

test_preds_full= model_full.predict(X_test_full)
test_error_full = np.mean((test_preds_full - y_test_full.values.ravel())**2)
print(f"Качество на тесте: {test_error_full.round(3)}")

Training on fold [0/3]
0:	learn: 52.4753000	test: 65.2054442	best: 65.2054442 (0)	total: 23.7ms	remaining: 23.7s




100:	learn: 1.2630056	test: 3.7891757	best: 3.7891757 (100)	total: 1.17s	remaining: 10.4s
200:	learn: 0.7437885	test: 3.4598313	best: 3.4595502 (198)	total: 2.3s	remaining: 9.13s
300:	learn: 0.5195596	test: 3.3820623	best: 3.3820623 (300)	total: 3.44s	remaining: 7.99s
400:	learn: 0.4002893	test: 3.3582455	best: 3.3582455 (400)	total: 4.57s	remaining: 6.82s
500:	learn: 0.3151211	test: 3.3409226	best: 3.3408098 (496)	total: 5.71s	remaining: 5.68s
600:	learn: 0.2556140	test: 3.3330751	best: 3.3327473 (596)	total: 6.88s	remaining: 4.57s
700:	learn: 0.2109945	test: 3.3270198	best: 3.3263141 (667)	total: 8.02s	remaining: 3.42s
800:	learn: 0.1770694	test: 3.3270668	best: 3.3263141 (667)	total: 9.15s	remaining: 2.27s
900:	learn: 0.1498648	test: 3.3240263	best: 3.3236368 (861)	total: 10.3s	remaining: 1.14s
999:	learn: 0.1320164	test: 3.3230126	best: 3.3230061 (998)	total: 11.5s	remaining: 0us

bestTest = 3.323006139
bestIteration = 998

Training on fold [1/3]
0:	learn: 63.7772825	test: 53.52746



100:	learn: 1.0468759	test: 0.7885101	best: 0.7885070 (99)	total: 1.36s	remaining: 12.2s
200:	learn: 0.7638464	test: 0.6329443	best: 0.6327549 (199)	total: 2.65s	remaining: 10.5s
300:	learn: 0.6273807	test: 0.5483670	best: 0.5483426 (298)	total: 3.97s	remaining: 9.22s
400:	learn: 0.5415929	test: 0.4989326	best: 0.4989326 (400)	total: 5.27s	remaining: 7.87s
500:	learn: 0.4798276	test: 0.4635153	best: 0.4635153 (500)	total: 6.56s	remaining: 6.54s
600:	learn: 0.4296225	test: 0.4343246	best: 0.4342062 (599)	total: 7.83s	remaining: 5.2s
700:	learn: 0.3935267	test: 0.4155270	best: 0.4155270 (700)	total: 9.13s	remaining: 3.89s
800:	learn: 0.3634175	test: 0.3999952	best: 0.3999952 (800)	total: 10.4s	remaining: 2.59s
900:	learn: 0.3358492	test: 0.3873919	best: 0.3873919 (900)	total: 11.7s	remaining: 1.29s
999:	learn: 0.3145850	test: 0.3747349	best: 0.3747349 (999)	total: 13s	remaining: 0us

bestTest = 0.3747348652
bestIteration = 999

Training on fold [2/3]
0:	learn: 63.3369434	test: 43.8611088



100:	learn: 1.0195181	test: 0.7091236	best: 0.7083434 (99)	total: 1.34s	remaining: 11.9s
200:	learn: 0.7029618	test: 0.4824101	best: 0.4824101 (200)	total: 2.64s	remaining: 10.5s
300:	learn: 0.5720870	test: 0.3895532	best: 0.3895532 (300)	total: 3.97s	remaining: 9.22s
400:	learn: 0.4950601	test: 0.3282425	best: 0.3282425 (400)	total: 5.23s	remaining: 7.81s
500:	learn: 0.4380285	test: 0.2930646	best: 0.2930646 (500)	total: 6.49s	remaining: 6.47s
600:	learn: 0.3957031	test: 0.2725703	best: 0.2725456 (599)	total: 7.79s	remaining: 5.17s
700:	learn: 0.3599515	test: 0.2490048	best: 0.2490048 (700)	total: 9.08s	remaining: 3.87s
800:	learn: 0.3312723	test: 0.2347165	best: 0.2347165 (800)	total: 10.3s	remaining: 2.57s
900:	learn: 0.3047113	test: 0.2170725	best: 0.2170324 (898)	total: 11.7s	remaining: 1.28s
999:	learn: 0.2837310	test: 0.2071442	best: 0.2071264 (993)	total: 13s	remaining: 0us

bestTest = 0.2071263682
bestIteration = 993

0:	learn: 38.7619562	total: 1.61ms	remaining: 1.61s
1:	lear