In [35]:
import numpy as np
import yfinance as yf  
import pandas as pd

from catboost import CatBoostRegressor, cv, Pool
from sklearn.model_selection import train_test_split  

In [36]:
#опишем функции



def compute_rsi(data, window=14):  
    """Вычисляет индекс относительной силы (RSI) для заданной серии цен."""  
    delta = data.diff()   # Разница между текущим и предшествующим значением  
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()  # Средние приросты  
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()  # Средние убытки  

    rs = gain / loss  # Соотношение приростов к убыткам  
    rsi = 100 - (100 / (1 + rs))  # Формула RSI  
    return rsi  

def fitch_gen(data):
    """Модифицирует таблицу под наши нужды"""
    data.reset_index(inplace=True)

    data['Price Change'] = data['Close'].diff()  
    data['Percentage Change'] = data['Close'].pct_change()  

    #добавим скользящие средние
    data['SMA_5'] = data['Close'].shift(1).rolling(window=5).mean()  
    data['SMA_20'] = data['Close'].shift(1).rolling(window=20).mean()  

    #и индекс относительной силы
    data['RSI'] = compute_rsi(data['Close'], window=14) 

    

    # Лаговые признаки  
    data['Prev Close'] = data['Close'].shift(1)  
    data['Prev Volume'] = data['Volume'].shift(1)

    #переведем информацию о дате в подходящий для модели формат
    data['day_of_week'] = data['Date'].dt.dayofweek
    data['month'] = data['Date'].dt.month
    data['year'] = data['Date'].dt.year

    #для защиты от подглядывания удалим лишние колонки
    data.drop(['Volume', 'Low', 'High', 'Date'], axis=1, inplace=True)
    data.dropna(inplace=True)
    return data

def remove_outliers(df, column, percentile=0.5):  
    """удаляет выбросы с заданым порогом"""
    lower_bound = df[column].quantile(percentile / 100)  
    upper_bound = df[column].quantile(1 - (percentile / 100))  
    return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)] 

**Apple**

In [37]:
#выгружаем датасет
data_appl = yf.download('AAPL', start='2015-01-01', end='2024-05-31')    

[*********************100%***********************]  1 of 1 completed


In [38]:
#обработаем наши признаки, функцией описанной выше
data_appl_red = fitch_gen(data_appl)

In [39]:
data_appl_red

Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
20,29.512501,29.657499,26.447294,0.367498,0.012547,28.680500,27.660375,66.039668,29.290001,334982000.0,0,2,2015
21,29.625000,29.662500,26.451759,0.005001,0.000169,28.957000,27.776625,64.899258,29.657499,250956400.0,1,2,2015
22,29.625000,29.889999,26.654625,0.227499,0.007670,29.432500,27.931625,66.956211,29.662500,207662800.0,2,2,2015
23,30.004999,29.985001,26.844879,0.095001,0.003178,29.645000,28.097875,75.057301,29.889999,280598800.0,3,2,2015
24,30.004999,29.732500,26.618820,-0.252501,-0.008421,29.697000,28.250250,74.544769,29.985001,168984800.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2363,190.979996,186.880005,186.663940,-4.019989,-0.021058,190.799997,182.031500,58.405376,190.899994,34648500.0,3,5,2024
2364,188.820007,189.979996,189.760345,3.099991,0.016588,190.207999,182.881001,68.584245,186.880005,51005900.0,4,5,2024
2365,191.509995,189.990005,189.770355,0.010010,0.000053,190.229999,183.915000,67.593889,189.979996,36294600.0,1,5,2024
2366,189.610001,190.289993,190.069992,0.299988,0.001579,190.020001,184.739500,67.533647,189.990005,52280100.0,2,5,2024


In [40]:
for col in data_appl_red.columns:
    data_appl_red = remove_outliers(data_appl_red, col)
data_appl_red


Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
20,29.512501,29.657499,26.447294,0.367498,0.012547,28.680500,27.660375,66.039668,29.290001,334982000.0,0,2,2015
21,29.625000,29.662500,26.451759,0.005001,0.000169,28.957000,27.776625,64.899258,29.657499,250956400.0,1,2,2015
22,29.625000,29.889999,26.654625,0.227499,0.007670,29.432500,27.931625,66.956211,29.662500,207662800.0,2,2,2015
23,30.004999,29.985001,26.844879,0.095001,0.003178,29.645000,28.097875,75.057301,29.889999,280598800.0,3,2,2015
24,30.004999,29.732500,26.618820,-0.252501,-0.008421,29.697000,28.250250,74.544769,29.985001,168984800.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2357,187.910004,189.720001,189.500656,2.290009,0.012218,184.814001,174.935500,77.703277,187.429993,52393600.0,2,5,2024
2358,190.470001,189.839996,189.620514,0.119995,0.000632,186.210001,176.021500,79.077007,189.720001,70400000.0,3,5,2024
2359,189.509995,189.869995,189.650482,0.029999,0.000158,187.263998,177.161501,76.276081,189.839996,52845200.0,4,5,2024
2364,188.820007,189.979996,189.760345,3.099991,0.016588,190.207999,182.881001,68.584245,186.880005,51005900.0,4,5,2024


In [41]:
data_appl_red_X = data_appl_red.drop(columns=['Close'])  
data_appl_red_y = data_appl_red['Close'] 

In [42]:
categorical_features = ['day_of_week', 'month', 'year']  

In [43]:
X_train_appl, X_test_appl, y_train_appl, y_test_appl = train_test_split(data_appl_red_X, data_appl_red_y, test_size=0.2, random_state=42) 

In [44]:
#зададим параметры для обучения 
train_pool = Pool(X_train_appl, y_train_appl, cat_features=categorical_features) 
params = {  
    'iterations': 1000,  
    'learning_rate': 0.1,  
    'depth': 6,  
    'loss_function': 'RMSE',  
    'verbose': 100  
}

In [45]:
cv_results = cv(train_pool, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  



Training on fold [0/3]
0:	learn: 105.0557791	test: 95.0897074	best: 95.0897074 (0)	total: 5.13ms	remaining: 5.12s
100:	learn: 2.5438745	test: 10.3477867	best: 10.3477867 (100)	total: 670ms	remaining: 5.96s
200:	learn: 0.8395244	test: 10.1368672	best: 10.1366144 (198)	total: 1.38s	remaining: 5.47s
300:	learn: 0.3372378	test: 10.1176417	best: 10.1176417 (300)	total: 2.08s	remaining: 4.82s
400:	learn: 0.1844543	test: 10.1186913	best: 10.1165027 (303)	total: 2.89s	remaining: 4.32s
500:	learn: 0.0991260	test: 10.1153442	best: 10.1153442 (500)	total: 3.67s	remaining: 3.66s
600:	learn: 0.0602461	test: 10.1146746	best: 10.1146553 (598)	total: 4.44s	remaining: 2.95s
700:	learn: 0.0362372	test: 10.1156050	best: 10.1145928 (605)	total: 5.21s	remaining: 2.22s
800:	learn: 0.0197420	test: 10.1155308	best: 10.1145928 (605)	total: 6.05s	remaining: 1.5s
900:	learn: 0.0112828	test: 10.1156378	best: 10.1145928 (605)	total: 6.78s	remaining: 745ms
999:	learn: 0.0067628	test: 10.1158046	best: 10.1145928 (60



100:	learn: 1.4472953	test: 1.4454232	best: 1.4454232 (100)	total: 876ms	remaining: 7.8s
200:	learn: 0.9813170	test: 1.0893518	best: 1.0839287 (191)	total: 1.66s	remaining: 6.61s
300:	learn: 0.7812616	test: 0.9947268	best: 0.9917199 (293)	total: 2.47s	remaining: 5.75s
400:	learn: 0.6563888	test: 0.9394881	best: 0.9394881 (400)	total: 3.26s	remaining: 4.87s
500:	learn: 0.5556085	test: 0.9012309	best: 0.9009242 (498)	total: 4.06s	remaining: 4.04s
600:	learn: 0.4804073	test: 0.8811415	best: 0.8803930 (599)	total: 4.95s	remaining: 3.28s
700:	learn: 0.4241668	test: 0.8639168	best: 0.8639168 (700)	total: 5.82s	remaining: 2.48s
800:	learn: 0.3770569	test: 0.8452337	best: 0.8448975 (798)	total: 6.87s	remaining: 1.71s
900:	learn: 0.3392595	test: 0.8348371	best: 0.8345517 (899)	total: 7.84s	remaining: 862ms
999:	learn: 0.3091165	test: 0.8248663	best: 0.8237155 (986)	total: 8.77s	remaining: 0us

bestTest = 0.823715497
bestIteration = 986

Training on fold [2/3]
0:	learn: 93.0155670	test: 116.5782



100:	learn: 1.3542757	test: 0.8377550	best: 0.8377550 (100)	total: 848ms	remaining: 7.55s
200:	learn: 0.9530856	test: 0.6886249	best: 0.6795409 (188)	total: 1.69s	remaining: 6.73s
300:	learn: 0.7432729	test: 0.6000204	best: 0.5805732 (217)	total: 2.49s	remaining: 5.79s
400:	learn: 0.6220328	test: 0.4790516	best: 0.4790516 (400)	total: 3.3s	remaining: 4.93s
500:	learn: 0.5272832	test: 0.4169125	best: 0.4169125 (500)	total: 4.11s	remaining: 4.09s
600:	learn: 0.4545091	test: 0.4008167	best: 0.3863245 (595)	total: 4.94s	remaining: 3.28s
700:	learn: 0.4005452	test: 0.2912243	best: 0.2912243 (700)	total: 5.75s	remaining: 2.45s
800:	learn: 0.3510205	test: 0.2283079	best: 0.2283079 (800)	total: 6.57s	remaining: 1.63s
900:	learn: 0.3093011	test: 0.1869110	best: 0.1869110 (900)	total: 7.4s	remaining: 814ms
999:	learn: 0.2766540	test: 0.1718358	best: 0.1718358 (999)	total: 8.19s	remaining: 0us

bestTest = 0.1718357792
bestIteration = 999



In [46]:
best_iterations = cv_results['iterations'].values[-1]  
model_apple = CatBoostRegressor(iterations=best_iterations, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_apple.fit(X_train_appl, y_train_appl)

0:	learn: 50.1697489	total: 1.26ms	remaining: 1.25s
1:	learn: 45.5194213	total: 2.3ms	remaining: 1.15s
2:	learn: 41.2958834	total: 3.21ms	remaining: 1.06s
3:	learn: 37.4606946	total: 4.35ms	remaining: 1.08s
4:	learn: 33.9301733	total: 5.43ms	remaining: 1.08s
5:	learn: 30.8104064	total: 6.39ms	remaining: 1.06s
6:	learn: 27.9456081	total: 7.34ms	remaining: 1.04s
7:	learn: 25.4435796	total: 8.28ms	remaining: 1.02s
8:	learn: 23.1180658	total: 9.34ms	remaining: 1.03s
9:	learn: 20.9526845	total: 10.4ms	remaining: 1.03s
10:	learn: 19.0145214	total: 11.3ms	remaining: 1.02s
11:	learn: 17.2640169	total: 12.3ms	remaining: 1.01s
12:	learn: 15.7120534	total: 13.2ms	remaining: 1s
13:	learn: 14.2797311	total: 14.6ms	remaining: 1.03s
14:	learn: 12.9691756	total: 16.2ms	remaining: 1.06s
15:	learn: 11.8318962	total: 17.7ms	remaining: 1.09s
16:	learn: 10.7863144	total: 18.9ms	remaining: 1.09s
17:	learn: 9.8735265	total: 20.1ms	remaining: 1.09s
18:	learn: 8.9902118	total: 21.7ms	remaining: 1.12s
19:	learn

<catboost.core.CatBoostRegressor at 0x192387624b0>

In [47]:

test_preds = model_apple.predict(X_test_appl)

test_error = np.mean((test_preds - y_test_appl.values.ravel())**2)

print(f"Качество на тесте: {test_error.round(3)}")


Качество на тесте: 0.619


**Nvidia**

In [48]:
data_nvda = yf.download('NVDA', start='2015-01-01', end='2024-05-31')  

[*********************100%***********************]  1 of 1 completed


In [49]:
data_nvda_new = fitch_gen(data_nvda)

In [50]:
data_nvda_new

Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
20,0.483000,0.490500,0.470935,0.010500,0.021875,0.492700,0.496150,49.172581,0.480000,298200000.0,0,2,2015
21,0.494000,0.502750,0.482697,0.012250,0.024974,0.487700,0.495513,54.797435,0.490500,259604000.0,1,2,2015
22,0.501250,0.504000,0.483897,0.001250,0.002486,0.490100,0.495913,54.506441,0.502750,183468000.0,2,2,2015
23,0.504500,0.512250,0.491818,0.008250,0.016369,0.494350,0.497125,59.175249,0.504000,181516000.0,3,2,2015
24,0.511750,0.510000,0.489658,-0.002250,-0.004392,0.497900,0.498813,54.803479,0.512250,140448000.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2363,102.028000,103.799004,103.781601,8.849007,0.093196,94.390799,90.112151,77.827210,94.949997,548648000.0,3,5,2024
2364,104.448997,106.469002,106.451149,2.669998,0.025723,96.278799,91.170501,77.252838,103.799004,835065000.0,4,5,2024
2365,110.244003,113.901001,113.881905,7.431999,0.069804,99.076801,92.107201,86.326425,106.469002,429494000.0,1,5,2024
2366,113.050003,114.824997,114.805748,0.923996,0.008112,102.901001,93.414400,87.082646,113.901001,652728000.0,2,5,2024


In [51]:
for col in data_nvda_new.columns:
    data_nvda_new = remove_outliers(data_nvda_new, col)

In [52]:
data_nvda_new

Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
38,0.553500,0.551500,0.531541,-0.003500,-0.006306,0.555750,0.530425,73.850639,0.555000,152864000.0,4,2,2015
39,0.550000,0.565000,0.544553,0.013500,0.024479,0.554350,0.533275,77.750028,0.551500,157492000.0,0,3,2015
40,0.560750,0.554500,0.534432,-0.010500,-0.018584,0.556550,0.537525,65.974034,0.565000,301512000.0,1,3,2015
41,0.552500,0.560750,0.540456,0.006250,0.011271,0.555900,0.540725,70.454555,0.554500,160344000.0,2,3,2015
42,0.560750,0.571500,0.550817,0.010750,0.019171,0.557350,0.543625,59.655181,0.560750,226840000.0,3,3,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2273,55.018002,56.382000,56.369904,1.672001,0.030561,53.854999,50.134950,82.323237,54.709999,352994000.0,1,1,2024
2274,56.347000,56.053001,56.040974,-0.328999,-0.005835,54.680799,50.536550,79.294258,56.382000,449580000.0,2,1,2024
2275,57.259998,57.106998,57.094746,1.053997,0.018804,55.263400,50.894700,80.814245,56.053001,474394000.0,3,1,2024
2276,57.988998,59.491001,59.478237,2.384003,0.041746,55.814799,51.246200,83.777205,57.106998,491650000.0,4,1,2024


In [53]:
data_nvda_new_X = data_nvda_new.drop(columns=['Close'])  
data_nvda_new_y = data_nvda_new['Close']

In [54]:
X_train_nvda, X_test_nvda, y_train_nvda, y_test_nvda = train_test_split(data_nvda_new_X, data_nvda_new_y, test_size=0.2, random_state=42) 

In [55]:
train_pool_nvda = Pool(X_train_nvda, y_train_nvda, cat_features=categorical_features)

In [56]:
cv_results_nvda = cv(train_pool_nvda, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

best_iterations_nvda = cv_results_nvda['iterations'].values[-1]  
model_nvda = CatBoostRegressor(iterations=best_iterations_nvda, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_nvda.fit(X_train_nvda, y_train_nvda)

test_preds_nvda= model_nvda.predict(X_test_nvda)
test_error_nvda = np.mean((test_preds_nvda - y_test_nvda.values.ravel())**2)
print(f"Качество на тесте: {test_error_nvda.round(3)}")



Training on fold [0/3]
0:	learn: 7.2905313	test: 16.7337879	best: 16.7337879 (0)	total: 7.46ms	remaining: 7.45s
100:	learn: 0.4005733	test: 8.4619849	best: 8.4619849 (100)	total: 662ms	remaining: 5.89s
200:	learn: 0.0693465	test: 8.1748130	best: 8.1748130 (200)	total: 1.42s	remaining: 5.64s
300:	learn: 0.0181391	test: 8.1596100	best: 8.1595707 (298)	total: 2.13s	remaining: 4.96s
400:	learn: 0.0061974	test: 8.1583287	best: 8.1583114 (398)	total: 2.89s	remaining: 4.31s
500:	learn: 0.0024758	test: 8.1582060	best: 8.1582000 (496)	total: 3.65s	remaining: 3.64s
600:	learn: 0.0010481	test: 8.1582421	best: 8.1582000 (496)	total: 4.39s	remaining: 2.91s
700:	learn: 0.0003948	test: 8.1582714	best: 8.1582000 (496)	total: 5.18s	remaining: 2.21s
800:	learn: 0.0001688	test: 8.1582860	best: 8.1582000 (496)	total: 5.97s	remaining: 1.48s
900:	learn: 0.0000891	test: 8.1582923	best: 8.1582000 (496)	total: 6.68s	remaining: 734ms
999:	learn: 0.0000379	test: 8.1582940	best: 8.1582000 (496)	total: 7.39s	remai



100:	learn: 0.4281032	test: 0.2480611	best: 0.2480611 (100)	total: 795ms	remaining: 7.08s
200:	learn: 0.2591531	test: 0.2005760	best: 0.2005760 (200)	total: 1.58s	remaining: 6.28s
300:	learn: 0.2010908	test: 0.1881973	best: 0.1869532 (298)	total: 2.56s	remaining: 5.94s
400:	learn: 0.1610209	test: 0.1913684	best: 0.1869532 (298)	total: 3.45s	remaining: 5.16s
500:	learn: 0.1340201	test: 0.1837400	best: 0.1833468 (482)	total: 4.37s	remaining: 4.36s
600:	learn: 0.1140340	test: 0.1818711	best: 0.1816646 (599)	total: 5.23s	remaining: 3.47s
700:	learn: 0.0997916	test: 0.1781388	best: 0.1780546 (697)	total: 6.03s	remaining: 2.57s
800:	learn: 0.0887509	test: 0.1763421	best: 0.1756102 (760)	total: 7.02s	remaining: 1.74s
900:	learn: 0.0793137	test: 0.1752319	best: 0.1749916 (857)	total: 8.05s	remaining: 884ms
999:	learn: 0.0713695	test: 0.1748738	best: 0.1742704 (988)	total: 9.03s	remaining: 0us

bestTest = 0.1742703653
bestIteration = 988

Training on fold [2/3]
0:	learn: 15.4098525	test: 0.2195



100:	learn: 0.3296503	test: 0.0220069	best: 0.0117111 (87)	total: 1.04s	remaining: 9.27s
200:	learn: 0.2242947	test: 0.0388935	best: 0.0099516 (116)	total: 1.97s	remaining: 7.83s
300:	learn: 0.1692197	test: 0.0321151	best: 0.0099516 (116)	total: 2.85s	remaining: 6.61s
400:	learn: 0.1325583	test: 0.0428715	best: 0.0099516 (116)	total: 3.71s	remaining: 5.55s
500:	learn: 0.1111299	test: 0.0437430	best: 0.0099516 (116)	total: 4.59s	remaining: 4.57s
600:	learn: 0.0936749	test: 0.0441630	best: 0.0099516 (116)	total: 5.51s	remaining: 3.66s
700:	learn: 0.0808816	test: 0.0419867	best: 0.0099516 (116)	total: 6.36s	remaining: 2.71s
800:	learn: 0.0707897	test: 0.0387709	best: 0.0099516 (116)	total: 7.25s	remaining: 1.8s
900:	learn: 0.0634075	test: 0.0382217	best: 0.0099516 (116)	total: 8.2s	remaining: 901ms
999:	learn: 0.0571880	test: 0.0389643	best: 0.0099516 (116)	total: 9.16s	remaining: 0us

bestTest = 0.009951611502
bestIteration = 116

0:	learn: 11.0691298	total: 1.14ms	remaining: 1.13s
1:	le

**Google**

In [57]:
data_ggl = yf.download('GOOGL', start='2015-01-01', end='2024-05-31')  

[*********************100%***********************]  1 of 1 completed


In [58]:
data_ggl_new = fitch_gen(data_ggl)

In [59]:
data_ggl_new

Unnamed: 0,Open,Close,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
20,26.716000,26.610001,26.544273,-0.267500,-0.009953,26.211200,25.795975,66.007656,26.877501,121108000.0,0,2,2015
21,26.497000,26.665001,26.599138,0.055000,0.002067,26.166000,25.802600,64.841697,26.610001,75378000.0,1,2,2015
22,26.657000,26.305000,26.240026,-0.360001,-0.013501,26.287101,25.837200,59.236201,26.665001,47062000.0,2,2,2015
23,26.396500,26.491501,26.426065,0.186501,0.007090,26.423801,25.885850,61.630628,26.305000,33896000.0,3,2,2015
24,26.550501,26.694000,26.628065,0.202499,0.007644,26.589801,25.947550,60.782680,26.491501,33176000.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2363,177.070007,173.550003,173.121323,-2.830002,-0.016045,176.278000,169.766000,64.833079,176.380005,17880000.0,3,5,2024
2364,174.979996,174.990005,174.557770,1.440002,0.008297,176.152002,170.643500,65.766574,173.550003,21024900.0,4,5,2024
2365,174.449997,176.399994,175.964279,1.409988,0.008058,175.938004,170.795500,62.804550,174.990005,16572500.0,1,5,2024
2366,175.429993,175.899994,175.465515,-0.500000,-0.002834,175.834003,171.308000,67.395896,176.399994,20572200.0,2,5,2024


In [60]:
for col in data_ggl_new.columns:
    data_ggl_new = remove_outliers(data_ggl_new, col)

In [61]:
data_ggl_new_x = data_ggl_new.drop(columns=['Close'])  
data_ggl_new_y = data_ggl_new['Close']

In [62]:
X_train_ggl, X_test_ggl, y_train_ggl, y_test_ggl = train_test_split(data_ggl_new_x, data_ggl_new_y, test_size=0.2, random_state=42) 

train_pool_ggl = Pool(X_train_ggl, y_train_ggl, cat_features=categorical_features)

cv_results_ggl = cv(train_pool_ggl, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

best_iterations_ggl = cv_results_ggl['iterations'].values[-1]  
model_ggl = CatBoostRegressor(iterations=best_iterations_ggl, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_ggl.fit(X_train_ggl, y_train_ggl)

test_preds_ggl= model_ggl.predict(X_test_ggl)
test_error_ggl = np.mean((test_preds_ggl - y_test_ggl.values.ravel())**2)
print(f"Качество на тесте: {test_error_ggl.round(3)}")

Training on fold [0/3]
0:	learn: 84.0524710	test: 81.6379195	best: 81.6379195 (0)	total: 7.45ms	remaining: 7.44s




100:	learn: 1.6763029	test: 16.0597825	best: 16.0597825 (100)	total: 657ms	remaining: 5.85s
200:	learn: 0.2078777	test: 15.5836958	best: 15.5836958 (200)	total: 1.45s	remaining: 5.78s
300:	learn: 0.0397561	test: 15.5671977	best: 15.5671977 (300)	total: 2.2s	remaining: 5.12s
400:	learn: 0.0087861	test: 15.5650181	best: 15.5650181 (400)	total: 2.92s	remaining: 4.36s
500:	learn: 0.0020327	test: 15.5647120	best: 15.5647120 (500)	total: 3.69s	remaining: 3.68s
600:	learn: 0.0004631	test: 15.5646751	best: 15.5646738 (596)	total: 4.41s	remaining: 2.93s
700:	learn: 0.0001348	test: 15.5646595	best: 15.5646595 (696)	total: 5.09s	remaining: 2.17s
800:	learn: 0.0000362	test: 15.5646591	best: 15.5646587 (744)	total: 5.86s	remaining: 1.46s
900:	learn: 0.0000090	test: 15.5646594	best: 15.5646587 (744)	total: 6.64s	remaining: 730ms
999:	learn: 0.0000020	test: 15.5646594	best: 15.5646587 (744)	total: 7.4s	remaining: 0us

bestTest = 15.56465871
bestIteration = 744

Training on fold [1/3]
0:	learn: 76.462



100:	learn: 1.0517286	test: 1.0054263	best: 0.8489594 (57)	total: 863ms	remaining: 7.68s
200:	learn: 0.7586588	test: 0.9296970	best: 0.8489594 (57)	total: 1.74s	remaining: 6.93s
300:	learn: 0.5928321	test: 0.8995597	best: 0.8489594 (57)	total: 2.75s	remaining: 6.39s
400:	learn: 0.4950311	test: 0.8620382	best: 0.8489594 (57)	total: 3.66s	remaining: 5.47s
500:	learn: 0.4291290	test: 0.8419839	best: 0.8410662 (487)	total: 4.57s	remaining: 4.55s
600:	learn: 0.3744835	test: 0.8220290	best: 0.8220181 (599)	total: 5.46s	remaining: 3.63s
700:	learn: 0.3319465	test: 0.8038622	best: 0.8037840 (698)	total: 6.44s	remaining: 2.75s
800:	learn: 0.2930514	test: 0.8021479	best: 0.7950825 (733)	total: 7.39s	remaining: 1.83s
900:	learn: 0.2632347	test: 0.8001135	best: 0.7950825 (733)	total: 8.36s	remaining: 918ms
999:	learn: 0.2388631	test: 0.8011476	best: 0.7950825 (733)	total: 9.23s	remaining: 0us

bestTest = 0.795082513
bestIteration = 733

Training on fold [2/3]
0:	learn: 76.3684743	test: 93.8074075	



100:	learn: 1.0250387	test: 2.1010345	best: 0.0017267 (31)	total: 906ms	remaining: 8.06s
200:	learn: 0.7274764	test: 0.9592959	best: 0.0017267 (31)	total: 1.78s	remaining: 7.09s
300:	learn: 0.5881467	test: 0.6193119	best: 0.0017267 (31)	total: 2.63s	remaining: 6.12s
400:	learn: 0.4836523	test: 0.4071009	best: 0.0017267 (31)	total: 3.49s	remaining: 5.22s
500:	learn: 0.4180948	test: 0.1556530	best: 0.0017267 (31)	total: 4.38s	remaining: 4.37s
600:	learn: 0.3599043	test: 0.1002922	best: 0.0017267 (31)	total: 5.31s	remaining: 3.52s
700:	learn: 0.3211368	test: 0.0151669	best: 0.0017267 (31)	total: 6.17s	remaining: 2.63s
800:	learn: 0.2858655	test: 0.0422119	best: 0.0017267 (31)	total: 7.08s	remaining: 1.76s
900:	learn: 0.2559760	test: 0.1208401	best: 0.0017267 (31)	total: 7.97s	remaining: 876ms
999:	learn: 0.2335930	test: 0.1522007	best: 0.0017267 (31)	total: 8.84s	remaining: 0us

bestTest = 0.00172667498
bestIteration = 31

0:	learn: 32.5255686	total: 1.49ms	remaining: 1.49s
1:	learn: 29.5

**Coca-Cola Company**

In [63]:
data_ko = yf.download('KO', start='2015-01-01', end='2024-05-31')    

[*********************100%***********************]  1 of 1 completed


In [64]:
data_ko_new = fitch_gen(data_ko)

In [65]:
for col in data_ko_new.columns:
    data_ko_new = remove_outliers(data_ko_new, col)

In [66]:
data_ko_new_x = data_ko_new.drop(columns=['Close'])  
data_ko_new_y = data_ko_new['Close']

In [67]:
X_train_ko, X_test_ko, y_train_ko, y_test_ko = train_test_split(data_ko_new_x, data_ko_new_y, test_size=0.2, random_state=42)  

train_pool_ko = Pool(X_train_ko, y_train_ko, cat_features=categorical_features)  

cv_results_ko = cv(train_pool_ko, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

best_iterations_ko = cv_results_ko['iterations'].values[-1]  
model_ko = CatBoostRegressor(iterations=best_iterations_ko, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_ko.fit(X_train_ko, y_train_ko)  

test_preds_ko = model_ko.predict(X_test_ko)  
test_error_ko = np.mean((test_preds_ko - y_test_ko.values.ravel())**2)  
print(f"Качество на тесте: {test_error_ko.round(3)}")  



Training on fold [0/3]
0:	learn: 45.2660531	test: 47.0726762	best: 47.0726762 (0)	total: 17.2ms	remaining: 17.2s
100:	learn: 0.6492888	test: 1.0287894	best: 1.0287894 (100)	total: 848ms	remaining: 7.54s
200:	learn: 0.3805195	test: 0.8970710	best: 0.8970710 (200)	total: 1.67s	remaining: 6.65s
300:	learn: 0.2703586	test: 0.8563625	best: 0.8563625 (300)	total: 2.52s	remaining: 5.84s
400:	learn: 0.2131096	test: 0.8453293	best: 0.8453293 (400)	total: 3.4s	remaining: 5.08s
500:	learn: 0.1699726	test: 0.8364759	best: 0.8363838 (497)	total: 4.28s	remaining: 4.26s
600:	learn: 0.1365585	test: 0.8316271	best: 0.8314842 (597)	total: 5.11s	remaining: 3.39s
700:	learn: 0.1119585	test: 0.8291640	best: 0.8291640 (700)	total: 5.93s	remaining: 2.53s
800:	learn: 0.0944774	test: 0.8278921	best: 0.8278921 (800)	total: 6.77s	remaining: 1.68s
900:	learn: 0.0800412	test: 0.8274469	best: 0.8273326 (895)	total: 7.56s	remaining: 831ms
999:	learn: 0.0684420	test: 0.8272001	best: 0.8269423 (963)	total: 8.36s	remai



100:	learn: 0.5588983	test: 0.5340524	best: 0.5340524 (100)	total: 910ms	remaining: 8.1s
200:	learn: 0.3605002	test: 0.4185269	best: 0.4185269 (200)	total: 1.79s	remaining: 7.1s
300:	learn: 0.2808290	test: 0.3882128	best: 0.3882128 (300)	total: 2.69s	remaining: 6.26s
400:	learn: 0.2376517	test: 0.3747030	best: 0.3747030 (400)	total: 3.54s	remaining: 5.29s
500:	learn: 0.2068349	test: 0.3686444	best: 0.3682594 (495)	total: 4.45s	remaining: 4.43s
600:	learn: 0.1805371	test: 0.3598123	best: 0.3598123 (600)	total: 5.37s	remaining: 3.56s
700:	learn: 0.1608360	test: 0.3549641	best: 0.3549641 (700)	total: 6.26s	remaining: 2.67s
800:	learn: 0.1432761	test: 0.3516645	best: 0.3516645 (800)	total: 7.14s	remaining: 1.77s
900:	learn: 0.1305988	test: 0.3505564	best: 0.3505564 (900)	total: 7.95s	remaining: 873ms
999:	learn: 0.1190492	test: 0.3490074	best: 0.3489972 (997)	total: 8.75s	remaining: 0us

bestTest = 0.3489971579
bestIteration = 997

Training on fold [2/3]
0:	learn: 46.2760926	test: 43.56952



100:	learn: 0.4920558	test: 0.4875121	best: 0.4875121 (100)	total: 757ms	remaining: 6.73s
200:	learn: 0.3345484	test: 0.4045728	best: 0.4045728 (200)	total: 1.57s	remaining: 6.24s
300:	learn: 0.2572056	test: 0.3695535	best: 0.3695535 (300)	total: 2.4s	remaining: 5.57s
400:	learn: 0.2081439	test: 0.3461879	best: 0.3461879 (400)	total: 3.22s	remaining: 4.81s
500:	learn: 0.1749482	test: 0.3315041	best: 0.3313811 (499)	total: 4.04s	remaining: 4.03s
600:	learn: 0.1493716	test: 0.3225836	best: 0.3225836 (600)	total: 4.85s	remaining: 3.22s
700:	learn: 0.1320425	test: 0.3146710	best: 0.3146710 (700)	total: 5.69s	remaining: 2.42s
800:	learn: 0.1171476	test: 0.3098286	best: 0.3096809 (798)	total: 6.5s	remaining: 1.61s
900:	learn: 0.1059528	test: 0.3064344	best: 0.3064110 (897)	total: 7.44s	remaining: 818ms
999:	learn: 0.0963822	test: 0.3031663	best: 0.3031339 (998)	total: 8.27s	remaining: 0us

bestTest = 0.3031338916
bestIteration = 998

0:	learn: 6.4448589	total: 1.83ms	remaining: 1.82s
1:	lear

**Большая модель**

In [68]:

full_x = pd.concat([data_ko_new_x, data_ggl_new_x, data_nvda_new_X, data_appl_red_X], ignore_index=True)

full_y = pd.concat([data_ko_new_y, data_ggl_new_y, data_nvda_new_y, data_appl_red_y], ignore_index=True)

In [69]:
full_x

Unnamed: 0,Open,Adj Close,Price Change,Percentage Change,SMA_5,SMA_20,RSI,Prev Close,Prev Volume,day_of_week,month,year
0,41.209999,30.491451,0.420002,0.010202,42.115999,42.660000,39.603966,41.169998,19193700.0,0,2,2015
1,41.750000,30.520767,0.040001,0.000962,41.833999,42.632500,40.157484,41.590000,15197500.0,1,2,2015
2,41.770000,30.557432,0.049999,0.001201,41.681999,42.607000,41.304340,41.630001,13193100.0,2,2,2015
3,41.799999,30.638083,0.110001,0.002639,41.634000,42.568000,44.088177,41.680000,16400000.0,3,2,2015
4,41.630001,30.388805,-0.340000,-0.008136,41.572000,42.508000,39.575317,41.790001,11078500.0,4,2,2015
...,...,...,...,...,...,...,...,...,...,...,...,...
8452,187.910004,189.500656,2.290009,0.012218,184.814001,174.935500,77.703277,187.429993,52393600.0,2,5,2024
8453,190.470001,189.620514,0.119995,0.000632,186.210001,176.021500,79.077007,189.720001,70400000.0,3,5,2024
8454,189.509995,189.650482,0.029999,0.000158,187.263998,177.161501,76.276081,189.839996,52845200.0,4,5,2024
8455,188.820007,189.760345,3.099991,0.016588,190.207999,182.881001,68.584245,186.880005,51005900.0,4,5,2024


In [70]:
X_train_full, X_test_full, y_train_full, y_test_full = train_test_split(full_x, full_y, test_size=0.2, random_state=42) 

train_pool_full = Pool(X_train_full, y_train_full, cat_features=categorical_features)

cv_results_full = cv(train_pool_full, params, fold_count=3, shuffle=True, stratified=True, verbose=100)  

best_iterations_full = cv_results_full['iterations'].values[-1]  
model_full = CatBoostRegressor(iterations=best_iterations_full, learning_rate=0.1, depth=6, loss_function='RMSE')  
model_full.fit(X_train_full, y_train_full)

test_preds_full= model_full.predict(X_test_full)
test_error_full = np.mean((test_preds_full - y_test_full.values.ravel())**2)
print(f"Качество на тесте: {test_error_full.round(3)}")

Training on fold [0/3]
0:	learn: 56.5369993	test: 67.3399431	best: 67.3399431 (0)	total: 11.2ms	remaining: 11.2s




100:	learn: 1.4713140	test: 4.3722304	best: 4.3722304 (100)	total: 827ms	remaining: 7.36s
200:	learn: 0.8678063	test: 4.1104865	best: 4.1104865 (200)	total: 1.69s	remaining: 6.72s
300:	learn: 0.6202814	test: 4.0793803	best: 4.0793803 (300)	total: 2.54s	remaining: 5.9s
400:	learn: 0.4695361	test: 4.0756890	best: 4.0717722 (357)	total: 3.41s	remaining: 5.1s
500:	learn: 0.3733513	test: 4.0680634	best: 4.0675041 (494)	total: 4.22s	remaining: 4.2s
600:	learn: 0.2976087	test: 4.0575981	best: 4.0575791 (599)	total: 5.03s	remaining: 3.34s
700:	learn: 0.2465223	test: 4.0573148	best: 4.0569620 (696)	total: 5.85s	remaining: 2.5s
800:	learn: 0.2093498	test: 4.0563635	best: 4.0553213 (737)	total: 6.66s	remaining: 1.65s
900:	learn: 0.1799528	test: 4.0572524	best: 4.0553213 (737)	total: 7.46s	remaining: 820ms
999:	learn: 0.1543778	test: 4.0555224	best: 4.0552514 (988)	total: 8.28s	remaining: 0us

bestTest = 4.055251361
bestIteration = 988

Training on fold [1/3]
0:	learn: 66.0915948	test: 58.4596700	



100:	learn: 1.0708939	test: 0.8976569	best: 0.8976569 (100)	total: 1.04s	remaining: 9.23s
200:	learn: 0.7547849	test: 0.6801725	best: 0.6801725 (200)	total: 2.06s	remaining: 8.18s
300:	learn: 0.6135009	test: 0.5752575	best: 0.5751467 (299)	total: 3.13s	remaining: 7.27s
400:	learn: 0.5158875	test: 0.5150367	best: 0.5150367 (400)	total: 4.09s	remaining: 6.1s
500:	learn: 0.4516975	test: 0.4744559	best: 0.4744559 (500)	total: 5.05s	remaining: 5.03s
600:	learn: 0.4001011	test: 0.4460933	best: 0.4460933 (600)	total: 5.97s	remaining: 3.96s
700:	learn: 0.3609018	test: 0.4227961	best: 0.4227961 (700)	total: 6.93s	remaining: 2.96s
800:	learn: 0.3296017	test: 0.4059026	best: 0.4058735 (798)	total: 7.87s	remaining: 1.96s
900:	learn: 0.3048207	test: 0.3918094	best: 0.3918094 (900)	total: 8.82s	remaining: 969ms
999:	learn: 0.2828264	test: 0.3813160	best: 0.3812595 (997)	total: 9.73s	remaining: 0us

bestTest = 0.3812595371
bestIteration = 997

Training on fold [2/3]
0:	learn: 65.8111613	test: 45.2719



100:	learn: 1.0747393	test: 0.7013363	best: 0.7013363 (100)	total: 953ms	remaining: 8.48s
200:	learn: 0.7653853	test: 0.5674084	best: 0.5674058 (199)	total: 1.9s	remaining: 7.54s
300:	learn: 0.6169153	test: 0.4996537	best: 0.4995760 (296)	total: 2.91s	remaining: 6.76s
400:	learn: 0.5268504	test: 0.4314353	best: 0.4314353 (400)	total: 3.96s	remaining: 5.91s
500:	learn: 0.4674046	test: 0.3785002	best: 0.3785002 (500)	total: 4.96s	remaining: 4.94s
600:	learn: 0.4155831	test: 0.3401446	best: 0.3401446 (600)	total: 5.9s	remaining: 3.92s
700:	learn: 0.3774490	test: 0.3095621	best: 0.3095432 (698)	total: 6.94s	remaining: 2.96s
800:	learn: 0.3461485	test: 0.2861861	best: 0.2861853 (799)	total: 7.95s	remaining: 1.98s
900:	learn: 0.3214075	test: 0.2730119	best: 0.2727097 (893)	total: 8.97s	remaining: 985ms
999:	learn: 0.2999374	test: 0.2590190	best: 0.2590190 (999)	total: 9.95s	remaining: 0us

bestTest = 0.2590190121
bestIteration = 999

0:	learn: 40.7236289	total: 1.92ms	remaining: 1.91s
1:	lea