In [145]:
import requests
import yfinance as yf
import pandas as pd
from datetime import timedelta
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression as lr
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.ensemble import GradientBoostingRegressor as GBR

In [106]:
df = yf.download('EURRUB=X')
df = df.Close.copy()
rates = pd.DataFrame(df).reset_index()
rates.set_index('Date', inplace = True)
rates

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2003-12-01,35.509998
2003-12-02,35.880001
2003-12-03,35.980000
2003-12-04,35.840000
2003-12-05,36.080002
...,...
2022-09-30,55.341202
2022-10-03,56.449402
2022-10-04,56.627602
2022-10-05,57.857399


In [107]:
def make_features(data, max_lag, rolling_mean_size):
    data['year'] = data.index.year
    data['month'] = data.index.month
    data['day'] = data.index.day
    data['dayofweek'] = data.index.dayofweek
    data['is_weekend'] = data.index.isin([5,6])*1
    
    for lag in range(1, max_lag + 1):
        data['lag_{}'.format(lag)] = data['Close'].shift(lag)

    data['y_mean'] = data['Close'].shift().rolling(rolling_mean_size).mean().copy()

In [108]:
make_features(rates,30,3)

In [109]:
rates.dropna(inplace = True)
rates

Unnamed: 0_level_0,Close,year,month,day,dayofweek,is_weekend,lag_1,lag_2,lag_3,lag_4,...,lag_22,lag_23,lag_24,lag_25,lag_26,lag_27,lag_28,lag_29,lag_30,y_mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-01-12,36.799999,2004,1,12,0,0,37.186901,37.150002,36.950001,37.189999,...,35.919998,35.959999,36.169998,36.090000,36.080002,35.840000,35.980000,35.880001,35.509998,37.095634
2004-01-13,36.900002,2004,1,13,1,0,36.799999,37.186901,37.150002,36.950001,...,36.070000,35.919998,35.959999,36.169998,36.090000,36.080002,35.840000,35.980000,35.880001,37.045634
2004-01-14,36.520000,2004,1,14,2,0,36.900002,36.799999,37.186901,37.150002,...,36.160000,36.070000,35.919998,35.959999,36.169998,36.090000,36.080002,35.840000,35.980000,36.962301
2004-01-15,36.369999,2004,1,15,3,0,36.520000,36.900002,36.799999,37.186901,...,36.029999,36.160000,36.070000,35.919998,35.959999,36.169998,36.090000,36.080002,35.840000,36.740000
2004-01-16,35.676498,2004,1,16,4,0,36.369999,36.520000,36.900002,36.799999,...,36.270000,36.029999,36.160000,36.070000,35.919998,35.959999,36.169998,36.090000,36.080002,36.596667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-09-30,55.341202,2022,9,30,4,0,54.529202,55.318001,55.153000,56.037498,...,59.687599,59.453098,59.096001,58.630501,59.022099,57.650799,59.060001,59.720001,60.292301,55.000067
2022-10-03,56.449402,2022,10,3,0,0,55.341202,54.529202,55.318001,55.153000,...,58.868000,59.687599,59.453098,59.096001,58.630501,59.022099,57.650799,59.060001,59.720001,55.062801
2022-10-04,56.627602,2022,10,4,1,0,56.449402,55.341202,54.529202,55.318001,...,60.115898,58.868000,59.687599,59.453098,59.096001,58.630501,59.022099,57.650799,59.060001,55.439935
2022-10-05,57.857399,2022,10,5,2,0,56.627602,56.449402,55.341202,54.529202,...,59.981701,60.115898,58.868000,59.687599,59.453098,59.096001,58.630501,59.022099,57.650799,56.139402


In [110]:
X_train,X_test, y_train, y_test = train_test_split(rates.drop('Close',axis=1),
                                                  rates.Close,
                                                  shuffle = False,
                                                  test_size = 0.2)

In [111]:
clf = lr()
clf.fit(X_train,y_train)

LinearRegression()

In [112]:
prediction = clf.predict(X_test)

In [113]:
print('Ошибка на обучающей выборке:',MAE(y_train,clf.predict(X_train)), '\nОшибка на тестовой выборке:',MAE(y_test,prediction))

Ошибка на обучающей выборке: 0.41558923941822834 
Ошибка на тестовой выборке: 1.0537491941524004


### Предсказание на T+1

In [116]:
df_new = pd.DataFrame(rates['Close']).reset_index().copy()

In [124]:
df_new

Unnamed: 0,Date,Close
0,2004-01-12,36.799999
1,2004-01-13,36.900002
2,2004-01-14,36.520000
3,2004-01-15,36.369999
4,2004-01-16,35.676498
...,...,...
4388,2022-09-30,55.341202
4389,2022-10-03,56.449402
4390,2022-10-04,56.627602
4391,2022-10-05,57.857399


In [127]:
next_day = df_new.tail(1).copy()
next_day['Date'] = next_day['Date'] + timedelta(days=1)
next_day['Close'] = 0
next_day.set_index('Date', inplace = True)
next_day.reset_index(inplace = True)

In [128]:
next_day

Unnamed: 0,Date,Close
0,2022-10-08,0


In [130]:
df2 = pd.concat([df_new,next_day], ignore_index=True)
df2.set_index('Date', inplace = True)
df2

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2004-01-12,36.799999
2004-01-13,36.900002
2004-01-14,36.520000
2004-01-15,36.369999
2004-01-16,35.676498
...,...
2022-10-03,56.449402
2022-10-04,56.627602
2022-10-05,57.857399
2022-10-07,58.497799


In [132]:
make_features(df2,30,3)
df2

Unnamed: 0_level_0,Close,year,month,day,dayofweek,is_weekend,lag_1,lag_2,lag_3,lag_4,...,lag_22,lag_23,lag_24,lag_25,lag_26,lag_27,lag_28,lag_29,lag_30,y_mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-01-12,36.799999,2004,1,12,0,0,,,,,...,,,,,,,,,,
2004-01-13,36.900002,2004,1,13,1,0,36.799999,,,,...,,,,,,,,,,
2004-01-14,36.520000,2004,1,14,2,0,36.900002,36.799999,,,...,,,,,,,,,,
2004-01-15,36.369999,2004,1,15,3,0,36.520000,36.900002,36.799999,,...,,,,,,,,,,36.740000
2004-01-16,35.676498,2004,1,16,4,0,36.369999,36.520000,36.900002,36.799999,...,,,,,,,,,,36.596667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-03,56.449402,2022,10,3,0,0,55.341202,54.529202,55.318001,55.153000,...,58.868000,59.687599,59.453098,59.096001,58.630501,59.022099,57.650799,59.060001,59.720001,55.062801
2022-10-04,56.627602,2022,10,4,1,0,56.449402,55.341202,54.529202,55.318001,...,60.115898,58.868000,59.687599,59.453098,59.096001,58.630501,59.022099,57.650799,59.060001,55.439935
2022-10-05,57.857399,2022,10,5,2,0,56.627602,56.449402,55.341202,54.529202,...,59.981701,60.115898,58.868000,59.687599,59.453098,59.096001,58.630501,59.022099,57.650799,56.139402
2022-10-07,58.497799,2022,10,7,4,0,57.857399,56.627602,56.449402,55.341202,...,59.597401,59.981701,60.115898,58.868000,59.687599,59.453098,59.096001,58.630501,59.022099,56.978134


In [135]:
lr_full = lr()
lr_full.fit(rates.drop('Close',axis=1),rates.Close)

LinearRegression()

In [146]:
gb = GBR()
gb.fit(rates.drop('Close',axis=1),rates.Close)

GradientBoostingRegressor()

In [147]:
print('Предсказание на следующий день - ',clf.predict(df2.drop('Close',axis=1).tail(1))[0].round(4))
print('Предсказание на следующий день по полному датасету - ',lr_full.predict(df2.drop('Close',axis=1).tail(1))[0].round(4))
print('Предсказание на следующий день с помощью GradientBoostingRegressor - ',gb.predict(df2.drop('Close',axis=1).tail(1))[0].round(4))

Предсказание на следующий день -  58.1629
Предсказание на следующий день по полному датасету -  58.5447
Предсказание на следующий день с помощью GradientBoostingRegressor -  59.2347
