In [1]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

from pycaret.regression import *
import plotly.express as px

# Чтение данных

In [6]:
def read_data(filename):
    """Load a rate history from an Excel workbook.

    The workbook must contain a ``data`` column holding day-first dates.
    That column is parsed to datetimes, promoted to the index, and the
    rows are returned ordered by it.
    """
    frame = pd.read_excel(filename)
    frame["data"] = pd.to_datetime(frame["data"], dayfirst=True)
    # "data" is the index name at this point; sort_values resolves it as an index level.
    return frame.set_index("data").sort_values("data")

In [7]:
# One rate-history frame per currency workbook, read in the order the names are listed.
usd, eu, jpy, cny, gbp = (
    read_data(f"../data/{code}.xlsx") for code in ("USD", "EU", "JPY", "CNY", "GBP")
)

# Обучение моделей

## Модель для USD

In [11]:
# Model training helper
def train(monet_df, name, start="2022-06"):
    """Fit, tune and save a PyCaret regression model on calendar features.

    Parameters
    ----------
    monet_df : pandas.DataFrame
        Rate history with the rate in a ``curs`` column and a DatetimeIndex
        (the shape ``read_data`` returns). The index must be sorted so that
        it can be sliced by date.
    name : str
        Currency label used in the output file names, e.g. ``"USD"``.
    start : str, default "2022-06"
        First date (or partial date string) kept for training.

    Side effects
    ------------
    Writes the feature table to ``../data/preprocessed/{name}_df.csv`` and
    saves the finalized model under ``models/{name}_model``. Both
    directories are created when they do not exist yet.

    Raises
    ------
    ValueError
        If no rows are left on or after ``start``.
    """
    csv_dir = "../data/preprocessed"
    model_dir = "models"

    recent = monet_df.loc[start:]
    if recent.empty:
        raise ValueError(f"no rows on or after {start!r} to train {name} on")

    data = recent[["curs"]].rename(columns={"curs": "Курс"})

    # Calendar features taken from the date index. Cast to int64 so the columns
    # have the same dtype as the plain Python ints they used to be built from.
    data["Месяц"] = recent.index.month.astype("int64")
    data["День недели"] = recent.index.dayofweek.astype("int64")
    data["День месяца"] = recent.index.day.astype("int64")

    # predict() reloads this table to draw the historical part of the chart.
    os.makedirs(csv_dir, exist_ok=True)
    data.to_csv(f"{csv_dir}/{name}_df.csv")

    # No shuffling and time-series folds: validation rows are always later
    # than the rows the model was fitted on.
    setup(data, target="Курс", train_size=0.95,
          data_split_shuffle=False, fold_strategy="timeseries", fold=3,
          silent=True, session_id=123)
    best_model = compare_models(sort="MAE")
    # NOTE(review): candidates are ranked by MAE, but tune_model is called
    # without `optimize`, so it tunes for its own default metric - confirm
    # that this is intended.
    tuned_model = tune_model(best_model)
    final_model = finalize_model(tuned_model)

    os.makedirs(model_dir, exist_ok=True)
    save_model(final_model, f"{model_dir}/{name}_model")

In [12]:
def _preprocessed_csv_path(currency, data_dir=None):
    """Return the path of the feature table saved for `currency`."""
    if data_dir is not None:
        folders = [data_dir]
    else:
        # train() writes to ../data/preprocessed; data/preprocessed is the
        # location this function used to read from and is kept as a fallback.
        folders = ["../data/preprocessed", "data/preprocessed"]
    for folder in folders:
        path = os.path.join(folder, f"{currency}_df.csv")
        if os.path.exists(path):
            return path
    raise FileNotFoundError(
        f"no preprocessed table for {currency!r}; looked in: {', '.join(folders)}"
    )


def _show_forecast_chart(final_df, currency):
    """Draw the fact and forecast lines with a range slider and range buttons."""
    fig = px.line(final_df, x="дата", y=["Факт", "Прогноз моделью"],
        title=f"Прогноз цены {currency}",).update_layout(yaxis={"title": "Курс, руб."}, legend={"title":"Дата"})
    fig.update_xaxes(
        rangeslider_visible=True,
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        )
    )
    fig.show()


# Forecasting helper: scores future dates with a saved model and plots the result
def predict(end_date, currency, start_date='2022-11-19', data_dir=None):
    """Forecast the rate of `currency` day by day and plot it against history.

    Parameters
    ----------
    end_date : str or datetime-like
        Last forecast day, e.g. ``"2022-11-20"`` (YYYY-MM-DD).
    currency : str
        Label the model was saved under by ``train``, e.g. ``"USD"``.
    start_date : str or datetime-like, default '2022-11-19'
        First forecast day.
    data_dir : str, optional
        Folder holding ``{currency}_df.csv``. When omitted, the folder
        ``train`` writes to is tried first, then ``data/preprocessed``.

    Returns
    -------
    pandas.DataFrame
        One row per date with the columns ``дата``, ``Факт`` (historical
        rate), the calendar features of both sides of the merge (``_x`` /
        ``_y`` suffixes) and ``Прогноз моделью`` (model output). The same
        frame is shown as an interactive chart.

    Raises
    ------
    FileNotFoundError
        If the preprocessed table for `currency` cannot be found.
    ValueError
        If `end_date` lies before `start_date`.
    """
    # The table is written by DataFrame.to_csv, i.e. with ISO dates, so no
    # day-first hint is passed (it could mis-order year-first strings).
    data = pd.read_csv(_preprocessed_csv_path(currency, data_dir),
                       parse_dates=["data"], index_col=["data"])
    model = load_model(f'models/{currency}_model')

    all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
    if len(all_dates) == 0:
        raise ValueError(f"end_date {end_date!r} is before start_date {start_date!r}")

    # Same calendar features the model was trained on, one row per forecast day.
    score_df = pd.DataFrame({
        'дата': all_dates,
        'Месяц': all_dates.month.astype('int64'),
        'День недели': all_dates.dayofweek.astype('int64'),
        'День месяца': all_dates.day.astype('int64'),
    })
    p = predict_model(model, data=score_df)
    score_df_n = pd.concat([score_df["дата"], p], axis=1)

    data_reset = data.reset_index()
    data_reset.columns = ['дата', 'Курс', 'Месяц', 'День недели', 'День месяца']

    # Extend the history with forecast days it does not contain yet; days that
    # are already present keep their single row, so no date is duplicated.
    is_new = ~score_df_n['дата'].isin(data_reset['дата'])
    data_reset_all = pd.concat([data_reset, score_df_n.loc[is_new, ['дата']]],
                               ignore_index=True)

    final_df = pd.merge(data_reset_all, score_df_n, how='left', on='дата')
    # Positional rename: the rate becomes "Факт", the model's output column
    # (last one) becomes "Прогноз моделью".
    final_df.columns = (['дата', 'Факт', 'Месяц_x', 'День недели_x', 'День месяца_x', 'Месяц_y',
       'День недели_y', 'День месяца_y', 'Прогноз моделью'])

    _show_forecast_chart(final_df, currency)

    return final_df

In [13]:
# Train and persist the USD model: writes the USD feature CSV and saves models/USD_model.
train(usd, "USD")

Unnamed: 0,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,2.2959,7.8608,2.8037,-1.2949,0.0472,0.0385
1,0.8557,1.226,1.1072,-0.3623,0.0183,0.0144
2,1.7568,3.8403,1.9597,-0.2973,0.0318,0.0286
Mean,1.6361,4.309,1.9569,-0.6515,0.0324,0.0272
SD,0.5941,2.7289,0.6926,0.4557,0.0118,0.0099


Transformation Pipeline and Model Successfully Saved


In [14]:
# Forecast USD from predict()'s built-in start date through 2022-11-29; keeps the merged fact/forecast table.
test_final = predict("2022-11-29", "USD")

Transformation Pipeline and Model Successfully Loaded


In [21]:
# Forecast CNY through 2022-12-30.
# NOTE(review): no train(cny, "CNY") call is visible in this notebook, so this relies on
# models/CNY_model and the CNY feature CSV already being on disk - confirm for a fresh run.
test_final = predict("2022-12-30", "CNY")

Transformation Pipeline and Model Successfully Loaded


In [16]:
# Longer-horizon USD forecast, through 2023-01-30.
# NOTE(review): this rebinds test_final, so results of earlier predict() calls stored under that name are lost.
test_final = predict("2023-01-30", "USD")

Transformation Pipeline and Model Successfully Loaded
