In [None]:
import pandas as pd
import joblib, requests, datetime

# Обучение модели прогнозирования временных рядов рейтингов шахматистов
Данные:
- Рейтинг (целое положительное)
- Дата (год-месяц-день)
- Название рейтинга (строка)

Горизонт прогнозирования: 30 дней

# Получение данных

In [None]:
class LichessApi:
    """Minimal client for the two Lichess HTTP endpoints used in this notebook."""

    def __init__(self, timeout=30):
        """Create a client.

        Args:
            timeout: Seconds to wait for each HTTP request. Without a timeout
                ``requests`` may block indefinitely on a stalled connection.
        """
        self.base_url = 'https://lichess.org/api/'
        self.timeout = timeout

    def get_player_ratings(self, username):
        """Fetch the rating history of one player.

        Args:
            username: User name or id; inserted verbatim into the URL path.

        Returns:
            dict mapping each history entry's ``name`` to a list of
            ``(datetime.date, rating)`` tuples, for example
            ``{"Bullet": [(datetime.date(2011, 9, 11), 1472)]}``.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(f'{self.base_url}user/{username}/rating-history',
                                timeout=self.timeout)
        response.raise_for_status()

        history = {}
        for perf in response.json():
            dated_ratings = []
            for point in perf['points']:
                year, month, day, rating = point[:4]
                # The month is shifted by one before building the date
                # (the payload's months appear to be 0-based).
                dated_ratings.append((datetime.date(year, month + 1, day), rating))
            history[perf['name']] = dated_ratings
        return history

    def get_all_top10_user_ids(self):
        """Return the ids of all players listed by the ``player`` endpoint.

        The response maps each game type to a list of players. A player can be
        listed under several game types, so ids are de-duplicated while
        keeping first-seen order.

        Returns:
            list of unique player ids.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = requests.get(f'{self.base_url}player', timeout=self.timeout)
        response.raise_for_status()

        all_ids = (player['id']
                   for players in response.json().values()
                   for player in players)
        # dict.fromkeys drops repeats and preserves insertion order.
        return list(dict.fromkeys(all_ids))

In [None]:
# Create the API client and fetch the player ids it lists (one HTTP request).
api = LichessApi()
top10_user_ids = api.get_all_top10_user_ids()

In [None]:
top10_user_ids

In [None]:
# Download every listed player's rating history:
# {user_id: {rating_name: [(date, rating), ...]}}.
# Each id costs one HTTP request and the id list may contain repeats, so
# de-duplicate first; dict.fromkeys keeps the first-seen order.
ratings = {user_id: api.get_player_ratings(user_id) for user_id in dict.fromkeys(top10_user_ids)}

In [None]:
ratings[top10_user_ids[0]]

# Подготовка данных

In [None]:
# Flatten {user_id: {rating_name: [(date, rating), ...]}} into one long-format
# row per rating observation.
data = [
    {'user_id': user_id, 'name': perf_name, 'date': date, 'rating': rating}
    for user_id, user_ratings in ratings.items()
    for perf_name, perf_ratings in user_ratings.items()
    for date, rating in perf_ratings
]
# Explicit columns keep the schema stable even when no observations were
# downloaded. Dates are converted from datetime.date objects (object dtype)
# to datetime64 so that comparisons and quantiles use pandas' datetime support.
df = (
    pd.DataFrame(data, columns=['user_id', 'name', 'date', 'rating'])
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [None]:
df

# Обучение моделей
Модели:
- ARIMA
- Prophet
- LSTM

## Подготовка обучающей, валидационной и тестовой выборок
Стоит учесть, что данные являются временными рядами, поэтому разбиение на выборки должно быть сделано с учетом времени.

In [None]:
# Chronological 60 % / 20 % / 20 % split by observation date.
# The cutoffs are computed once, on a datetime64 view of the column, so the
# split also works when 'date' holds plain datetime.date objects.
dates = pd.to_datetime(df['date'])
val_start, test_start = dates.quantile([0.6, 0.8])

train = df[dates < val_start]
val = df[(dates >= val_start) & (dates < test_start)]
test = df[dates >= test_start]

In [None]:
train

## ARIMA

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# Fit a SARIMAX model with non-seasonal order (p=5, d=1, q=0) on the training
# ratings; `arima` ends up holding the fitted results object.
# NOTE(review): train['rating'] contains the rows of every user_id and rating
# name in the frame, so they are modelled as one single series. Confirm this
# pooling is intended rather than one model per (user_id, name).
arima = SARIMAX(train['rating'], order=(5, 1, 0)).fit()

In [None]:
arima.summary()

In [None]:
# Out-of-sample ARIMA forecast with one step per validation row.
forecast = arima.forecast(steps=len(val))

In [None]:
forecast

In [None]:
val['rating']

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Validation MSE of the ARIMA forecast (displayed as the cell output).
mean_squared_error(val['rating'], forecast)

## Prophet

In [None]:
from prophet import Prophet

In [None]:
# Prophet is given the date column as 'ds' and the rating column as 'y'.
prophet_train = train[['date', 'rating']].rename(columns={'date': 'ds', 'rating': 'y'})

proph = Prophet()
proph.fit(prophet_train)

In [None]:
# Prophet predictions for the validation dates (passed as the 'ds' column).
# Note: this rebinds `forecast`, which previously held the ARIMA forecast.
forecast = proph.predict(val[['date']].rename(columns={'date': 'ds'}))

In [None]:
forecast

In [None]:
# Validation MSE of Prophet's point forecast column 'yhat'.
mean_squared_error(val['rating'], forecast['yhat'])

## LSTM

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
# Reshape the training data
train_ratings = train['rating'].values.reshape(-1, 1, 1)

# Define and compile the LSTM model
lstm = Sequential()
lstm.add(LSTM(50, activation='relu', input_shape=(1, 1)))
lstm.add(Dense(1))
lstm.compile(optimizer='adam', loss='mse')

In [None]:
# Fit the LSTM model
lstm.fit(train_ratings, train['rating'].values, epochs=5, batch_size=32)

In [None]:
# Feed the validation ratings in the same (samples, 1, 1) layout the model was
# trained on instead of relying on implicit shape coercion of a 1-D Series.
# NOTE(review): the inputs are the validation ratings themselves, which are
# also the values the prediction is scored against. Confirm this is not
# leaking the target into the evaluation.
forecast = lstm.predict(val['rating'].values.reshape(-1, 1, 1))

In [None]:
forecast

In [None]:
# Validation MSE of the LSTM output.
# NOTE(review): `forecast` was produced from val['rating'] itself, so this
# score is not comparable with the ARIMA / Prophet scores — confirm.
mean_squared_error(val['rating'], forecast)

## Выбор модели на тестовой выборке

In [None]:
# Predictions of the three fitted models for the test rows.
# NOTE(review): the ARIMA model was fitted on `train` only, so its forecast
# steps begin right after the training sample, while `test` holds the latest
# dates (after `val`). Confirm the horizons line up.
arima_forecast = arima.forecast(steps=len(test))
proph_forecast = proph.predict(test[['date']].rename(columns={'date': 'ds'}))
# Explicit (samples, 1, 1) layout, matching the shape used for training.
# NOTE(review): the LSTM receives the test ratings it is later scored against.
lstm_forecast = lstm.predict(test['rating'].values.reshape(-1, 1, 1))

In [None]:
# Test-set mean squared error of each model, in the order ARIMA, Prophet, LSTM.
arima_mse, proph_mse, lstm_mse = (
    mean_squared_error(test['rating'], predicted)
    for predicted in (arima_forecast, proph_forecast['yhat'], lstm_forecast)
)

In [None]:
arima_mse, proph_mse, lstm_mse

# Сохранение модели

In [None]:
# Persist the trained LSTM network to 'rating_predictor_model.pkl' via joblib.
# NOTE(review): the LSTM is saved unconditionally, whatever the MSE comparison
# above shows. Also confirm that pickling a Keras model with joblib round-trips
# for the installed version; Keras' own model.save() is the documented
# serialization path.
joblib.dump(lstm, 'rating_predictor_model.pkl')