In [2]:
import pandas as pd
import joblib, requests, datetime

# Обучение модели прогнозирования временных рядов рейтингов шахматистов
Данные:
- Рейтинг (целое положительное)
- Дата (год-месяц-день)
- Название рейтинга (строка)

Горизонт прогнозирования: 30 дней

# Получение данных

In [3]:
class LichessApi:
    def __init__(self):
        self.base_url = 'https://lichess.org/api/'

    def get_player_ratings(self, username):
        response = requests.get(f'{self.base_url}user/{username}/rating-history')
        response.raise_for_status()

        # {
        #     "Bullet": [
        #         ("2011-09-11", 1472)
        #     ]
        # }
        return {perf['name']: [(datetime.date(point[0], point[1] + 1, point[2]), point[3]) for point in perf['points']]
                for perf in response.json()}

    def get_all_top10_user_ids(self):
        response = requests.get(f'{self.base_url}player')
        response.raise_for_status()

        player_id_lists = [[player['id'] for player in players] for game_type, players in response.json().items()]
        return [player_id for player_id_list in player_id_lists for player_id in player_id_list]

In [4]:
api = LichessApi()
top10_user_ids = api.get_all_top10_user_ids()

In [5]:
top10_user_ids

['nihalsarin2004',
 'penguingim1',
 'ediz_gurel',
 'konstantinkornienko',
 'aaryan_varshney',
 'aqua_blazing',
 'night-king96',
 'arka50',
 'sindarovgm',
 'grind_neverstops',
 'athena-pallada',
 'yarebore',
 'bettercollsoul',
 'loin_sn',
 'anythingbutaverage',
 'lintchevski_daniil',
 'ciderdrinker',
 'vincentkeymer2004',
 'winx_m',
 'xiaochess',
 'pap-g',
 'dakaissa_25',
 'shadhur-3187',
 'tuzakli_egitim',
 'rehbwf',
 'pultis12',
 'ilqar_7474',
 'mr-a7aaaa17a7a',
 'theprodigco',
 'raskolnikovrebel',
 'igormezentsev',
 'plemsovhoz',
 'powerfulll',
 'vlad_lazarev79',
 'ailands',
 'zwenna',
 'josedavid321',
 'tempest_dragon',
 'chesstheory64',
 'ojaijoao',
 'penguingim1',
 'shadowking78',
 'blazinq',
 'konstantinkornienko',
 'ediz_gurel',
 'fritzi_2003',
 'nihalsarin2004',
 'ragehunter',
 'patetico',
 'lama_the_best',
 'larso',
 'grxbullet',
 'jannlee',
 'sonogoneli',
 'hanzotherazor',
 'visualdennis',
 'johnstuckey',
 'legiondestroyer',
 'oldhas-been',
 'adridem',
 'zhigalko_sergei',
 'a

In [6]:
ratings = {user_id: api.get_player_ratings(user_id) for user_id in top10_user_ids}

In [7]:
ratings[top10_user_ids[0]]

{'Bullet': [(datetime.date(2017, 9, 17), 2146),
  (datetime.date(2018, 4, 24), 2469),
  (datetime.date(2018, 4, 25), 2504),
  (datetime.date(2018, 4, 28), 2470),
  (datetime.date(2018, 7, 18), 2600),
  (datetime.date(2018, 7, 20), 2617),
  (datetime.date(2018, 7, 21), 2608),
  (datetime.date(2018, 7, 24), 2609),
  (datetime.date(2018, 7, 31), 2611),
  (datetime.date(2018, 8, 5), 2660),
  (datetime.date(2018, 8, 6), 2570),
  (datetime.date(2018, 8, 9), 2550),
  (datetime.date(2018, 8, 10), 2568),
  (datetime.date(2018, 8, 15), 2596),
  (datetime.date(2018, 8, 16), 2638),
  (datetime.date(2018, 8, 17), 2650),
  (datetime.date(2018, 8, 18), 2700),
  (datetime.date(2018, 8, 20), 2677),
  (datetime.date(2018, 8, 21), 2656),
  (datetime.date(2018, 8, 22), 2691),
  (datetime.date(2018, 8, 23), 2652),
  (datetime.date(2018, 8, 24), 2666),
  (datetime.date(2018, 8, 25), 2611),
  (datetime.date(2018, 8, 26), 2680),
  (datetime.date(2018, 8, 27), 2647),
  (datetime.date(2018, 8, 28), 2651),
  (da

# Подготовка данных

In [8]:
data = []
for user_id, user_ratings in ratings.items():
    for perf_name, perf_ratings in user_ratings.items():
        for date, rating in perf_ratings:
            data.append({
                'user_id': user_id,
                'name': perf_name,
                'date': date,
                'rating': rating
            })
df = pd.DataFrame(data)

In [9]:
df

Unnamed: 0,user_id,name,date,rating
0,nihalsarin2004,Bullet,2017-09-17,2146
1,nihalsarin2004,Bullet,2018-04-24,2469
2,nihalsarin2004,Bullet,2018-04-25,2504
3,nihalsarin2004,Bullet,2018-04-28,2470
4,nihalsarin2004,Bullet,2018-07-18,2600
...,...,...,...,...
165554,atoantrac,UltraBullet,2024-11-12,1683
165555,atoantrac,UltraBullet,2024-11-17,1677
165556,atoantrac,UltraBullet,2024-11-19,1678
165557,atoantrac,UltraBullet,2024-11-26,1667


# Обучение моделей
Модели:
- ARIMA
- Prophet
- LSTM

## Подготовка обучающей, валидирующей и тестовых выборок
Стоит учесть, что данные являются временными рядами, поэтому разбиение на выборки должно быть сделано с учетом времени.

In [13]:
train = df[df['date'] < df['date'].quantile(0.6)]
val = df[(df['date'] >= df['date'].quantile(0.6)) & (df['date'] < df['date'].quantile(0.8))]
test = df[df['date'] >= df['date'].quantile(0.8)]

In [14]:
train

Unnamed: 0,user_id,name,date,rating
0,nihalsarin2004,Bullet,2017-09-17,2146
1,nihalsarin2004,Bullet,2018-04-24,2469
2,nihalsarin2004,Bullet,2018-04-25,2504
3,nihalsarin2004,Bullet,2018-04-28,2470
4,nihalsarin2004,Bullet,2018-07-18,2600
...,...,...,...,...
164958,imakemanymistakes,Racing Kings,2023-02-08,2447
164959,imakemanymistakes,Racing Kings,2023-02-09,2451
164960,imakemanymistakes,Racing Kings,2023-02-11,2311
165042,imakemanymistakes,Crazyhouse,2023-01-29,1420


## ARIMA

In [15]:
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [16]:
arima = SARIMAX(train['rating'], order=(5, 1, 0))
arima = arima.fit()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [17]:
arima.summary()

0,1,2,3
Dep. Variable:,rating,No. Observations:,99300.0
Model:,"SARIMAX(5, 1, 0)",Log Likelihood,-550828.098
Date:,"Wed, 25 Dec 2024",AIC,1101668.195
Time:,20:35:57,BIC,1101725.23
Sample:,0,HQIC,1101685.509
,- 99300,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,-0.1321,0.001,-126.425,0.000,-0.134,-0.130
ar.L2,-0.0891,0.001,-65.895,0.000,-0.092,-0.086
ar.L3,-0.0639,0.002,-37.807,0.000,-0.067,-0.061
ar.L4,-0.0520,0.002,-28.775,0.000,-0.056,-0.048
ar.L5,-0.0280,0.002,-13.671,0.000,-0.032,-0.024
sigma2,3851.2858,1.999,1926.826,0.000,3847.368,3855.203

0,1,2,3
Ljung-Box (L1) (Q):,0.06,Jarque-Bera (JB):,96702186.41
Prob(Q):,0.81,Prob(JB):,0.0
Heteroskedasticity (H):,0.63,Skew:,-7.55
Prob(H) (two-sided):,0.0,Kurtosis:,155.13


In [18]:
forecast = arima.forecast(steps=len(val))

  return get_prediction_index(
  return get_prediction_index(


In [19]:
forecast

99300     1246.090855
99301     1320.241938
99302     1368.626920
99303     1388.146880
99304     1378.345969
             ...     
132380    1365.573467
132381    1365.573467
132382    1365.573467
132383    1365.573467
132384    1365.573467
Name: predicted_mean, Length: 33085, dtype: float64

In [20]:
val['rating']

571       3201
572       3221
573       3204
574       3250
575       3212
          ... 
164977    2249
164978    2236
164979    2258
164980    2235
164981    2232
Name: rating, Length: 33085, dtype: int64

In [21]:
from sklearn.metrics import mean_squared_error

In [22]:
mean_squared_error(val['rating'], forecast)

1273890.0286271202

## Prophet

In [23]:
from prophet import Prophet

  from .autonotebook import tqdm as notebook_tqdm
Importing plotly failed. Interactive plots will not work.


In [25]:
proph = Prophet()
proph.fit(train[['date', 'rating']].rename(columns={'date': 'ds', 'rating': 'y'}))

20:38:11 - cmdstanpy - INFO - Chain [1] start processing
20:38:48 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x202430d49e0>

In [27]:
forecast = proph.predict(val[['date']].rename(columns={'date': 'ds'}))

In [28]:
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2023-02-12,2413.310532,1913.420286,2850.052020,2413.310532,2413.310532,-14.786233,-14.786233,-14.786233,-4.554127,-4.554127,-4.554127,-10.232106,-10.232106,-10.232106,0.0,0.0,0.0,2398.524299
1,2023-02-12,2413.310532,1933.115048,2851.024553,2413.310532,2413.310532,-14.786233,-14.786233,-14.786233,-4.554127,-4.554127,-4.554127,-10.232106,-10.232106,-10.232106,0.0,0.0,0.0,2398.524299
2,2023-02-12,2413.310532,1924.601308,2875.124983,2413.310532,2413.310532,-14.786233,-14.786233,-14.786233,-4.554127,-4.554127,-4.554127,-10.232106,-10.232106,-10.232106,0.0,0.0,0.0,2398.524299
3,2023-02-12,2413.310532,1935.092979,2872.621273,2413.310532,2413.310532,-14.786233,-14.786233,-14.786233,-4.554127,-4.554127,-4.554127,-10.232106,-10.232106,-10.232106,0.0,0.0,0.0,2398.524299
4,2023-02-12,2413.310532,1968.165484,2824.572741,2413.310532,2413.310532,-14.786233,-14.786233,-14.786233,-4.554127,-4.554127,-4.554127,-10.232106,-10.232106,-10.232106,0.0,0.0,0.0,2398.524299
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33080,2024-02-04,2445.351931,1976.399958,2924.728266,2307.421466,2595.951742,-11.353487,-11.353487,-11.353487,-4.554127,-4.554127,-4.554127,-6.799361,-6.799361,-6.799361,0.0,0.0,0.0,2433.998444
33081,2024-02-04,2445.351931,1989.149366,2943.227390,2307.414395,2595.958763,-11.353487,-11.353487,-11.353487,-4.554127,-4.554127,-4.554127,-6.799361,-6.799361,-6.799361,0.0,0.0,0.0,2433.998444
33082,2024-02-04,2445.351931,1976.628201,2898.171952,2307.407324,2595.965785,-11.353487,-11.353487,-11.353487,-4.554127,-4.554127,-4.554127,-6.799361,-6.799361,-6.799361,0.0,0.0,0.0,2433.998444
33083,2024-02-04,2445.351931,1982.583058,2933.067571,2307.400253,2595.972806,-11.353487,-11.353487,-11.353487,-4.554127,-4.554127,-4.554127,-6.799361,-6.799361,-6.799361,0.0,0.0,0.0,2433.998444


In [29]:
mean_squared_error(val['rating'], forecast['yhat'])

108755.58050809577

## LSTM

In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [39]:
# Reshape the training data
train_ratings = train['rating'].values.reshape(-1, 1, 1)

# Define and compile the LSTM model
lstm = Sequential()
lstm.add(LSTM(50, activation='relu', input_shape=(1, 1)))
lstm.add(Dense(1))
lstm.compile(optimizer='adam', loss='mse')

  super().__init__(**kwargs)


In [40]:
# Fit the LSTM model
lstm.fit(train_ratings, train['rating'].values, epochs=5, batch_size=32)

Epoch 1/5
[1m3104/3104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 805094.0000
Epoch 2/5
[1m3104/3104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 0.0253
Epoch 3/5
[1m3104/3104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1ms/step - loss: 0.0275
Epoch 4/5
[1m3104/3104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 0.0263
Epoch 5/5
[1m3104/3104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 0.0468


<keras.src.callbacks.history.History at 0x2025e39ea20>

In [41]:
forecast = lstm.predict(val['rating'])

[1m1034/1034[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 793us/step


In [42]:
forecast

array([[3200.5286],
       [3220.5227],
       [3203.5276],
       ...,
       [2257.7957],
       [2234.802 ],
       [2231.803 ]], dtype=float32)

In [43]:
mean_squared_error(val['rating'], forecast)

0.0748983845114708

## Выбор модели на тестовой выборке

In [45]:
arima_forecast = arima.forecast(steps=len(test))
proph_forecast = proph.predict(test[['date']].rename(columns={'date': 'ds'}))
lstm_forecast = lstm.predict(test['rating'])

  return get_prediction_index(
  return get_prediction_index(


[1m1037/1037[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 769us/step


In [46]:
arima_mse = mean_squared_error(test['rating'], arima_forecast)
proph_mse = mean_squared_error(test['rating'], proph_forecast['yhat'])
lstm_mse = mean_squared_error(test['rating'], lstm_forecast)

In [47]:
arima_mse, proph_mse, lstm_mse

(1449270.5872240362, 105025.62396109631, 0.08673804998397827)

# Сохранение модели

In [48]:
joblib.dump(lstm, 'rating_predictor_model.pkl')

['rating_predictor_model.pkl']