In [1]:
import os
import json

# Directory that is scanned for the `.json` files loaded by the next cell.
# NOTE(review): this name shadows the built-in dir(); it is left unchanged
# because the loading loop reads it by this name.
dir = '../fetch/data/rating_changes'

In [14]:
# Build one record per contest from the JSON files found in `dir`.
# Each record stores the per-participant columns as parallel lists plus the
# contest id and name taken from the first entry of the file.
contest_data = []

# (file name, error description) for every file that had to be skipped, so
# dropped inputs can be inspected instead of disappearing silently.
skipped_files = []

for filename in os.listdir(dir):
    if not filename.endswith('.json'):
        continue
    # Best-effort load: a file that cannot be read or does not have the
    # expected structure is skipped rather than aborting the whole scan.
    try:
        with open(os.path.join(dir, filename)) as fh:
            entries = json.load(fh)
        contest_data.append({
            'rank': [x['rank'] for x in entries],
            'old_rating': [x['oldRating'] for x in entries],
            'new_rating': [x['newRating'] for x in entries],
            'handle': [x['handle'] for x in entries],
            'contest_Id': entries[0]['contestId'],
            'contestName': entries[0]['contestName'],
        })
    except (OSError, ValueError, LookupError, TypeError) as err:
        # OSError: the file cannot be opened or read.
        # ValueError: invalid JSON or undecodable text (json.JSONDecodeError
        #   and UnicodeDecodeError are both ValueError subclasses).
        # LookupError: a required key is missing, or the file holds an empty
        #   list so entries[0] fails.
        # TypeError: the payload is not a list of dict-like entries.
        # Anything else (notably KeyboardInterrupt) is allowed to propagate.
        skipped_files.append((filename, repr(err)))
        continue

print('Total {} Contest'.format(len(contest_data)))
    

Total 206 Contest


In [17]:
import pandas as pd

# Flatten the per-contest parallel lists into one record per
# (contest, participant) pair, then build the DataFrame in a single call.
rows = [
    {
        'rank': rank,
        'old_rating': old_rating,
        'new_rating': new_rating,
        'handle': handle,
        'contest_id': contest['contest_Id'],
        'contest_name': contest['contestName'],
    }
    for contest in contest_data
    for rank, old_rating, new_rating, handle in zip(
        contest['rank'],
        contest['old_rating'],
        contest['new_rating'],
        contest['handle'],
    )
]

df = pd.DataFrame(rows)

df

Unnamed: 0,rank,old_rating,new_rating,handle,contest_id,contest_name
0,1,3572,3668,tourist,1500,"Codeforces Round 707 (Div. 1, based on Moscow ..."
1,2,3229,3355,jiangly,1500,"Codeforces Round 707 (Div. 1, based on Moscow ..."
2,3,3555,3564,maroonrk,1500,"Codeforces Round 707 (Div. 1, based on Moscow ..."
3,4,3363,3403,ecnerwala,1500,"Codeforces Round 707 (Div. 1, based on Moscow ..."
4,5,3238,3294,Rewinding,1500,"Codeforces Round 707 (Div. 1, based on Moscow ..."
...,...,...,...,...,...,...
2400109,10455,704,751,jayasri_dasari,1951,Codeforces Global Round 25
2400110,10455,907,810,Vuh,1951,Codeforces Global Round 25
2400111,10455,0,358,anshgogoi201,1951,Codeforces Global Round 25
2400112,10455,385,604,LouisVie61,1951,Codeforces Global Round 25


In [8]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Seed Python, NumPy and TensorFlow so that weight initialisation and data
# shuffling repeat from run to run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Extract the features and compute delta_rating (new rating - old rating).
# One array per contest is collected and concatenated once at the end.
ranks = []
old_ratings = []
old_rating_percentiles = []
delta_ratings = []

for contest in contest_data:
    n_participants = len(contest['rank'])
    local_old_ratings = np.asarray(contest['old_rating'])
    local_new_ratings = np.asarray(contest['new_rating'])

    # Rank divided by the number of participants in the same contest.
    local_ranks = np.asarray(contest['rank']) / n_participants

    # Relative position of each old rating within its own contest:
    # argsort of argsort yields every element's 0-based position in sorted
    # order, which is then divided by the contest size.
    # NOTE(review): equal ratings receive distinct positions, in whatever
    # order argsort happens to place them.
    sorted_indices = np.argsort(local_old_ratings)
    percentiles = np.argsort(sorted_indices) / n_participants

    ranks.append(local_ranks)
    old_ratings.append(local_old_ratings)
    old_rating_percentiles.append(percentiles)
    delta_ratings.append(local_new_ratings - local_old_ratings)

# Merge the per-contest arrays into flat numpy arrays.
ranks = np.concatenate(ranks)
old_ratings = np.concatenate(old_ratings)
old_rating_percentiles = np.concatenate(old_rating_percentiles)
delta_ratings = np.concatenate(delta_ratings)

# Feature normalisation / standardisation is intentionally left disabled:
# ranks = (ranks - np.mean(ranks)) / np.std(ranks)
# old_ratings = (old_ratings - np.mean(old_ratings)) / np.std(old_ratings)

# Assemble the feature matrix: one row per participant, columns are
# (old-rating percentile, scaled rank, old rating).
X = np.vstack((old_rating_percentiles, ranks, old_ratings)).T
y = delta_ratings

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Build the model. The input shape is declared with an explicit Input object
# instead of the legacy `input_dim` argument on the first Dense layer.
model = Sequential([
    tf.keras.Input(shape=(X.shape[1],)),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1)
])

# Compile the model.
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model.
history = model.fit(X_train, y_train, epochs=20, validation_split=0.2, verbose=1)

# Predict on the test set.
predictions = model.predict(X_test).flatten()

# Plot predicted against actual values.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(y_test, predictions, alpha=0.5)
ax.set_xlabel('Actual Delta Ratings')
ax.set_ylabel('Predicted Delta Ratings')
ax.set_title('Actual vs. Predicted Delta Ratings')
# Array reductions avoid iterating the whole test set element by element.
lo, hi = y_test.min(), y_test.max()
ax.plot([lo, hi], [lo, hi], color='red')  # Perfect predictions
plt.show()



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

KeyboardInterrupt: 