# Use Uhunt API

In [None]:
import requests
import pandas as pd

class UhuntAPI:
    """Minimal client for the uHunt HTTP API at ``uhunt.onlinejudge.org``.

    Responses are converted to pandas DataFrames whose column names are
    fixed by ``PROBLEM_COLUMNS`` and ``SUBMISSION_COLUMNS``.
    """

    PROBLEM_COLUMNS = ['pid', 'num', 'title', 'dacu', 'mrun', 'mmem', 'nover', 'sube', 'noj', 'inq', 'ce', 'rf', 're', 'ole', 'tle', 'mle', 'wa', 'pe', 'ac', 'rtl', 'status', 'rej']
    SUBMISSION_COLUMNS = ['sid', 'uid', 'pid', 'ver', 'lan', 'run', 'mem', 'rank', 'sbt', 'name', 'uname']

    def __init__(self, timeout=30):
        """Create a session with a browser-like User-Agent.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up,
                so a stalled connection cannot hang the caller forever.
        """
        self.base_url = 'https://uhunt.onlinejudge.org/api'
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Mozilla/5.0'})

    def _get_json(self, path, error_prefix):
        """GET ``base_url + path`` and return the decoded JSON body.

        Raises:
            Exception: ``"<error_prefix>: <status code>"`` on a non-200 reply.
        """
        response = self.session.get(f'{self.base_url}{path}', timeout=self.timeout)
        if response.status_code != 200:
            raise Exception(f"{error_prefix}: {response.status_code}")
        return response.json()

    def get_problems(self):
        """Return the problem list from ``/p`` as a DataFrame.

        Raises:
            Exception: If the server does not answer with HTTP 200.
        """
        data = self._get_json('/p', "Failed to fetch problems")
        return pd.DataFrame(data, columns=self.PROBLEM_COLUMNS)

    def get_problem_submissions(self, pid, start_sbt=0, end_sbt=2147483647):
        """Return submissions of problem ``pid`` with ``sbt`` >= ``start_sbt``.

        The window ``/p/subs/{pid}/{start_sbt}/{end_sbt}`` is requested
        repeatedly, each time moving ``end_sbt`` down to the oldest ``sbt``
        received so far, until a page is empty or reaches ``start_sbt``.
        NOTE(review): this assumes every response holds the most recent
        submissions of the requested window -- confirm against the API.

        Args:
            pid: Problem id used in the request path.
            start_sbt: Lower bound of the submission-time window.
            end_sbt: Upper bound of the submission-time window.

        Returns:
            DataFrame with ``SUBMISSION_COLUMNS``, one row per ``sid``,
            sorted by ``sbt`` descending with a fresh 0..n-1 index.

        Raises:
            Exception: If any request does not answer with HTTP 200.
        """
        columns = self.SUBMISSION_COLUMNS
        pages = []
        while True:
            data = self._get_json(
                f'/p/subs/{pid}/{start_sbt}/{end_sbt}',
                f"Failed to fetch submissions for problem {pid}",
            )
            if not data:
                break
            page = pd.DataFrame(data, columns=columns)
            pages.append(page)
            oldest = page['sbt'].min()
            if pd.isna(oldest) or oldest <= start_sbt:
                break
            oldest = int(oldest)
            if oldest < end_sbt:
                # Keep the boundary timestamp in the next window so that
                # submissions sharing it are not lost between pages.
                end_sbt = oldest
            else:
                # The page made no progress (its oldest row sits on the
                # window's upper bound). Re-requesting the same window would
                # loop forever, so step just below it instead.
                # NOTE(review): if one timestamp holds more submissions than
                # a single response returns, the remainder cannot be fetched.
                end_sbt -= 1
            if end_sbt < start_sbt:
                break

        if not pages:
            return pd.DataFrame(columns=columns)

        df = pd.concat(pages, ignore_index=True)
        df = df[df['sbt'] >= start_sbt]
        # Consecutive windows overlap on their boundary timestamp, so the same
        # submission can arrive twice.
        df = df.drop_duplicates(subset='sid')
        df = df.sort_values(by='sbt', ascending=False).reset_index(drop=True)
        return df
        


# Get Submission Data

In [20]:
import os

# Download (or incrementally refresh) one CSV of submissions per problem into
# the 'submissions' directory. Any failure stops the run and is printed.
try:
    api = UhuntAPI()
    problems_df = api.get_problems()
    submissions_dir = 'submissions'
    os.makedirs(submissions_dir, exist_ok=True)
    for index, row in problems_df.iterrows():
        path = os.path.join(submissions_dir, f'{row["num"]}.csv')

        # Load what was saved by an earlier run, if anything usable exists.
        prev_submissions_df = None
        if os.path.exists(path):
            try:
                prev_submissions_df = pd.read_csv(path)
            except pd.errors.EmptyDataError:
                # Zero-byte file (e.g. an interrupted write): treat as absent.
                prev_submissions_df = None

        # max() of an empty column is NaN; NaN + 1 must never reach the URL.
        last_submission_time = None
        if prev_submissions_df is not None:
            last_submission_time = prev_submissions_df['sbt'].max()

        if last_submission_time is None or pd.isna(last_submission_time):
            # No history for this problem yet: fetch everything.
            submissions_df = api.get_problem_submissions(row['pid'])
        else:
            # Only ask for submissions newer than the newest one on disk.
            new_submissions_df = api.get_problem_submissions(row['pid'], int(last_submission_time) + 1)
            submissions_df = pd.concat([prev_submissions_df, new_submissions_df], ignore_index=True)
            submissions_df = submissions_df.drop_duplicates(subset=['sid'], keep='last')
        submissions_df.to_csv(path, index=False)
except Exception as e:
    print(e)

# Concatenate all submissions into a single CSV file

In [21]:
import os
import pandas as pd

# Merge every per-problem CSV under 'submissions' into 'all_submissions.csv',
# keeping one row per sid and ordering rows by sid.
try:
    submissions_dir = 'submissions'
    # Files are read in directory-listing order; that order decides which
    # duplicate survives the keep='last' de-duplication below.
    all_submissions = [
        pd.read_csv(os.path.join(submissions_dir, entry))
        for entry in os.listdir(submissions_dir)
        if entry.endswith('.csv')
    ]
    all_submissions_df = (
        pd.concat(all_submissions, ignore_index=True)
        .drop_duplicates(subset=['sid'], keep='last')
        .sort_values(by=['sid'], ascending=True)
    )
    all_submissions_df.to_csv('all_submissions.csv', index=False)
except Exception as e:
    print(e)

# Get previous attempt count for each submission

In [24]:
import pandas as pd

def compute_attempt_counts(submissions):
    """Reduce a submissions table to one final-attempt row per (uid, pid).

    Only rows whose ``ver`` lies in [30, 90] count as attempts, and rows
    submitted after the pair's first ``ver == 90`` submission are ignored.

    Args:
        submissions: DataFrame with at least ``sid``, ``uid``, ``pid``, ``ver``.

    Returns:
        DataFrame sorted by ``sid`` with columns ``sid``, ``uid``, ``pid``,
        ``prev_attempt_count`` (attempts by that uid on that pid before this
        row) and ``ac`` (True when this row has ``ver == 90``).
    """
    # cumcount() numbers rows in their current order, so the order must be
    # the sid order regardless of how the input file happened to be sorted.
    subs = submissions.sort_values(by='sid')

    first_ac = (
        subs.loc[subs['ver'] == 90]
        .groupby(['uid', 'pid'])['sid']
        .min()
        .rename('first_ac_sid')
        .reset_index()
    )
    subs = subs.merge(first_ac, on=['uid', 'pid'], how='left')

    is_attempt = subs['ver'].between(30, 90)
    not_after_ac = subs['first_ac_sid'].isna() | (subs['sid'] <= subs['first_ac_sid'])

    # .copy() gives an independent frame, so the column assignments below do
    # not write into a slice of ``subs``.
    attempts = subs.loc[is_attempt & not_after_ac, ['sid', 'uid', 'pid', 'ver']].copy()
    attempts['prev_attempt_count'] = attempts.groupby(['uid', 'pid']).cumcount()
    attempts['ac'] = attempts['ver'] == 90
    attempts = attempts.drop(columns='ver')

    # The row with the highest running count is the last attempt of its pair.
    final_attempt_idx = attempts.groupby(['uid', 'pid'])['prev_attempt_count'].idxmax()
    return attempts.loc[final_attempt_idx].sort_values(by=['sid'], ascending=True)


try:
    # filename = 'test/all_submissions_1000.csv'
    filename = 'all_submissions.csv'
    df = pd.read_csv(filename)

    attempt_df = compute_attempt_counts(df)
    attempt_df.to_csv('attempt_count.csv', index=False)

except Exception as e:
    print(e)

# Simulate Elo rating calculation

In [None]:
from math import pow
from collections import defaultdict
import pandas as pd

class EloRating:
    """Elo-style ratings where each attempt is a match: problem vs. user.

    Ratings live in defaultdicts, so an id seen for the first time starts
    at the configured initial rating.
    """

    def __init__(self, initial_problem_rating=500, initial_user_rating=500, k=50, decay=0.9):
        """Store the K-factor, the per-attempt decay and the rating tables."""
        self.k = k
        self.decay = decay
        self.problem_ratings = defaultdict(lambda: initial_problem_rating)
        self.user_ratings = defaultdict(lambda: initial_user_rating)

    def update_rating(self, problem_id, user_id, ac, prev_attempt_count):
        """Apply one result to both the problem's and the user's rating.

        The user's score is ``ac * decay ** prev_attempt_count``; the problem
        receives the complementary score ``1 - score``.
        """
        user_score = ac * pow(self.decay, prev_attempt_count)
        delta_problem, delta_user = self.elo_rating_change(
            self.problem_ratings[problem_id],
            self.user_ratings[user_id],
            1-user_score,
            user_score,
            self.k,
        )
        self.problem_ratings[problem_id] += delta_problem
        self.user_ratings[user_id] += delta_user

    def elo_rating_change(self, rating_a, rating_b, score_a, score_b, k):
        """Return ``(change_a, change_b)``: k times (actual - expected) score."""

        def expected(own, other):
            # Logistic expectation on a 400-point scale.
            return 1 / (1 + 10 ** ((other - own) / 400))

        change_a = k * (score_a - expected(rating_a, rating_b))
        change_b = k * (score_b - expected(rating_b, rating_a))
        return change_a, change_b

    def save_problem_ratings(self, filename):
        """Write ``pid, num, title, rating`` for every problem to a CSV.

        The problem list is fetched through a new ``UhuntAPI`` instance, and
        each ``pid`` is looked up in ``problem_ratings``.
        """
        catalog = UhuntAPI().get_problems()
        catalog['rating'] = catalog['pid'].map(self.problem_ratings)
        catalog[['pid', 'num', 'title', 'rating']].to_csv(filename, index=False)

    def save_user_ratings(self, filename):
        """Write every ``uid, rating`` pair currently stored to a CSV."""
        pd.DataFrame(
            self.user_ratings.items(), columns=['uid', 'rating']
        ).to_csv(filename, index=False)

# Replay the attempts in 'attempt_count.csv' through an EloRating instance,
# using only users that have at least `threshold_count` rows, then write the
# resulting problem and user ratings to CSV.
try:
    threshold_count = 5
    filename = 'attempt_count.csv'
    df = pd.read_csv(filename)

    # df = df[df['pid'] >= 941]

    # Number of rows each uid has in the file, attached to every row.
    rows_per_user = df['uid'].value_counts()
    df['count'] = df['uid'].map(rows_per_user)
    df = df[df['count'] >= threshold_count]

    elo_system = EloRating()
    # Rows are replayed in file order.
    for pid, uid, ac, prev_attempt_count in zip(df['pid'], df['uid'], df['ac'], df['prev_attempt_count']):
        elo_system.update_rating(pid, uid, ac, prev_attempt_count)

    elo_system.save_problem_ratings('problem_ratings.csv')
    elo_system.save_user_ratings('user_ratings.csv')
except Exception as e:
    print(e)