# Use the uHunt API

In [1]:
import requests
import json
import pandas as pd

class UhuntAPI:
    """Small client for the uHunt HTTP API that returns responses as DataFrames.

    Every request goes through one shared ``requests.Session`` and is bounded
    by ``timeout`` so a stalled connection cannot hang a long crawl forever.
    """

    # Column labels applied to the rows returned by the problem-list endpoint.
    # NOTE(review): order is assumed to follow the uHunt API field order.
    PROBLEM_COLUMNS = [
        'pid', 'num', 'title', 'dacu', 'mrun', 'mmem', 'nover', 'sube',
        'noj', 'inq', 'ce', 'rf', 're', 'ole', 'tle', 'mle', 'wa', 'pe',
        'ac', 'rtl', 'status', 'rej',
    ]
    # Column labels applied to the per-problem submission records.
    SUBMISSION_COLUMNS = [
        'sid', 'uid', 'pid', 'ver', 'lan', 'run', 'mem', 'rank', 'sbt',
        'name', 'uname',
    ]

    def __init__(self, timeout=30):
        """Create the HTTP session.

        Args:
            timeout: Seconds to wait for each request before giving up.
        """
        self.base_url = 'https://uhunt.onlinejudge.org/api'
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({'User-Agent': 'Mozilla/5.0'})

    def _get_json(self, path, failure_message):
        """GET ``base_url + path`` and return the decoded JSON body.

        Raises:
            RuntimeError: If the response status is not 200. The message is
                ``"<failure_message>: <status code>"``.
        """
        response = self.session.get(f'{self.base_url}{path}', timeout=self.timeout)
        if response.status_code != 200:
            raise RuntimeError(f"{failure_message}: {response.status_code}")
        return response.json()

    def get_problems(self):
        """Fetch the problem list as a DataFrame with ``PROBLEM_COLUMNS``.

        Raises:
            RuntimeError: If the server does not answer with status 200.
        """
        data = self._get_json('/p', "Failed to fetch problems")
        return pd.DataFrame(data, columns=self.PROBLEM_COLUMNS)

    def get_problem_submissions(self, pid, start_sbt=0, end_sbt=2147483647):
        """Fetch submissions to problem ``pid`` as a DataFrame.

        Args:
            pid: Problem id placed in the request path.
            start_sbt: Lower bound placed in the request path.
            end_sbt: Upper bound placed in the request path; the default is
                the largest signed 32-bit integer.

        Returns:
            DataFrame with ``SUBMISSION_COLUMNS``.

        Raises:
            RuntimeError: If the server does not answer with status 200.
        """
        data = self._get_json(
            f'/p/subs/{pid}/{start_sbt}/{end_sbt}',
            f"Failed to fetch submissions for problem {pid}",
        )
        return pd.DataFrame(data, columns=self.SUBMISSION_COLUMNS)


# Get Submission Data

In [None]:
import os

def next_start_sbt(prev_submissions_df):
    """Return the first submission time not yet covered by a cached frame.

    Args:
        prev_submissions_df: Previously saved submissions (needs an ``sbt``
            column), or ``None`` when nothing has been cached yet.

    Returns:
        ``max(sbt) + 1`` as a plain ``int``, or ``0`` when there is no usable
        cached row. An empty cache yields NaN from ``max()``, which would
        otherwise end up in the request URL as the text "nan".
    """
    if prev_submissions_df is None or prev_submissions_df.empty:
        return 0
    last_submission_time = prev_submissions_df['sbt'].max()
    if pd.isna(last_submission_time):
        return 0
    # int() also strips a float representation such as 123.0 from the URL.
    return int(last_submission_time) + 1


# Incrementally mirror each problem's submissions into submissions/<num>.csv:
# only submissions newer than the cached ones are requested on later runs.
try:
    api = UhuntAPI()
    problems_df = api.get_problems()
    submissions_dir = 'submissions'
    # exist_ok replaces the racy "check, then create" sequence.
    os.makedirs(submissions_dir, exist_ok=True)
    for pid, num in zip(problems_df['pid'], problems_df['num']):
        path = os.path.join(submissions_dir, f'{num}.csv')
        prev_submissions_df = pd.read_csv(path) if os.path.exists(path) else None
        submissions_df = api.get_problem_submissions(pid, next_start_sbt(prev_submissions_df))
        if prev_submissions_df is not None and not prev_submissions_df.empty:
            if submissions_df.empty:
                # Nothing new: the file on disk is already up to date.
                continue
            submissions_df = pd.concat(
                [prev_submissions_df, submissions_df], ignore_index=True
            ).drop_duplicates(subset=['sid'], keep='last')
        submissions_df.to_csv(path, index=False)
except Exception as e:
    # Any failure stops the crawl; files already written stay valid and the
    # next run resumes from them.
    print(e)

# Concatenate all submissions into a single CSV file

In [None]:
import os
import pandas as pd

# Merge every per-problem CSV into one file, de-duplicated and ordered by sid.
try:
    submissions_dir = 'submissions'
    all_submissions = [
        pd.read_csv(os.path.join(submissions_dir, csv_name))
        for csv_name in os.listdir(submissions_dir)
        if csv_name.endswith('.csv')
    ]
    all_submissions_df = (
        pd.concat(all_submissions, ignore_index=True)
        .drop_duplicates(subset=['sid'], keep='last')
        .sort_values(by=['sid'], ascending=True)
    )
    all_submissions_df.to_csv('all_submissions.csv', index=False)
except Exception as e:
    print(e)

# Get previous attempt count for each submission

In [53]:
import pandas as pd

def compute_attempt_counts(df):
    """Count, for each attempt, the earlier attempts by the same user on the same problem.

    Args:
        df: Submissions with at least ``sid``, ``uid``, ``pid`` and ``ver``
            columns. Rows may be in any order.

    Returns:
        DataFrame with columns ``sid``, ``uid``, ``pid``,
        ``prev_attempt_count`` and ``ac``, ordered by ``sid``. Only rows with
        ``ver`` in 30..90 are kept, and for each (uid, pid) nothing after the
        first ``ver == 90`` row is kept. ``ac`` is True where ``ver == 90``.
        NOTE(review): 90 is presumably "accepted" and 30..90 the judged
        verdicts in the uHunt verdict table; confirm against the API docs.
    """
    # cumcount() numbers rows in their current order, so fix that order by
    # sid instead of relying on the input file already being sorted.
    df = df.sort_values('sid', kind='stable')

    first_ac_sid = (
        df.loc[df['ver'] == 90]
        .groupby(['uid', 'pid'])['sid']
        .min()
        .rename('first_ac_sid')
        .reset_index()
    )
    df = df.merge(first_ac_sid, on=['uid', 'pid'], how='left')

    # A missing first_ac_sid means the user never got ver == 90 on the problem.
    up_to_first_ac = df['first_ac_sid'].isna() | (df['sid'] <= df['first_ac_sid'])
    is_attempt = df['ver'].between(30, 90) & up_to_first_ac

    # Explicit copy: adding columns to a filtered view raises
    # SettingWithCopyWarning on pandas versions without copy-on-write.
    attempt_df = df.loc[is_attempt, ['sid', 'uid', 'pid', 'ver']].copy()
    attempt_df['prev_attempt_count'] = attempt_df.groupby(['uid', 'pid']).cumcount()
    attempt_df['ac'] = attempt_df['ver'] == 90
    return attempt_df.drop(columns='ver')


try:
    # filename = 'test/all_submissions_1000.csv'
    filename = 'all_submissions.csv'
    df = pd.read_csv(filename)

    attempt_df = compute_attempt_counts(df)
    attempt_df.to_csv('attempt_count.csv', index=False)

except Exception as e:
    print(e)