In [99]:
import json
import time
from statistics import mean, median

import bs4
import pandas as pd
import requests
from tqdm import tqdm

# List of all contests (gym = true)

In [2]:
# Download the list of every gym contest from the Codeforces API.
url = "https://codeforces.com/api/contest.list?gym=true"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of as a confusing
# KeyError on 'result' further down.
res = requests.get(url, timeout=30)
res.raise_for_status()

# The body is JSON, so parse it directly. Passing it through an HTML parser
# first re-escapes '&', '<' and '>' and therefore corrupts string values such
# as contest names.
newDictionary = json.loads(res.text)

all_contests_df = pd.DataFrame(newDictionary['result'])

all_contests_df.head()


Unnamed: 0,id,name,type,phase,frozen,durationSeconds,description,difficulty,kind,season,preparedBy,icpcRegion,country,city,startTimeSeconds,relativeTimeSeconds,websiteUrl
0,100001,2010 Codeforces Beta Round #1 (training),ICPC,FINISHED,False,7200,This is the only contest for testing Codeforce...,3.0,Training Contest,2010-2011,,,,,,,
1,100002,2002-2003 ACM-ICPC Northeastern European Regio...,ICPC,FINISHED,False,18000,,4.0,Official ICPC Contest,2002-2003,MikeMirzayanov,Northeastern Europe Region,Russia,Saint Petersburg,,,
2,100003,2008-2009 Всероссийская командная олимпиада шк...,ICPC,FINISHED,False,18000,,3.0,Official School Contest,2008-2009,Edvard,,Russia,Saint Petersburg,1453514000.0,186928891.0,
3,100004,Local Contest,ICPC,FINISHED,False,10800,,3.0,Official School Contest,2017-2018,MikeMirzayanov,,Uzbekistan,Namangan,1511928000.0,128515291.0,
4,100005,2010-2011 Цикл интернет-олимпиад. Вторая коман...,ICPC,FINISHED,False,18000,,3.0,Training Contest,2010-2011,PavelKunyavskiy,,Russia,,,,


In [3]:
# Number of gym contests per contest kind.
all_contests_df['kind'].value_counts()

Official ICPC Contest                         303
Official School Contest                       284
Training Contest                              244
School/University/City/Region Championship    201
Training Camp Contest                         126
Official International Personal Contest       113
Opencup Contest                                21
Name: kind, dtype: int64

In [4]:
# Keep only the rows whose kind is 'Opencup Contest', save them, and preview.
opencup_contests_df = all_contests_df.loc[all_contests_df['kind'] == 'Opencup Contest']
opencup_contests_df.to_csv(path_or_buf='Data/contests/opencup_contests.csv', index=False)
opencup_contests_df.head()

Unnamed: 0,id,name,type,phase,frozen,durationSeconds,description,difficulty,kind,season,preparedBy,icpcRegion,country,city,startTimeSeconds,relativeTimeSeconds,websiteUrl
317,100514,Crypto Cup 1.0,ICPC,FINISHED,False,23400,Cryptography Open CUP,3.0,Opencup Contest,2014-2015,Damon,,Iran,Tehran,1414845000.0,225598291.0,http://eprint.iacr.org
345,100571,Hello 2015 (Div.2),ICPC,FINISHED,False,10800,,3.0,Opencup Contest,2014-2015,PrinceOfPersia,,Iran,Tehran,1420645000.0,219798091.0,
374,100633,"2015 ICL, Finals, Div. 1",ICPC,FINISHED,False,18000,,4.0,Opencup Contest,2014-2015,Scalar,,Russia,Kazan’,,,http://icl.ru/turnir/
375,100637,"2015 ICL, Finals, Div. 2",ICPC,FINISHED,False,18000,,3.0,Opencup Contest,2014-2015,Scalar,,Russia,Kazan’,,,http://icl.ru/turnir/
492,100917,"2015-2016 XVI Open Cup, Grand Prix of Bashkort...",ICPC,FINISHED,False,18000,,4.0,Opencup Contest,2015-2016,MikeMirzayanov,,Russia,Izhevsk,1457210000.0,183233491.0,http://opencup.ru/


In [5]:
# Plain Python list of the Open Cup contest ids, used to query standings below.
opencup_contests_ids = opencup_contests_df['id'].tolist()

# Final standings of Open Cup contests

In [6]:
def clean_rows(df):
    """Flatten the nested ``party`` dict of each standings row into columns.

    Adds the columns ``contestId``, ``members``, ``teamName``,
    ``participantType`` and ``ghost`` to ``df`` (the frame is modified in
    place) and returns it.

    Parameters
    ----------
    df : pandas.DataFrame
        Standings rows; each value of the ``party`` column is expected to be
        a dict. A frame without a ``party`` column (for example the result of
        ``pd.DataFrame([])`` for a contest with no rows) is accepted.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the five extra columns. Missing
        ``teamName`` / ``participantType`` / ``ghost`` become ``''``, a
        missing or empty ``members`` becomes ``[]`` and a missing
        ``contestId`` becomes ``None``.
    """
    # Column name -> how to pull the value out of one party dict.
    extractors = {
        'contestId': lambda party: party.get('contestId'),
        'members': lambda party: party.get('members') or [],
        'teamName': lambda party: party.get('teamName', ''),
        'participantType': lambda party: party.get('participantType', ''),
        'ghost': lambda party: party.get('ghost', ''),
    }

    if 'party' not in df.columns:
        # Nothing to flatten: still create the columns so that concatenating
        # this frame with non-empty ones yields a consistent schema.
        for column in extractors:
            df[column] = pd.Series(dtype=object)
        return df

    for column, extract in extractors.items():
        df[column] = df['party'].apply(extract)
    return df




In [7]:
# Download the standings and the problem list of every Open Cup contest.
final_df = pd.DataFrame()  # not used by the cells shown here; kept so the name stays defined
standing_dfs = []
problems_dfs = []

MAX_ATTEMPTS = 5             # give up on a contest after this many non-200 answers
RETRY_DELAY_SECONDS = 2      # base pause between attempts, multiplied by the attempt number
REQUEST_TIMEOUT_SECONDS = 30

for i in opencup_contests_ids:
    url = "https://codeforces.com/api/contest.standings?contestId={}&from=1&count=150&showUnofficial=true".format(i)

    # Bounded retry with a growing pause. An unbounded, delay-free retry loop
    # never terminates on a permanent error and floods the API with requests.
    for attempt in range(1, MAX_ATTEMPTS + 1):
        res = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        if res.status_code == 200:
            break
        if attempt < MAX_ATTEMPTS:
            time.sleep(RETRY_DELAY_SECONDS * attempt)
    else:
        raise RuntimeError(
            f"contest.standings for contest {i} still returned HTTP "
            f"{res.status_code} after {MAX_ATTEMPTS} attempts"
        )

    # Parse the JSON body directly; an HTML parser would re-escape '&', '<'
    # and '>' and corrupt values such as team names.
    newDictionary = json.loads(res.text)
    standing_df = pd.DataFrame(newDictionary['result']['rows'])
    standing_dfs.append(clean_rows(standing_df))
    problems_dfs.append(pd.DataFrame(newDictionary['result']['problems']))

In [8]:
# Stack the per-contest standings into a single table and write it to disk.
standing_df = pd.concat(objs=standing_dfs)
standing_df.to_csv(path_or_buf='Data/contests/standing_opencup.csv', index=False)

In [9]:
# Stack the per-contest problem lists into a single table and write it to disk.
problems_df = pd.concat(problems_dfs)
probems_df = problems_df  # misspelled original name, kept as an alias for existing references
problems_df.to_csv('Data/contests/problems_opencup.csv', index=False)

# Get user data

In [98]:
def api_result(req):
    """Send a GET request to ``req`` and return the decoded JSON body.

    Parameters
    ----------
    req : str
        Full request URL.

    Returns
    -------
    dict
        The parsed JSON document. The HTTP status code is deliberately not
        checked here, so callers can inspect the ``status`` field of the
        payload themselves.

    Raises
    ------
    json.JSONDecodeError
        If the response body is not valid JSON.
    """
    # The timeout prevents a stalled connection from blocking the notebook.
    res = requests.get(req, timeout=30)
    # Decode the JSON text as-is. Round-tripping it through an HTML parser
    # re-escapes '&', '<' and '>' and so corrupts string values.
    return json.loads(res.text)
def get_users_info(x):
    """Query ``user.info`` for several handles in one request.

    Parameters
    ----------
    x : iterable of str
        User handles.

    Returns
    -------
    dict
        Whatever ``api_result`` returns for the assembled URL.
    """
    # Join with ';' instead of comparing each handle to the last one: the
    # value comparison dropped the separator whenever an earlier handle was
    # equal to the final handle.
    req = "https://codeforces.com/api/user.info?handles=" + ";".join(x)
    return api_result(req)

def get_submission_info(user, cnt = 100000):
    """Summarise a user's submissions on rated problems.

    Fetches up to ``cnt`` submissions of ``user`` through ``api_result``
    (``user.status`` endpoint) and aggregates them.

    Parameters
    ----------
    user : str
        User handle.
    cnt : int, optional
        Maximum number of submissions to request (default 100000).

    Returns
    -------
    tag_info : dict
        Maps each tag to ``{"num", "mean", "median"}`` computed over the
        ratings of submissions with verdict ``'OK'``. Every accepted
        submission contributes, so a problem accepted several times is
        counted several times.
    cnt_verdicts : dict
        Maps each verdict to the number of submissions with that verdict.

    Submissions whose problem has no ``rating`` are ignored in both results,
    as are submissions that carry no ``verdict`` field.

    Raises
    ------
    RuntimeError
        If the API response status is not ``'OK'``.
    """
    req = f"https://codeforces.com/api/user.status?handle={user}&from=1&count={cnt}"
    response = api_result(req)

    # Raise explicitly: an `assert` is removed under `python -O`, which would
    # let the function fall through and return None.
    if response.get('status') != 'OK':
        raise RuntimeError(
            f"user.status request failed for {user!r}: "
            f"{response.get('comment', 'no comment in response')}"
        )

    cnt_verdicts = {}
    list_rates = {}
    for submission in response['result']:
        problem = submission['problem']
        if 'rating' not in problem:
            continue

        # A submission without a verdict cannot be classified; skip it rather
        # than fail with a KeyError.
        verdict = submission.get('verdict')
        if verdict is None:
            continue

        cnt_verdicts[verdict] = cnt_verdicts.get(verdict, 0) + 1
        if verdict == 'OK':
            for tag in problem.get('tags', []):
                list_rates.setdefault(tag, []).append(problem['rating'])

    tag_info = {
        tag: {"num": len(rates), "mean": mean(rates), "median": median(rates)}
        for tag, rates in list_rates.items()
    }
    return tag_info, cnt_verdicts

In [78]:
# Handles to look up in a single user.info request.
users = ['tourist', 'Benq']


# x holds the decoded API response returned by get_users_info.
x = get_users_info(users)

In [79]:
# Show the user.info response stored in x by the previous cell.
print(x)

{'status': 'OK', 'result': [{'lastName': 'Korotkevich', 'country': 'Belarus', 'lastOnlineTimeSeconds': 1640393365, 'city': 'Gomel', 'rating': 3756, 'friendOfCount': 39206, 'titlePhoto': 'https://userpic.codeforces.org/422/title/50a270ed4a722867.jpg', 'handle': 'tourist', 'avatar': 'https://userpic.codeforces.org/422/avatar/2b5dbe87f0d859a2.jpg', 'firstName': 'Gennady', 'contribution': 175, 'organization': 'ITMO University', 'rank': 'legendary grandmaster', 'maxRating': 3870, 'registrationTimeSeconds': 1265987288, 'maxRank': 'legendary grandmaster'}, {'lastName': 'Qi', 'country': 'United States', 'lastOnlineTimeSeconds': 1640397785, 'city': 'Princeton', 'rating': 3532, 'friendOfCount': 8133, 'titlePhoto': 'https://userpic.codeforces.org/312472/title/7cf0a442d4071e87.jpg', 'handle': 'Benq', 'avatar': 'https://userpic.codeforces.org/312472/avatar/5716ac69aea8159a.jpg', 'firstName': 'Benjamin', 'contribution': 90, 'organization': 'MIT', 'rank': 'legendary grandmaster', 'maxRating': 3797, '

In [92]:
# x: per-tag rating summary, y: verdict counts (this rebinds x from the earlier cell).
x, y = get_submission_info('Benq')

CNT =  100000
OK


In [93]:
# Per-tag summary (num / mean / median) returned by get_submission_info.
print(x)

{'brute force': {'num': 447, 'mean': 1947.6510067114093, 'median': 1900}, 'constructive algorithms': {'num': 466, 'mean': 2106.6523605150214, 'median': 2100.0}, 'data structures': {'num': 720, 'mean': 2511.1111111111113, 'median': 2500.0}, 'graphs': {'num': 410, 'mean': 2421.951219512195, 'median': 2400.0}, 'shortest paths': {'num': 83, 'mean': 2453.012048192771, 'median': 2400}, 'combinatorics': {'num': 247, 'mean': 2454.655870445344, 'median': 2500}, 'dp': {'num': 735, 'mean': 2419.4557823129253, 'median': 2400}, 'implementation': {'num': 707, 'mean': 1589.2503536067893, 'median': 1500}, 'math': {'num': 854, 'mean': 2098.4777517564403, 'median': 2100.0}, 'trees': {'num': 362, 'mean': 2496.1325966850827, 'median': 2500.0}, 'matrices': {'num': 62, 'mean': 2609.6774193548385, 'median': 2600.0}, 'divide and conquer': {'num': 128, 'mean': 2614.0625, 'median': 2600.0}, 'number theory': {'num': 243, 'mean': 2197.9423868312756, 'median': 2200}, 'greedy': {'num': 692, 'mean': 1920.52023121387

In [94]:
# Verdict counts returned by get_submission_info.
print(y)

{'OK': 3195, 'TIME_LIMIT_EXCEEDED': 576, 'WRONG_ANSWER': 1550, 'COMPILATION_ERROR': 125, 'RUNTIME_ERROR': 263, 'MEMORY_LIMIT_EXCEEDED': 50, 'SKIPPED': 9, 'PARTIAL': 29, 'CHALLENGED': 20, 'IDLENESS_LIMIT_EXCEEDED': 7, 'FAILED': 1}


In [95]:
# Handles whose submissions are summarised in the cells below.
top10 = [
    'tourist',
    'Benq',
    'maroonrk',
    'Miracle03',
    'peehs_moorhsum',
    'Radewoosh',
    'Petr',
    'sunset',
    'ko_osaga',
    'jiangly',
]
print(len(top10))

10


In [100]:
def get_dataframe(users_list):
    """Build a table with one row of submission statistics per user.

    Calls ``get_submission_info`` for every handle in ``users_list`` (with a
    tqdm progress bar) and returns a DataFrame with the columns ``user``,
    ``tag_info`` and ``verdict_count``; the last two hold the dicts returned
    by ``get_submission_info``.
    """
    handles = []
    tag_summaries = []
    verdict_summaries = []
    for handle in tqdm(users_list):
        tag_summary, verdict_summary = get_submission_info(handle)
        handles.append(handle)
        tag_summaries.append(tag_summary)
        verdict_summaries.append(verdict_summary)
    return pd.DataFrame({
        'user': handles,
        'tag_info': tag_summaries,
        'verdict_count': verdict_summaries,
    })

In [102]:
# Collect the submission statistics of every handle in top10 and preview them.
users_df = get_dataframe(top10)
users_df.head()

100%|██████████| 10/10 [00:25<00:00,  2.53s/it]


Unnamed: 0,user,tag_info,verdict_count
0,tourist,"{'data structures': {'num': 326, 'mean': 2385....","{'OK': 1777, 'WRONG_ANSWER': 480, 'TIME_LIMIT_..."
1,Benq,"{'brute force': {'num': 447, 'mean': 1947.6510...","{'OK': 3195, 'TIME_LIMIT_EXCEEDED': 576, 'WRON..."
2,maroonrk,"{'binary search': {'num': 142, 'mean': 2269.71...","{'OK': 1362, 'WRONG_ANSWER': 586, 'TIME_LIMIT_..."
3,Miracle03,"{'data structures': {'num': 120, 'mean': 2465....","{'OK': 565, 'TIME_LIMIT_EXCEEDED': 130, 'COMPI..."
4,peehs_moorhsum,"{'data structures': {'num': 76, 'mean': 2247.3...","{'OK': 337, 'MEMORY_LIMIT_EXCEEDED': 13, 'TIME..."


In [103]:
# Save the per-user summary. NOTE: the tag_info and verdict_count columns hold
# dicts, so they are written to the CSV as their string representation.
users_df.to_csv('Data/users/users.csv', index=False)