In [1]:
import json
import os
try:
    from urlparse import urljoin
except:
    from urllib.parse import urljoin
from dotenv import load_dotenv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()


def _requireEnv(name):
    """Return the non-empty value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Without this check the
            URL concatenations below would fail with an opaque
            ``TypeError`` (``str + None``).
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            'Environment variable ' + name + ' is not set; '
            'define it in the environment or in the .env file.')
    return value


# HTTPS so the login page (and the credentials typed into it) is never
# requested over plain HTTP.
CF_MAIN_URL = 'https://codeforces.com/'
CF_LOGIN_URL = CF_MAIN_URL + 'enter'
# URL path segment placed before the contest id; HtmlCollector.pageString
# special-cases the value 'contest'.
CONTEST_TYPE = _requireEnv('CONTEST_TYPE')
CONTEST_ID = _requireEnv('CONTEST_ID')
CONTEST_URL = CF_MAIN_URL + CONTEST_TYPE + '/' + CONTEST_ID
STATUS_PAGE = CONTEST_URL + '/status'
SUB_URL = CONTEST_URL + '/submission/'
# Credentials are only needed by the login step, so they are not validated here.
HANDLE = os.getenv('HANDLE')
PASSWORD = os.getenv('PASSWORD')
# Maps a language-name prefix, as shown in the status table, to the file
# extension used when saving a submission's source.
extension = {
    'GNU C++': 'cpp',
    'Python': 'py',
    'Java': 'java',
    'Rust': 'rs',
    'Node.js': 'js',
}

In [2]:
class HtmlCollector():
    """Browser session that logs in and saves every page of the contest status listing.

    Attributes:
        driver: the Selenium WebDriver used for all page loads.
        html: page sources collected so far, in the order they were saved.
        curPage: 1-based index of the next status page to fetch.
    """

    def __init__(self, driver=None, driverPath='/snap/bin/chromium.chromedriver'):
        """Create the collector.

        Args:
            driver: an already-constructed WebDriver to reuse. When omitted, a
                Chrome driver is started from ``driverPath``.
            driverPath: path to the chromedriver executable, used only when
                ``driver`` is not supplied.
        """
        if driver is None:
            # NOTE(review): newer Selenium releases expect a Service object
            # rather than a positional path; pass a ready-made `driver` if
            # this call is rejected by the installed version.
            driver = webdriver.Chrome(driverPath)
        self.driver = driver
        self.html = []
        self.curPage = 1

    def login(self):
        """Submit the login form and wait up to 10 seconds for the personal sidebar.

        Prints 'Login Failed!' if the sidebar does not appear in time; no
        exception is raised for that case.
        """
        # Local import: keeps this class self-contained without editing the
        # notebook's import cell.
        from selenium.common.exceptions import TimeoutException

        self.driver.get(CF_LOGIN_URL)
        self.driver.find_element(By.ID, 'handleOrEmail').send_keys(HANDLE)
        self.driver.find_element(By.ID, 'password').send_keys(PASSWORD)
        self.driver.find_element(By.ID, 'remember').click()
        self.driver.find_element(By.CLASS_NAME, 'submit').submit()
        try:
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'personal-sidebar')))
        except TimeoutException:
            # Only the wait timing out means "login failed"; anything else
            # (including Ctrl-C) should surface instead of being swallowed.
            print('Login Failed!')

    def pageCount(self):
        """Return the last page number shown by the pagination of the loaded page.

        Returns 1 when the page has no 'page-index' elements (presumably the
        case when the listing fits on a single page) instead of raising
        ``IndexError``.
        """
        pageIndices = self.driver.find_elements(By.CLASS_NAME, 'page-index')
        if not pageIndices:
            return 1
        return int(pageIndices[-1].text)

    def pageString(self, pageNum):
        """Return the URL suffix selecting status page ``pageNum``.

        The 'contest' type uses a path-style page index; every other
        ``CONTEST_TYPE`` uses a ``pageIndex`` query parameter.
        """
        if CONTEST_TYPE == 'contest':
            return '/page/' + str(pageNum) + '?order=BY_JUDGED_DESC'
        return '?pageIndex=' + str(pageNum) + '&order=BY_JUDGED_DESC'

    def iterStatusPage(self):
        """Load and save each status page from ``curPage`` through the last page."""
        # The first page is fetched unconditionally: pageCount() reads the
        # pagination of the page currently loaded in the browser, so it is
        # only meaningful after a status page has been opened.
        while self.curPage == 1 or self.curPage <= self.pageCount():
            self.driver.get(STATUS_PAGE + self.pageString(self.curPage))
            self.saveHtml(self.curPage)
            self.curPage += 1

    def saveHtml(self, num):
        """Write the current page source to ./html/page<num>.html and remember it in ``html``."""
        fName = './html/page' + str(num) + '.html'
        os.makedirs(os.path.dirname(fName), exist_ok=True)
        # Read once so the file and the in-memory copy are the same snapshot.
        pageSource = self.driver.page_source
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent characters that appear in the page.
        with open(fName, 'w', encoding='utf-8') as f:
            f.write(pageSource)
        self.html.append(pageSource)

# Start the browser, sign in, then walk the contest status pages. Each page's
# source is written to ./html/page<N>.html and also kept in app.html.
# `app` stays alive after this cell: SubmissionCollector reuses app.driver.
app = HtmlCollector()
app.login()
app.iterStatusPage()


In [None]:
class SubmissionCollector():
    """Extract accepted submissions from saved status pages and download their sources.

    Attributes:
        html: iterable of status-page HTML strings to parse.
        subObjs: list of dicts with keys 'subId', 'prob', 'handle', 'ext',
            one per submission whose verdict is 'OK'.
    """

    def __init__(self, html, driver=None):
        """Create the collector.

        Args:
            html: status-page HTML strings to parse.
            driver: WebDriver used to open submission pages. When omitted, the
                notebook-level ``app.driver`` is looked up at call time.
        """
        self.html = html
        self.subObjs = []
        self._driver = driver

    def _getDriver(self):
        """Return the injected driver, falling back to the global ``app.driver``."""
        if self._driver is not None:
            return self._driver
        return app.driver

    def _parseRow(self, row):
        """Return the record dict for one submission row, or None if its verdict is not 'OK'."""
        statusCell = row.find('td', {'submissionid': True})
        verdictSpan = statusCell.span if statusCell is not None else None
        # The verdict is checked first, and read with .get(), so rows that are
        # not accepted never have to parse cleanly.
        if verdictSpan is None or verdictSpan.get('submissionverdict') != 'OK':
            return None

        authorCell = row.find('td', {'data-participantid': True})
        problemCell = row.find('td', {'data-problemid': True})
        # NOTE(review): only the first character of the problem label is kept,
        # so labels longer than one character would share a directory.
        # Confirm this is intended.
        prob = problemCell.a.text.strip()[0]
        # The language name is read from the cell right after the problem cell.
        lang = problemCell.find_next('td').text.strip()
        # First matching prefix wins; '' when the language is not in
        # `extension`, in which case the saved file name ends with a bare '.'.
        ext = next((extension[key] for key in extension if lang.startswith(key)), '')
        return {
            'subId': verdictSpan['submissionid'],
            'prob': prob,
            'handle': authorCell.a.text,
            'ext': ext,
        }

    def collectSubId(self):
        """Parse every page in ``html`` and append a record to ``subObjs`` for each 'OK' submission."""
        for html in self.html:
            soup = BeautifulSoup(html, 'html.parser')
            for row in soup.find_all('tr', {'data-submission-id': True}):
                record = self._parseRow(row)
                if record is not None:
                    self.subObjs.append(record)

    def collectSubmissions(self):
        """Open each collected submission's page and save its source to disk."""
        driver = self._getDriver()
        for sub in self.subObjs:
            driver.get(SUB_URL + sub['subId'])
            self.getSource(sub)

    def getSource(self, sub):
        """Save the source shown on the currently loaded submission page.

        The file is written to
        ./src/<CONTEST_ID>/<prob>/<ext>/<handle>_<subId>.<ext>.

        Args:
            sub: a record dict as produced by ``collectSubId``.
        """
        source = self._getDriver().find_element(By.ID, 'program-source-text')
        fName = './src/{}/{}/{}/{}_{}.{}'.format(
            CONTEST_ID, sub['prob'], sub['ext'], sub['handle'], sub['subId'], sub['ext'])
        os.makedirs(os.path.dirname(fName), exist_ok=True)
        # Explicit UTF-8: submitted code may contain characters the platform
        # default encoding cannot represent.
        with open(fName, 'w', encoding='utf-8') as f:
            f.write(source.text)

# Parse the status pages gathered by `app` for submissions whose verdict is
# 'OK', then open each one in the browser and save its source under ./src/.
# NOTE(review): the browser behind app.driver is never quit in the visible
# cells; consider closing it once the downloads are done.
sub = SubmissionCollector(app.html)
sub.collectSubId()
sub.collectSubmissions()
# Dump the collected submission records for inspection.
print(sub.subObjs)
print('end')