<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [1]:
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import datetime

In [2]:
# Download the S-Tier tournament listing page.
# - timeout: keeps the cell from hanging forever if the server never answers.
# - raise_for_status(): stops the notebook on an HTTP error instead of letting
#   later cells parse an error page as if it were the tournament list.
# NOTE(review): Liquipedia's API terms of use reportedly ask automated clients
# to send a descriptive User-Agent and to rate-limit requests — confirm and
# add a `headers=` argument if so.
r = requests.get('https://liquipedia.net/counterstrike/S-Tier_Tournaments', timeout=30)
r.raise_for_status()

In [3]:
# Parse the downloaded HTML. The parser is named explicitly: without it
# BeautifulSoup uses whichever parser happens to be installed (and emits a
# warning), so the same notebook can behave differently between environments.
# 'html.parser' ships with Python, so no extra dependency is needed.
bstext = bs(r.text, 'html.parser')
# Show the first 50 characters of the page text as a quick sanity check.
display(bstext.text[:50])
# Collect the divs whose class attribute is exactly this string; these are the
# containers that scrape_tables() walks row by row.
tables = bstext.find_all('div', class_="gridTable tournamentCard Tierless")
print(len(tables), 'tables found.')

'\n\n\nS-Tier Tournaments - Liquipedia Counter-Strike '

13 tables found.


In [4]:
# Year of today's date on the local clock, evaluated when this cell runs.
current_year = datetime.datetime.now().year

In [5]:
def scrape_tables(tables, min_year=None):
    """Collect tournament rows from parsed tournament tables.

    Walks every ``gridRow`` div of every table in order and stops at the first
    row whose year is earlier than ``min_year``. This relies on the rows being
    listed newest-first (presumably how the page is ordered — verify if the
    layout changes).

    Parameters
    ----------
    tables : iterable
        Parsed table elements exposing ``find_all`` / ``find`` (BeautifulSoup
        tags in this notebook).
    min_year : int, optional
        Earliest year to keep. Defaults to the current calendar year.

    Returns
    -------
    list of [name, date_range, prize]
        ``date_range`` is the raw cell text; ``prize`` is the prize cell text
        with its first character (assumed to be the currency symbol — confirm)
        and all commas removed. Always a list, possibly empty.
    """
    if min_year is None:
        min_year = datetime.date.today().year

    data = []
    for table in tables:
        # A callable class filter is also invoked with None for tags that have
        # no class attribute, so guard against that before the `in` test.
        rows = table.find_all(
            'div', class_=lambda css: css is not None and 'gridRow' in css
        )
        for row in rows:
            date_cell = row.find('div', class_='gridCell EventDetails Date Header')
            if date_cell is None:
                # Not an event row (no date cell): nothing to record.
                continue
            date_range = date_cell.text
            try:
                # The year is taken from the last four characters of the date text.
                year = int(date_range[-4:])
            except ValueError:
                # Date text does not end in a year: skip the row.
                continue
            if year < min_year:
                print(f'Before {year + 1}, breaking.')
                return data
            name = row.find('div', class_='gridCell Tournament Header').find('b').find('a').text
            prize = row.find('div', class_='gridCell EventDetails Prize Header').text[1:].replace(',', '')
            data.append([name, date_range, prize])
    # Reached when no older row was met; without this return the function
    # would hand back None and drop everything collected.
    return data

# Run the scraper over the tables found earlier and tabulate the result,
# one row per tournament, then preview the first few rows.
rows = scrape_tables(tables)
df = pd.DataFrame(
    data=rows,
    columns=['name', 'date_range', 'prize'],
)
df.head()

Before 2023, breaking.


Unnamed: 0,name,date_range,prize
0,CS2 Major Championship Winter 2024,"Dec 01 - 15, 2024",1250000
1,PGL Major Copenhagen 2024,"Mar 17 - 31, 2024",1250000
2,BLAST Premier: World Final 2023,"Dec 13 - 17, 2023",1000000
3,BLAST Premier: Fall Final 2023,"Nov 22 - 26, 2023",425000
4,CS:GO Asia Championships 2023,"Nov 08 - 12, 2023",500000


In [6]:
def _split_date_range(date_range):
    """Split a date-range string into ('Mon DD YYYY', 'Mon DD YYYY') strings.

    Layouts handled:
      'Dec 01 - 15, 2024'            same month
      'Oct 27 - Nov 05, 2023'        different months, same year
      'Dec 28, 2023 - Jan 05, 2024'  different years (supported in case it occurs)
      'Dec 15, 2024'                 single day, start == end (in case it occurs)

    Any other layout is passed through as-is and left for pd.to_datetime to
    accept or reject.
    """
    start_text, sep, end_text = date_range.strip().partition(' - ')
    if not sep:
        # No ' - ' separator: a single-day event.
        end_text = start_text
    end_day, _, end_year = end_text.rpartition(', ')
    start_day, _, start_year = start_text.rpartition(', ')
    if not start_day:
        # The start side carries no year of its own: it shares the end's year.
        start_day, start_year = start_text, end_year
    if len(end_day.split()) == 1:
        # The end side is only a day number: it shares the start's month.
        end_day = f'{start_day.split()[0]} {end_day}'
    return f'{start_day} {start_year}', f'{end_day} {end_year}'


# Derive proper datetime columns from the raw date_range text.
df['start_date'] = pd.to_datetime(df.date_range.apply(lambda text: _split_date_range(text)[0]))
df['end_date'] = pd.to_datetime(df.date_range.apply(lambda text: _split_date_range(text)[1]))
df

Unnamed: 0,name,date_range,prize,start_date,end_date
0,CS2 Major Championship Winter 2024,"Dec 01 - 15, 2024",1250000,2024-12-01,2024-12-15
1,PGL Major Copenhagen 2024,"Mar 17 - 31, 2024",1250000,2024-03-17,2024-03-31
2,BLAST Premier: World Final 2023,"Dec 13 - 17, 2023",1000000,2023-12-13,2023-12-17
3,BLAST Premier: Fall Final 2023,"Nov 22 - 26, 2023",425000,2023-11-22,2023-11-26
4,CS:GO Asia Championships 2023,"Nov 08 - 12, 2023",500000,2023-11-08,2023-11-12
5,Thunderpick World Championship 2023,"Oct 27 - Nov 05, 2023",500000,2023-10-27,2023-11-05
6,Roobet Cup 2023,"Oct 26 - Nov 05, 2023",250000,2023-10-26,2023-11-05
7,Intel Extreme Masters Sydney 2023,"Oct 16 - 22, 2023",250000,2023-10-16,2023-10-22
8,ESL Pro League Season 18,"Aug 30 - Oct 01, 2023",850000,2023-08-30,2023-10-01
9,Gamers8 2023,"Aug 16 - 20, 2023",1000000,2023-08-16,2023-08-20


In [7]:
from icalendar import Calendar, Event

# Build an iCalendar file with one all-day event per tournament in df.
cal = Calendar()
# PRODID and VERSION are required calendar properties in RFC 5545; some
# clients refuse to import a file that lacks them.
cal.add('prodid', '-//csgo S Tier Events//Liquipedia scrape//EN')
cal.add('version', '2.0')
cal.add('summary', 'csgo S Tier Events')

# DTSTAMP is required on every VEVENT; use one creation time for the whole export.
created_at = datetime.datetime.now(datetime.timezone.utc)
one_day = pd.Timedelta(days=1)

for row in df.itertuples(index=False):
    event = Event()
    event.add('summary', row.name)
    # UID is derived from the tournament name (spaces removed, lower-cased).
    event.add('uid', row.name.replace(' ', '').lower())
    event.add('dtstamp', created_at)
    # Pass plain dates so the events are written as all-day (VALUE=DATE)
    # entries rather than timed events starting at midnight.
    event.add('dtstart', row.start_date.date())
    # For all-day events DTEND is exclusive, hence the extra day.
    event.add('dtend', (row.end_date + one_day).date())

    cal.add_component(event)

with open('csgo_S_tier_calendar.ics', 'wb') as f:
    f.write(cal.to_ical())