In [1]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
from tqdm import tqdm

import datetime

import plotly.express as px

## Load motions from calico tabs

In [2]:
motions_suburl = f"motions/statistics"

calico_tabs = [
    ('nsdc25', 'NordicSDC2025'),
    ('eurosdc25', 'eurosdc-25'),
    # ('esdc', 'esdc2025'),
    ('argument', 'eco2025'),
    ('argument', 'ECO2024'),
    ('nordics2023', 'nordics2023')
]

In [3]:
def parse_motion(div): 
    """
    Given a motion top-level div, parse out the relevant details.
    """
    round = div.find('span', class_="badge").get_text(strip=True)
    motion = div.find('h4').find(string=True, recursive=False).strip()

    info_slide = div.find(string=re.compile("View Info Slide"))
    info_slide_text = None

    if info_slide is not None:
        info_slide_text = info_slide.find_next('div', class_='modal-body').get_text(strip=True)

    # regex match the wins for prop and opp
    text = div.get_text()

    re_prop = re.compile(r"(\d+) (Prop|Gov) win")
    re_opp = re.compile(r"(\d+) Opp win")

    prop_wins = re.findall(re_prop, text)[0][0]
    opp_wins = re.findall(re_opp, text)[0][0]

    balance = div.find(string=re.compile('balance')).get_text(strip=True)

    return motion, round, info_slide_text, prop_wins, opp_wins, balance

In [4]:
data = {
    "Tournament": [],
    "Motion": [],
    "Round": [],
    "Info Slide": [],

    "Prop wins": [],
    "Opp wins": [],
    "Balance": [],
}

for subdomain, slug in calico_tabs:
    calico_url = f"https://{subdomain}.calicotab.com/{slug}"
    motions_url = f"{calico_url}/{motions_suburl}"

    response = requests.get(motions_url) 
    try: 
        assert response.status_code == 200, f"Failed to fetch {motions_url}: {response.status_code}"
    except: 
        continue

    html = response.text
    soup = BeautifulSoup(html, 'html.parser')

    motion_divs = soup.find_all('div', class_="list-group mt-3")
    print(f"Found {len(motion_divs)} motions from {slug}.")

    for i in tqdm(range(len(motion_divs))):
        div = motion_divs[i]
        motion, round, info_slide_text, prop_wins, opp_wins, balance = parse_motion(div)
        

        data["Motion"].append(motion)
        data["Round"].append(round)
        data["Info Slide"].append(info_slide_text)
        data["Prop wins"].append(int(prop_wins))
        data["Opp wins"].append(int(opp_wins))
        data["Balance"].append(balance)
        data["Tournament"].append(slug)

Found 11 motions from NordicSDC2025.


100%|██████████| 11/11 [00:00<00:00, 5113.30it/s]


Found 10 motions from eurosdc-25.


100%|██████████| 10/10 [00:00<00:00, 8353.52it/s]


Found 10 motions from eco2025.


100%|██████████| 10/10 [00:00<00:00, 10954.05it/s]


Found 10 motions from ECO2024.


100%|██████████| 10/10 [00:00<00:00, 9489.38it/s]


Found 9 motions from nordics2023.


100%|██████████| 9/9 [00:00<00:00, 5757.89it/s]


In [5]:
df1 = pd.DataFrame(data)
df1.tail()

Unnamed: 0,Tournament,Motion,Round,Info Slide,Prop wins,Opp wins,Balance
45,nordics2023,THBT the European Union should abolish the req...,Round 5,On issues considered sensitive to the EU (incl...,6,7,probably balanced
46,nordics2023,THR the expectation that committed romantic re...,Round 6,"Just like a lot of you, we were very tired and...",10,3,imbalanced at 10% level
47,nordics2023,THR the aesthetisation of suffering,Quarterfinals,Aesthetisation is a depiction or glorification...,4,0,balance inconclusive
48,nordics2023,THBT criminal justice policy should be decided...,Semifinals,Technocrats are decision-makers who are select...,1,1,balance inconclusive
49,nordics2023,THR religious faith being predominantly experi...,Grand Final,,1,0,balance inconclusive


## Parse .txt from GitHub
https://github.com/tokyodebate/motions/blob/main/International/WSDC.txt

In [6]:
data2 = {
    "Tournament": [],
    "Motion": [],
    "Round": [],
    "Info Slide": [],

    # "Prop wins": [],
    # "Opp wins": [],
    # "Balance": [],
}

def count_tabs(line: str):
    """
    Count the number of tabs in a line.
    """
    return line.count('\t')

with open('WSDC.txt', 'r') as file:
    lines = file.readlines()
    lines = [line.strip('\n') for line in lines]

    lines = lines[1:] # Skip the header

    tournament = None
    round = None
    motions = []

    for i in range(len(lines)):
        curr = lines[i]

        if count_tabs(curr) == 1: 
            tournament = curr.strip()
        elif count_tabs(curr) == 2:
            round = curr.strip()
        elif count_tabs(curr) == 3:
            next_line = lines[i+1] if i+1 < len(lines) else None
            info_text = next_line.strip() if next_line and count_tabs(next_line) == 4 else None

            motion = curr.strip()   
            data2["Motion"].append(motion)
            data2["Round"].append(round)
            data2["Info Slide"].append(info_text)
            data2["Tournament"].append(tournament)

        else:
            continue


In [7]:
df2 = pd.DataFrame(data2)
df2.value_counts('Tournament')

Tournament
Netherlands WSDC 2022                                                           21
WSDC Mexico 2020                                                                20
24th World Schools Debating Championships 2012, South Africa                    13
21st World Schools Debating Championships 2009 in Athens, Greece                13
Vietnam WSDC 2023                                                               13
30th World Schools Debating Championships 2018, Croatia and Slovenia            13
29th World Schools Debating Championships 2017, Indonesia                       13
28th World Schools Debating Championships 2016, Germany                         13
27th World Schools Debating Championships 2015, Singapore                       13
WSDC Thailand 2019                                                              13
16th World Schools Debating Championships 2004 in Stuttgart, Germany            13
15th World Schools Debating Championships 2003 in Lima, Peru                

In [8]:
df = pd.concat([df1, df2], ignore_index=True)
df.head()

Unnamed: 0,Tournament,Motion,Round,Info Slide,Prop wins,Opp wins,Balance
0,NordicSDC2025,This House Supports gentle parenting becoming ...,Round 1,Gentle parenting is a parenting style that say...,16.0,1.0,imbalanced at 50% level
1,NordicSDC2025,THR the creation of the series and film industry,Round 2,,13.0,1.0,probably balanced
2,NordicSDC2025,This House Believes That it is in the interest...,Round 3,Democratic backsliding is a process of regime ...,15.0,1.0,probably balanced
3,NordicSDC2025,This House Opposes the Globalization of the Ma...,Round 4,“Major European Football Leagues” include the ...,10.0,1.0,imbalanced at 50% level
4,NordicSDC2025,This House Would implement a weekly 'blackout ...,Round 5,A 'blackout day' refers to a day where all soc...,13.0,1.0,probably balanced


## Data polishing 

### Duplicates

In [9]:
# check for duplicates on motion and round 
# motions can be duplicated (if same is used in two different rounds)
duplicates = df[df.duplicated(subset=['Motion', 'Round'], keep=False)]

if len(duplicates) > 0:
    raise ValueError(f"Found {len(duplicates)} duplicates in the data. Please check the data for inconsistencies.")

### Tournament year

In [10]:
def get_year(tournament: str):
    pattern = re.compile(r'\d{4}')
    match = pattern.search(tournament)

    if tournament == 'eurosdc-25':
        return 2025

    if match:
        return int(match.group(0))
    else:
        return None

df['Year'] = df['Tournament'].apply(get_year)
df.value_counts('Year')

Year
2025    31
2023    22
2022    21
2020    20
2012    13
2019    13
2018    13
2003    13
2004    13
2005    13
2017    13
2007    13
2016    13
2009    13
2015    13
2013    12
2011    12
2010    12
2008    12
2006    12
2002    12
2001    12
1999    12
2014    11
2021    11
2024    10
1994     9
1997     5
1998     5
2000     4
Name: count, dtype: int64

### Motion types

In [11]:
def fix_motions(motion: str):
    if motion == "This House as the environmental movement would support the use of extremist tactics":
        return "This House, as the environmental movement, would support the use of extremist tactics."
    
    if motion == "TH, as the average 25 year old Romanian, would wait for their 'statistical soulmate'(josh is sad and lonely)":
        return motion.replace('would', "W")

    if motion.startswith('That'):
        motion = 'This House believes that' + motion[4:]
    
    return motion

df['Motion'] = df['Motion'].apply(fix_motions)

In [12]:
full_to_abbr = {
    'BT': "believes",
    'B': "believes",
    "S": "supports",
    "O": "opposes",
    "W": "would",
    "P": "prefers",
    "R": "regrets",
}


def get_motion_type(motion: str): 
    # https://regex101.com/r/r8h59q/1
    pattern = re.compile(r"(?:This (h|H)ouse?|TH)[ ,]?(?:,.*?, )?(?P<type>\w+)")

    match = re.search(pattern, motion)

    if match: 
        motion_type = match.group('type')
        all_caps = all(map(str.isupper, motion_type))

        if all_caps:
            return f'{full_to_abbr[motion_type]}'
        else: 
            return f'{motion_type.lower()}'

    return motion

df["Motion Type"] = df["Motion"].apply(get_motion_type)

In [19]:
pd.set_option('display.max_colwidth', None)
df

Unnamed: 0,Tournament,Motion,Round,Info Slide,Prop wins,Opp wins,Balance,Year,Motion Type
0,NordicSDC2025,This House Supports gentle parenting becoming the norm,Round 1,"Gentle parenting is a parenting style that says that parents should be non-confrontational. The style states that parents should focus on deliberation with (young) children, have high levels of patience and avoid punishments and ultimatums.",16.0,1.0,imbalanced at 50% level,2025,supports
1,NordicSDC2025,THR the creation of the series and film industry,Round 2,,13.0,1.0,probably balanced,2025,regrets
2,NordicSDC2025,This House Believes That it is in the interest of Georgian government to democratically backslide,Round 3,"Democratic backsliding is a process of regime change toward autocracy in which the votes of individuals have less power and political power becomes repressive. Examples include limiting free speech/press, corruption, nepotism or gaining control over state institutions.",15.0,1.0,probably balanced,2025,believes
3,NordicSDC2025,This House Opposes the Globalization of the Major European Football Leagues,Round 4,"“Major European Football Leagues” include the English Premier League, the German Bundesliga, the Italian Seria A, the French Ligue 1 and the Spanish La Liga.For the purpose of this debate, globalization of football leagues refers to increasing the number of international players, creating ownership franchises and actively promoting the league in foreign countries.",10.0,1.0,imbalanced at 50% level,2025,opposes
4,NordicSDC2025,This House Would implement a weekly 'blackout day',Round 5,"A 'blackout day' refers to a day where all social media sites, streaming services, TV, internet, radio, and similar technologies are inaccessible for personal, non-emergency use. For example, one would still be able to call hospitals and banks but would not be able to watch downloaded Netflix series",13.0,1.0,probably balanced,2025,would
...,...,...,...,...,...,...,...,...,...
383,6th World Schools Debating Championships 1994 in New Zealand,This House believes that feminism is corrupting the family.,R7,,,,,1994,believes
384,6th World Schools Debating Championships 1994 in New Zealand,This House believes that Hollywood has a lot to answer for.,Some of the impromptu motions,,,,,1994,believes
385,6th World Schools Debating Championships 1994 in New Zealand,This House believes that tourists are a global menace.,Some of the impromptu motions,,,,,1994,believes
386,6th World Schools Debating Championships 1994 in New Zealand,This House believes that repression of civil rights justifies violent action.,Some of the impromptu motions,,,,,1994,believes


## Analysis

### All-time motion types

In [14]:
counts = df.value_counts('Motion Type')
print(counts)

px.bar(
    counts, 
    y='count', 
    title='Motion Types',
    labels={'Motion Type': 'Motion Type'},
).show()

Motion Type
believes       174
would          136
supports        26
regrets         19
prefers         18
opposes         10
condemns         2
disapproves      1
fears            1
refuses          1
Name: count, dtype: int64


### This year's motion types

In [15]:
current_year = datetime.datetime.now().year

df_this_year = df[df['Year'] == current_year]
df_this_year.tail()

Unnamed: 0,Tournament,Motion,Round,Info Slide,Prop wins,Opp wins,Balance,Year,Motion Type
26,eco2025,THS the development of genetic engineering,Round 6,"Genetic engineering (also called genetic modification) is a process that uses laboratory-based technologies to alter the DNA makeup of an organism. This may involve changing a single base pair, deleting a region of DNA or adding a new segment of DNA. For example, genetic engineering may involve adding a gene from one species to an organism from a different species to produce a desired trait. Used in research and industry, genetic engineering has been applied to the production of cancer therapies, brewing yeasts, genetically modified plants and livestock, and more. Currently, genetic engineering is being worked on by scientists in order to be applied on humans in the womb.",10.0,5.0,imbalanced at 50% level,2025,supports
27,eco2025,THP a world in which all people have chronic Pinocchiosis,Novice Grand Final,"Chronic Pinocchiosis is a condition that causes an individual’s nose to horizontally grow by a millimetre every time they knowingly lie. Once it grows, it does not shrink back, and it is magically immune to rhinoplasty or any other attempt at artificial shrinkage. Long noses do not cause any significant health issues and nose growth caused by Chronic Pinocchiosis is not hereditary.",1.0,0.0,balance inconclusive,2025,prefers
28,eco2025,"This House, as the environmental movement, would support the use of extremist tactics.",Quarterfinals,<div>Extremist tactics includes the destruction of property and large-scale civil disobedience i.e blocking roads and buildings</div><div><br></div>,0.0,4.0,balance inconclusive,2025,would
29,eco2025,"THBT the heads of central banks (e.g. the Federal Reserve, the European Central Bank) should be democratically elected",Semifinals,,0.0,2.0,balance inconclusive,2025,believes
30,eco2025,THW ban pet ownership,Grand Final,,1.0,0.0,balance inconclusive,2025,would


In [16]:
counts = df_this_year.value_counts('Motion Type')

px.bar(
    counts, 
    y='count', 
    title=f'Motion Types for {current_year}',
    labels={'Motion Type': 'Motion Type'},
).show()

In [17]:
prop_wins = df['Prop wins'].sum()
opp_wins = df['Opp wins'].sum()

print(prop_wins, opp_wins)

px.pie(
    values=[prop_wins, opp_wins],
    names=['Prop wins', 'Opp wins'],
    title='Total Prop vs Opp Wins',
).show()

290.0 165.0


In [18]:
prop_wins = df_this_year['Prop wins'].sum()
opp_wins = df_this_year['Opp wins'].sum()

print(prop_wins, opp_wins)

px.pie(
    values=[prop_wins, opp_wins],
    names=['Prop wins', 'Opp wins'],
    title='Total Prop vs Opp Wins',
).show()

196.0 85.0
