### Assigning Temporal Phases to Sportswashing Events


In [19]:
import pandas as pd
from datetime import datetime

# Load dataset
df = pd.read_csv('Final_Thesis_Merged.csv')
df['Date'] = pd.to_datetime(df['Date'], errors='coerce').dt.tz_localize(None)
df['Event'] = df['Event'].str.strip()

# Define events with known temporal ranges (excluding Gulf multi-sport and Formula 1)
event_dates = {
    'FIFA World Cup 2022': {
        'start': datetime(2022, 11, 20),
        'end': datetime(2022, 12, 18)
    },
    'LIV Golf': {
        'start': datetime(2022, 6, 9),
        'end': datetime(2022, 9, 30)
    },
    'Saudi Pro League': {
        'start': datetime(2023, 6, 15),
        'end': datetime(2023, 9, 30)
    },
    'Newcastle United ownership': {
        'start': datetime(2021, 10, 7),
        'end': datetime(2021, 10, 14)
    },
    'Manchester City ownership': {
        'start': datetime(2008, 9, 1),
        'end': datetime(2009, 3, 1)
    },
    'Paris Saint-Germain ownership': {
        'start': datetime(2011, 6, 30),
        'end': datetime(2011, 12, 31)
    }
}

# Filter the dataset to only include the above events
df = df[df['Event'].isin(event_dates.keys())].copy()

# Assign phase
def assign_phase(row):
    event = row['Event']
    date = row['Date']
    if pd.isna(date) or event not in event_dates:
        return 'Unknown'
    
    start = event_dates[event]['start']
    end = event_dates[event]['end']

    if date < start:
        return 'Pre'
    elif start <= date <= end:
        return 'During'
    else:
        return 'Post'

df['Event_Phase'] = df.apply(assign_phase, axis=1)

# Save to CSV
df.to_csv('Temporal_Events_Only.csv', index=False)

# Optional preview
print(df[['Event', 'Date', 'Event_Phase']].dropna().head())


  df = pd.read_csv('Final_Thesis_Merged.csv')


                 Event                Date Event_Phase
0  FIFA World Cup 2022 2024-05-01 17:50:48        Post
1     Saudi Pro League 2023-06-13 00:57:32         Pre
2  FIFA World Cup 2022 2022-12-28 04:57:05        Post
3  FIFA World Cup 2022 2022-10-07 17:58:57         Pre
4  FIFA World Cup 2022 2022-09-26 18:43:07         Pre


### Comment Distribution Across Event Phases (Pre, During, Post)


In [22]:
# Load the cleaned dataset with temporal labels
df_temporal = pd.read_csv("Temporal_Events_Only.csv")

# Group by Event and Phase, count number of comments
event_phase_counts = df_temporal.groupby(['Event', 'Event_Phase']).size().reset_index(name='Comment_Count')

# Pivot the table to get a row per event and columns for Pre/During/Post
pivot_table = event_phase_counts.pivot(index='Event', columns='Event_Phase', values='Comment_Count') \
                                 .fillna(0).astype(int)

print(pivot_table)


Event_Phase                    During  Post    Pre
Event                                             
FIFA World Cup 2022             30626  5958  13180
LIV Golf                         1015  2386     30
Manchester City ownership           0  4757      0
Newcastle United ownership       1006  2328   1292
Paris Saint-Germain ownership       0   460      0
Saudi Pro League                 1804  2251   1710


  df_temporal = pd.read_csv("Temporal_Events_Only.csv")
