In [None]:
# Optional environment setup, kept commented out so the notebook does not
# reinstall packages on every run. The first three lines pin a CPU-only PyTorch
# build plus the Hugging Face / scikit-learn stack.
# NOTE: the last line is unpinned and lists matplotlib and plotly more than once.
# !pip install torch==2.7.1+cpu torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
# !pip install pandas==2.2.3 scikit-learn==1.6.1 evaluate==0.4.3 datasets==3.0.1 matplotlib
# !pip install transformers==4.54.1 accelerate==1.10.1
#!pip install pandas numpy matplotlib seaborn matplotlib plotly plotly_express scikit-learn plotly imbalanced-learn

In [5]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoModel
import csv
import os
import numpy as np
import torch.nn.functional as F
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA 
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from matplotlib.ticker import MaxNLocator

#### 2024 Distribution 

In [8]:
# Load the 2024 classifier output and expose its `predicted_label` column
# under the shorter name `label` used by the cells that follow.
df2024 = (
    pd.read_csv('classified samples/2024_Classified.csv')
    .rename(columns={'predicted_label': 'label'})
)

In [8]:
# Recode party names to political leaning: 'Democrat' -> 'Left' and
# 'Republican' -> 'Right'. Any other value in the column is left as it is.
df2024['label'] = df2024['label'].replace({'Democrat': 'Left', 'Republican': 'Right'})

In [9]:
# Print the column names, dtypes and non-null counts of the relabelled 2024 frame.
df2024.info()
# Export of the relabelled frame, left disabled:
# df2024.to_csv('2024_Combined_Classified.csv', index=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270000 entries, 0 to 269999
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   label   270000 non-null  object
 1   tweet   270000 non-null  object
dtypes: object(2)
memory usage: 4.1+ MB


In [10]:
# Preview the first rows of the 2024 frame (columns: label, tweet).
df2024.head()

Unnamed: 0,label,tweet
0,Left,Mr. @ USER you should also buy TikTok a well s...
1,Right,@ USER @ USER They say we should be more grate...
2,Left,Stop Bidens DIGITAL DOLLAR NOW ! He 's try to ...
3,Left,@ USER Trump do n't take the Republican party ...
4,Left,:police_car_light:Watch CNN explain President ...


In [11]:
# Bar colours and category names, in matching order: red for Right, blue for Left.
cust_p = ["#DC1414", "#66B2FF"]
labels = ["Right", "Left"]

# Number of tweets carrying each leaning label.
count_right = int((df2024["label"] == "Right").sum())
count_left = int((df2024["label"] == "Left").sum())

# One single-bar trace per leaning, so each gets its own colour and legend entry.
# (Plotly, 2025a)
fig = go.Figure(
    data=[
        go.Bar(x=[leaning], y=[total], name=leaning, marker_color=colour)
        for leaning, total, colour in zip(labels, (count_right, count_left), cust_p)
    ]
)

# Titles, figure size and background styling. (Plotly, 2025a)
fig.update_layout(
    title="2024 Political Leaning Distribution",
    xaxis_title="Political Leaning",
    yaxis_title="Count",
    legend_title="Political Leaning",
    barmode="group",
    bargap=0.4,
    width=600,
    height=450,
    plot_bgcolor="#646161",
)
fig.show()


In [13]:
# Report how many 2024 tweets were labelled Right and Left.
count_right = len(df2024.loc[df2024["label"].eq("Right")])
count_left = len(df2024.loc[df2024["label"].eq("Left")])
print("Count of Right:", count_right)
print("Count of Left:", count_left)

Count of Right: 153928
Count of Left: 116072


In [23]:
# Rows to keep per class in the balanced sample.
# NOTE(review): hard-coded, and smaller than both 2024 class sizes printed above
# (153928 Right / 116072 Left) -- presumably chosen so the 2024 sample has the
# same size as the 2020 one; confirm.
target_size = 95899

# Split the frame by leaning.
left_df = df2024.loc[df2024['label'] == 'Left']
right_df = df2024.loc[df2024['label'] == 'Right']

# Draw target_size rows from each class with a fixed seed. Sampling is without
# replacement unless a class has fewer than target_size rows; in that case rows
# are drawn with replacement, so duplicates become possible.
left_balanced = left_df.sample(
    n=target_size, replace=target_size > len(left_df), random_state=42
)
right_balanced = right_df.sample(
    n=target_size, replace=target_size > len(right_df), random_state=42
)

# Stack both classes, shuffle the rows with the same seed and renumber the index.
df_balanced = pd.concat([left_balanced, right_balanced])
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
print(df_balanced['label'].value_counts())

label
Left     95899
Right    95899
Name: count, dtype: int64


In [19]:
# Use the saved balanced 2024 sample when it exists, so later cells keep working
# on the same file as before. If it is missing (e.g. a fresh checkout), persist
# the sample built in the previous cell to that same path instead of failing
# with FileNotFoundError. An existing file is never overwritten.
balanced_2024_path = 'classified samples/2024_Classified_Balanced.csv'
if os.path.exists(balanced_2024_path):
    df_balanced = pd.read_csv(balanced_2024_path)
else:
    # Write to exactly the path that is read back on later runs.
    df_balanced.to_csv(balanced_2024_path, index=False)

# From here on `df2024` refers to the balanced sample, not the full frame.
df2024 = df_balanced

In [20]:
# Bar colours and category names, in matching order: red for Right, blue for Left.
cust_p = ["#DC1414", "#66B2FF"]
labels = ["Right", "Left"]

# Number of tweets per leaning; `df2024` now holds the balanced sample.
count_right = int((df2024["label"] == "Right").sum())
count_left = int((df2024["label"] == "Left").sum())

# One single-bar trace per leaning, so each gets its own colour and legend entry.
# (Plotly, 2025a)
fig = go.Figure(
    data=[
        go.Bar(x=[leaning], y=[total], name=leaning, marker_color=colour)
        for leaning, total, colour in zip(labels, (count_right, count_left), cust_p)
    ]
)

# Titles, figure size and background styling. (Plotly, 2025a)
fig.update_layout(
    title="2024 Political Leaning Distribution Balanced",
    xaxis_title="Political Leaning",
    yaxis_title="Count",
    legend_title="Political Leaning",
    barmode="group",
    bargap=0.4,
    width=600,
    height=450,
    plot_bgcolor="#646161",
)
fig.show()


#### 2020 Distribution 

In [13]:
# Load the 2020 classifier output and expose its `predicted_label` column as `label`.
df1 = pd.read_csv('classified samples/2020_Classified_model19.csv').rename(
    columns={'predicted_label': 'label'}
)
# `df2020` is a second name for the same frame, not a copy, so the recoding
# below is visible through `df1` as well.
df2020 = df1
# Recode party names to political leaning: 'Democrat' -> 'Left' and
# 'Republican' -> 'Right'. Any other value in the column is left as it is.
df2020['label'] = df2020['label'].replace({'Democrat': 'Left', 'Republican': 'Right'})

In [14]:
# Preview the first rows of the relabelled 2020 frame.
df2020.head()

Unnamed: 0,label,tweet
0,Left,@ USER Glad u get out of the house DICKtrump 2020
1,Right,Tell Politicians to STICK IT with this FREE It...
2,Right,@ USER I wonder which drug Trump take Thats no...
3,Left,Now Open Create a FREE US Election Pickem Pool...
4,Right,Democrats have spend more tax payer pay time m...


In [15]:
# Bar colours and category names, in matching order: red for Right, blue for Left.
cust_p = ["#DC1414", "#66B2FF"]
labels = ["Right", "Left"]

# Number of 2020 tweets carrying each leaning label.
count_right = int((df2020["label"] == "Right").sum())
count_left = int((df2020["label"] == "Left").sum())

# One single-bar trace per leaning, so each gets its own colour and legend entry.
# (Plotly, 2025a)
fig = go.Figure(
    data=[
        go.Bar(x=[leaning], y=[total], name=leaning, marker_color=colour)
        for leaning, total, colour in zip(labels, (count_right, count_left), cust_p)
    ]
)

# Titles, figure size and background styling. (Plotly, 2025a)
fig.update_layout(
    title="2020 Political Leaning Distribution",
    xaxis_title="Political Leaning",
    yaxis_title="Count",
    legend_title="Political Leaning",
    barmode="group",
    bargap=0.4,
    width=600,
    height=450,
    plot_bgcolor="#646161",
)
fig.show()


In [16]:
# Report how many 2020 tweets were labelled Right and Left.
count_right = len(df2020.loc[df2020["label"].eq("Right")])
count_left = len(df2020.loc[df2020["label"].eq("Left")])
print("Count of Right:", count_right)
print("Count of Left:", count_left)

Count of Right: 95899
Count of Left: 128851


In [20]:
# Rows to keep per class in the balanced sample.
# NOTE(review): hard-coded; it equals the 2020 'Right' count printed above
# (95899), i.e. the smaller of the two classes -- confirm it is meant to track
# that count if the input file changes.
target_size = 95899

# Split the frame by leaning.
left_df = df2020.loc[df2020['label'] == 'Left']
right_df = df2020.loc[df2020['label'] == 'Right']

# Draw target_size rows from each class with a fixed seed. Sampling is without
# replacement unless a class has fewer than target_size rows; in that case rows
# are drawn with replacement, so duplicates become possible.
left_balanced = left_df.sample(
    n=target_size, replace=target_size > len(left_df), random_state=42
)
right_balanced = right_df.sample(
    n=target_size, replace=target_size > len(right_df), random_state=42
)

# Stack both classes, shuffle the rows with the same seed and renumber the index.
df_balanced = pd.concat([left_balanced, right_balanced])
df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)
print(df_balanced['label'].value_counts())

label
Left     95899
Right    95899
Name: count, dtype: int64


In [17]:
# Use the saved balanced 2020 sample when it exists, so later cells keep working
# on the same file as before. If it is missing (e.g. a fresh checkout), persist
# the sample built in the previous cell to that same path instead of failing
# with FileNotFoundError. An existing file is never overwritten.
balanced_2020_path = 'classified samples/2020_Classified_Balanced.csv'
if os.path.exists(balanced_2020_path):
    df_balanced = pd.read_csv(balanced_2020_path)
else:
    # Write to exactly the path that is read back on later runs.
    df_balanced.to_csv(balanced_2020_path, index=False)

# From here on `df2020` refers to the balanced sample, not the full frame.
df2020 = df_balanced

In [18]:
# Bar colours and category names, in matching order: red for Right, blue for Left.
cust_p = ["#DC1414", "#66B2FF"]
labels = ["Right", "Left"]

# Number of tweets per leaning; `df2020` now holds the balanced sample.
count_right = int((df2020["label"] == "Right").sum())
count_left = int((df2020["label"] == "Left").sum())

# One single-bar trace per leaning, so each gets its own colour and legend entry.
# (Plotly, 2025a)
fig = go.Figure(
    data=[
        go.Bar(x=[leaning], y=[total], name=leaning, marker_color=colour)
        for leaning, total, colour in zip(labels, (count_right, count_left), cust_p)
    ]
)

# Titles, figure size and background styling. (Plotly, 2025a)
fig.update_layout(
    title="2020 Political Leaning Distribution Balanced",
    xaxis_title="Political Leaning",
    yaxis_title="Count",
    legend_title="Political Leaning",
    barmode="group",
    bargap=0.4,
    width=600,
    height=450,
    plot_bgcolor="#646161",
)
fig.show()