# Imports

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import pearsonr

# Load dataset

In [None]:
# Location of the TikTok dataset, relative to the notebook's working directory
DATASET_PATH = "../dataset/TikTokEuropeanElections_Abortion_War.csv"
df = pd.read_csv(DATASET_PATH)

# Engagement analysis

- **Sentiment Score vs Engagement: All data**

In [None]:
# Keep the sentiment score and the three engagement columns; rows with any
# missing value are dropped so all panels are drawn from the same rows.
cols = ['sentiment_score_compound', 'likes', 'followers', 'average_order']
subset = df[cols].dropna()

# One regression panel per engagement column: (x column, panel title)
panels = [
    ('average_order', 'Sentiment vs. Average Order'),
    ('likes', 'Sentiment vs. Likes'),
    ('followers', 'Sentiment vs. Followers'),
]

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
for ax, (x_col, panel_title) in zip(axes, panels):
    sns.regplot(
        data=subset,
        x=x_col,
        y='sentiment_score_compound',
        ax=ax,
        scatter_kws={'alpha': 0.3},
    )
    ax.set_title(panel_title)
plt.tight_layout()
plt.show()

In [None]:
# Work on complete rows only: pearsonr needs paired, non-missing observations
cols = ['sentiment_score_compound', 'likes', 'followers', 'average_order']
subset = df[cols].dropna()

# Pearson r and p-value of the sentiment score against each engagement column
sentiment = subset['sentiment_score_compound']
correlation_results = {
    feature: dict(zip(('r', 'p_value'), pearsonr(sentiment, subset[feature])))
    for feature in ('likes', 'followers', 'average_order')
}

# Report one line per engagement column
print("Pearson Correlation Results with 'sentiment_score_compound':")
for feature, stats in correlation_results.items():
    print(f"{feature}: r = {stats['r']:.3f}, p = {stats['p_value']:.4f}")


- **Sentiment Score vs Engagement: outliers removed with the 1.5 × IQR rule (Q1 = 25th percentile, Q3 = 75th percentile)**

In [None]:
# Sentiment score plus the engagement columns, restricted to complete rows
cols = ['sentiment_score_compound', 'likes', 'followers', 'average_order']
subset = df.loc[:, cols].dropna()

# Function to remove outliers using IQR
def remove_outliers_iqr(df, columns, multiplier=1.5):
    """Return a copy of ``df`` without rows that are IQR outliers in ``columns``.

    For each column, rows are kept only when the value lies inside the
    inclusive interval ``[Q1 - multiplier * IQR, Q3 + multiplier * IQR]``,
    where Q1 and Q3 are the 25th and 75th percentiles and ``IQR = Q3 - Q1``.

    The columns are processed one after another, and the quartiles of each
    column are computed on the rows that survived the previous columns. The
    result can therefore depend on the order of ``columns``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    columns : iterable of column labels
        Columns to filter on. An empty iterable returns an unfiltered copy.
    multiplier : float, default 1.5
        Width of the fences in units of the IQR. Must be non-negative.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, keeping their original index labels.

    Raises
    ------
    ValueError
        If ``multiplier`` is negative.
    """
    if multiplier < 0:
        raise ValueError("multiplier must be non-negative")

    filtered_df = df.copy()
    for col in columns:
        values = filtered_df[col]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        fence = multiplier * (q3 - q1)
        # between() is inclusive on both ends, i.e. >= lower and <= upper
        filtered_df = filtered_df[values.between(q1 - fence, q3 + fence)]
    return filtered_df

# Drop IQR outliers in each engagement column before re-running the analysis
engagement_cols = ['likes', 'followers', 'average_order']
filtered_subset = remove_outliers_iqr(subset, engagement_cols)

# Pearson r and p-value of the sentiment score against each engagement column
filtered_sentiment = filtered_subset['sentiment_score_compound']
correlation_results = {
    feature: dict(zip(('r', 'p_value'), pearsonr(filtered_sentiment, filtered_subset[feature])))
    for feature in engagement_cols
}

# Report one line per engagement column
print("Pearson Correlation Results with 'sentiment_score_compound' (after outlier removal):")
for feature, stats in correlation_results.items():
    print(f"{feature}: r = {stats['r']:.3f}, p = {stats['p_value']:.4f}")

# One regression panel per engagement column: (x column, panel title)
panels = [
    ('average_order', 'Sentiment vs. Average Order'),
    ('likes', 'Sentiment vs. Likes'),
    ('followers', 'Sentiment vs. Followers'),
]
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
for ax, (x_col, panel_title) in zip(axes, panels):
    sns.regplot(
        data=filtered_subset,
        x=x_col,
        y='sentiment_score_compound',
        ax=ax,
        scatter_kws={'alpha': 0.3},
    )
    ax.set_title(panel_title)
plt.tight_layout()
plt.show()


- **Relation between political-wing mentions (% left / % right) and engagement**

In [None]:
# Average likes and followers for every distinct '% left' value
mean_by_ideology = df.groupby('% left')[['likes', 'followers']].mean().reset_index()
left_share = mean_by_ideology['% left']

# One line panel per engagement column: (column, line colour, title, y label)
line_panels = [
    ('likes', 'blue', 'Mean Likes by % Left', 'Mean Likes'),
    ('followers', 'green', 'Mean Followers by % Left', 'Mean Followers'),
]

plt.figure(figsize=(12, 5))
for position, (metric, colour, panel_title, y_label) in enumerate(line_panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(left_share, mean_by_ideology[metric], color=colour)
    plt.title(panel_title)
    plt.xlabel('% Left')
    plt.ylabel(y_label)

# Pearson r and p-value, computed on the per-group means (not on individual rows)
r_likes, p_likes = pearsonr(left_share, mean_by_ideology['likes'])
r_followers, p_followers = pearsonr(left_share, mean_by_ideology['followers'])

# Report the statistics before the figure is rendered
print("Correlation with % left:")
print(f"Likes     → r = {r_likes:.3f}, p = {p_likes:.4f}")
print(f"Followers → r = {r_followers:.3f}, p = {p_followers:.4f}")

plt.tight_layout()
plt.show()

In [None]:
# Average likes and followers for every distinct '% right' value
mean_by_ideology = df.groupby('% right')[['likes', 'followers']].mean().reset_index()
right_share = mean_by_ideology['% right']

# One line panel per engagement column: (column, line colour, title, y label)
line_panels = [
    ('likes', 'blue', 'Mean Likes by % right', 'Mean Likes'),
    ('followers', 'green', 'Mean Followers by % right', 'Mean Followers'),
]

plt.figure(figsize=(12, 5))
for position, (metric, colour, panel_title, y_label) in enumerate(line_panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(right_share, mean_by_ideology[metric], color=colour)
    plt.title(panel_title)
    plt.xlabel('% right')
    plt.ylabel(y_label)

# Pearson r and p-value, computed on the per-group means (not on individual rows)
r_likes, p_likes = pearsonr(right_share, mean_by_ideology['likes'])
r_followers, p_followers = pearsonr(right_share, mean_by_ideology['followers'])

# Report the statistics before the figure is rendered
print("Correlation with % right:")
print(f"Likes     → r = {r_likes:.3f}, p = {p_likes:.4f}")
print(f"Followers → r = {r_followers:.3f}, p = {p_followers:.4f}")

plt.tight_layout()
plt.show()

- **Relating subjectivity score to engagement**

In [None]:
# Restrict to the columns used below and discard incomplete rows
df_clean = df[['subjectivity_score', 'average_order', 'likes', 'followers']].dropna()

# Series.corr (default method) of subjectivity against each engagement column
subjectivity = df_clean['subjectivity_score']
correlation_order, correlation_likes, correlation_followers = (
    subjectivity.corr(df_clean[column])
    for column in ('average_order', 'likes', 'followers')
)

print(f"Correlation between subjectivity and order of appearance: {correlation_order:.3f}")
print(f"Correlation between subjectivity and likes: {correlation_likes:.3f}")
print(f"Correlation between subjectivity and followers: {correlation_followers:.3f}")

# One regression panel per engagement column: (x column, title, x label)
panels = [
    ('average_order', 'Subjectivity vs Order of Appearance', 'Order of Appearance'),
    ('likes', 'Subjectivity vs Likes', 'Likes'),
    ('followers', 'Subjectivity vs Followers', 'Followers'),
]

plt.figure(figsize=(14, 6))
for position, (x_col, panel_title, x_label) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    sns.regplot(
        data=df_clean,
        x=x_col,
        y='subjectivity_score',
        scatter_kws={'alpha': 0.3},
        line_kws={'color': 'red'},
    )
    plt.title(panel_title)
    plt.xlabel(x_label)
    plt.ylabel('Subjectivity')

plt.tight_layout()
plt.show()

- **Relating subjectivity score to engagement without outliers**

In [None]:
# Columns needed for the subjectivity analysis; incomplete rows are discarded
df_clean = df.loc[:, ['subjectivity_score', 'average_order', 'likes', 'followers']].dropna()

def remove_outliers_iqr(df, columns, multiplier=1.5):
    """Return a copy of ``df`` without rows that are IQR outliers in ``columns``.

    For each column, rows are kept only when the value lies inside the
    inclusive interval ``[Q1 - multiplier * IQR, Q3 + multiplier * IQR]``,
    where Q1 and Q3 are the 25th and 75th percentiles and ``IQR = Q3 - Q1``.

    The columns are processed one after another, and the quartiles of each
    column are computed on the rows that survived the previous columns. The
    result can therefore depend on the order of ``columns``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    columns : iterable of column labels
        Columns to filter on. An empty iterable returns an unfiltered copy.
    multiplier : float, default 1.5
        Width of the fences in units of the IQR. Must be non-negative.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, keeping their original index labels.

    Raises
    ------
    ValueError
        If ``multiplier`` is negative.
    """
    if multiplier < 0:
        raise ValueError("multiplier must be non-negative")

    filtered_df = df.copy()
    for col in columns:
        values = filtered_df[col]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        fence = multiplier * (q3 - q1)
        # between() is inclusive on both ends, i.e. >= lower and <= upper
        filtered_df = filtered_df[values.between(q1 - fence, q3 + fence)]
    return filtered_df

# Filter IQR outliers on the subjectivity score and on every engagement column
analysis_cols = ['subjectivity_score', 'average_order', 'likes', 'followers']
df_clean = remove_outliers_iqr(df_clean, analysis_cols)

# Pearson r and p-value of subjectivity against each engagement column
subjectivity = df_clean['subjectivity_score']
(r_order, p_order), (r_likes, p_likes), (r_followers, p_followers) = (
    pearsonr(subjectivity, df_clean[column])
    for column in ('average_order', 'likes', 'followers')
)

# Report the statistics
print("Pearson correlation results:")
print(f"Order of Appearance → r = {r_order:.3f}, p = {p_order:.4f}")
print(f"Likes               → r = {r_likes:.3f}, p = {p_likes:.4f}")
print(f"Followers           → r = {r_followers:.3f}, p = {p_followers:.4f}")

# One regression panel per engagement column: (x column, title, x label)
panels = [
    ('average_order', 'Subjectivity vs Order of Appearance', 'Order of Appearance'),
    ('likes', 'Subjectivity vs Likes', 'Likes'),
    ('followers', 'Subjectivity vs Followers', 'Followers'),
]

plt.figure(figsize=(14, 6))
for position, (x_col, panel_title, x_label) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    sns.regplot(
        data=df_clean,
        x=x_col,
        y='subjectivity_score',
        scatter_kws={'alpha': 0.3},
        line_kws={'color': 'red'},
    )
    plt.title(panel_title)
    plt.xlabel(x_label)
    plt.ylabel('Subjectivity')

plt.tight_layout()
plt.show()