In [None]:
import pandas as pd
import csv
import matplotlib.pyplot as plt
from google.colab import files


# Load the CSVs (replace the file names with those of your own exports)
df_old = pd.read_csv('botRecordOld.csv', encoding='unicode_escape')
df_new = pd.read_csv('botRecordNew.csv', encoding='unicode_escape')


def _to_int_count(series):
    """Return `series` converted to an integer dtype.

    Handles both shapes a count column can take after `pd.read_csv`:
    - already numeric (the file had plain digits) -> cast directly;
    - text such as '1,234' or ' 57 ' -> drop thousands separators and
      surrounding whitespace, then cast.

    Raises ValueError if a value is missing or not a whole number, exactly
    like a plain `astype(int)` would.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series.astype(int)
    cleaned = series.astype(str).str.replace(',', '', regex=False).str.strip()
    return cleaned.astype(int)


# Clean whitespace and convert types
for df in [df_old, df_new]:
    df.columns = df.columns.str.strip()
    df['character'] = df['character'].str.strip()
    df['tag'] = df['tag'].str.strip()
    # Both counters go through the same converter: calling `.str` directly on a
    # column pandas already parsed as numeric raises AttributeError, and a
    # comma-formatted `likes` value would make a bare astype(int) fail.
    df['chats'] = _to_int_count(df['chats'])
    df['likes'] = _to_int_count(df['likes'])

# Merge on composite key.
# how='inner' keeps only (character, tag) pairs present in BOTH snapshots, so
# bots that were added or removed between the two exports are dropped here.
# NOTE: if a (character, tag) pair is duplicated in either file, the merge
# yields one row per combination of the duplicates.
df_merged = df_old.merge(df_new, on=['character', 'tag'], suffixes=('_old', '_new'), how='inner')

# Compute absolute changes (new - old) for each metric
for metric in ('chats', 'likes'):
    df_merged[f'{metric}_diff'] = df_merged[f'{metric}_new'] - df_merged[f'{metric}_old']

# Compute percentage changes relative to the old value.
# A zero baseline is replaced by 1 for BOTH metrics so the division never
# produces inf/NaN (previously only likes was guarded and chats could yield inf).
for metric in ('chats', 'likes'):
    baseline = df_merged[f'{metric}_old'].replace(0, 1)  # avoid /0
    df_merged[f'{metric}_pct'] = ((df_merged[f'{metric}_diff'] / baseline) * 100).round(1)

# Only show rows where at least one of the two metrics increased
df_increased = df_merged[(df_merged['chats_diff'] > 0) | (df_merged['likes_diff'] > 0)].copy()


def _format_change(diff, pct):
    """Build display strings such as '+12 (3.4%)' or '-5 (-10.0%)'.

    `diff` is an integer Series of absolute changes and `pct` the matching
    Series of percentage changes. The sign comes from the value itself: the
    filter above is an OR, so a kept row may still have one metric that fell
    or stayed flat, and a hard-coded '+' prefix would render it as '+-5'.
    """
    # astype(str) keeps the result string-typed even when the frame is empty.
    signed = diff.map('{:+d}'.format).astype(str)
    return signed + ' (' + pct.astype(str) + '%)'


# Format display
df_increased['chats_change'] = _format_change(df_increased['chats_diff'], df_increased['chats_pct'])
df_increased['likes_change'] = _format_change(df_increased['likes_diff'], df_increased['likes_pct'])

summary = df_increased[['character', 'tag', 'chats_change', 'likes_change']]
print(summary)

# --- Plotting ---


def _plot_top_increases(frame, diff_col, metric_label, color, n=10):
    """Draw a horizontal bar chart of the `n` largest increases in one metric.

    Parameters:
        frame: DataFrame with 'character', 'tag' and `diff_col` columns.
        diff_col: name of the column holding the absolute change.
        metric_label: human-readable metric name used in the axis label/title.
        color: matplotlib color for the bars.
        n: maximum number of bars to show.

    Returns:
        The (possibly empty) DataFrame of rows that were selected for plotting.

    Only rows with a strictly positive change are plotted: `frame` may contain
    rows kept because the *other* metric increased, and those would otherwise
    show up as zero or negative bars under an "Increases" title.
    """
    top = frame[frame[diff_col] > 0].sort_values(diff_col, ascending=False).head(n)
    if top.empty:
        print(f'No increases in {metric_label.lower()} to plot.')
        return top

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(top['character'] + " - " + top['tag'], top[diff_col], color=color)
    ax.set_xlabel(f'{metric_label} Increase')
    ax.set_title(f'Top {n} Increases in {metric_label}')
    ax.invert_yaxis()  # put the largest increase at the top of the chart
    fig.tight_layout()
    plt.show()
    return top


# Top N increases in chats
top_chats = _plot_top_increases(df_increased, 'chats_diff', 'Chats', 'skyblue')

# Top N increases in likes
top_likes = _plot_top_increases(df_increased, 'likes_diff', 'Likes', 'salmon')

In [None]:
from IPython.display import display
# Render the full df_increased frame (every column, including the raw
# *_diff / *_pct values and the formatted *_change strings) using the
# notebook's rich display instead of plain-text print output.
display(df_increased)