# In [9]:
import pandas as pd
import plotly.express as px
import unicodedata

# Read the cleaned dataset from disk
df_combined_full = pd.read_csv("../data/csv/df_clean.csv")

# Step 1: Build one row per (country, year) pair.
#   - keep only the two columns needed for the map and drop incomplete rows
#   - trim outer whitespace, then break ';'-separated entries into lists
#   - explode the lists so every listed country gets its own row
df_map = (
    df_combined_full[['country', 'year']]
    .dropna()
    .assign(country=lambda frame: frame['country'].str.strip().str.split(';'))
    .explode('country')
)

# Casefolded labels that are all reported as 'United Kingdom'.
# Defined once at module level so the set is not rebuilt on every call.
_UK_ALIASES = frozenset({
    'uk', 'united kingdom', 'england', 'scotland', 'wales', 'northern ireland',
})


# Normalize function to fix duplicates (e.g., UK)
def normalize_country(country):
    """Return a canonical display name for one country label.

    The label is Unicode-normalized (NFKC), stripped of surrounding
    whitespace and casefolded before comparison, so variants that differ
    only in case, padding or compatibility characters collapse to the same
    value.

    Args:
        country: A single country label (string), or a missing value
            (``None`` / ``NaN``).

    Returns:
        ``'United Kingdom'`` for any UK alias, the title-cased cleaned label
        otherwise, or ``None`` when the input is missing or blank.
    """
    if pd.isna(country):
        return None

    # Unicode normalization, strip whitespace, and casefold
    cleaned = unicodedata.normalize('NFKC', country).strip().casefold()

    # Splitting on ';' can leave empty fragments (e.g. a trailing ';').
    # Treat them as missing so they do not form a blank "country" group.
    if not cleaned:
        return None

    if cleaned in _UK_ALIASES:
        return 'United Kingdom'

    # str.title() capitalizes every word, including short ones such as "of".
    return cleaned.title()

# Canonicalise every country label so spelling variants are counted together
df_map['country'] = df_map['country'].apply(normalize_country)

# Step 2: Count the rows of df_map for every (year, country) pair
df_grouped = (
    df_map
    .groupby(['year', 'country'])
    .size()
    .rename('publication_count')
    .reset_index()
)

# Step 3: Animated bubble map, one animation frame per year.
# The colour range is pinned to the overall maximum so the scale is the
# same in every frame.
max_publications = df_grouped['publication_count'].max()

fig = px.scatter_geo(
    data_frame=df_grouped,
    # How each row is placed on the map
    locations='country',
    locationmode='country names',
    projection='natural earth',
    # What drives the animation and the hover label
    animation_frame='year',
    hover_name='country',
    # Bubble size encoding
    size='publication_count',
    size_max=80,
    # Bubble colour encoding
    color='publication_count',
    color_continuous_scale='Viridis',
    range_color=[0, max_publications],
    # Text shown to the reader
    labels={'publication_count': 'Number of Publications'},
    title='Publications per Country Over Time (Bubble Map)',
)

fig.update_layout(
    width=1200,
    height=800,
    margin=dict(r=0, t=40, l=0, b=0),
    geo={
        'showframe': False,
        'showcoastlines': False,
        'showcountries': True,
    },
)

# Write a standalone HTML copy, then display the figure
fig.write_html("../data/html/publications_bubble_map.html")
fig.show()

