In [1]:
import pandas as pd
import pycountry
import re
import plotly.express as px

# Read the csv file into a pandas dataframe
df = pd.read_csv('https://raw.githubusercontent.com/YusufAliOzkan/zotero-intelligence-bibliography/main/all_items.csv')

# Dictionary to map non-proper country names to their proper names
country_map = {
    'british': 'UK',
    'great britain': 'UK',
    'UK' : 'UK', 
    'america' : 'United States',
    'United States of America' : 'United States',
    'Soviet Union': 'Russia', 
    'american' : 'United States',
    'United States' : 'United States',
    'russian' : 'Russia'
    # Add more mappings as needed
}

# Find the country names in the "title" column of the dataframe
found_countries = {}
for i, row in df.iterrows():
    title = str(row['Title']).lower()
    for country in pycountry.countries:
        name = country.name.lower()
        if name in title or (name + 's') in title:  # Check for singular and plural forms of country names
            proper_name = country.name
            found_countries[proper_name] = found_countries.get(proper_name, 0) + 1
    for non_proper, proper in country_map.items():
        if non_proper in title:
            found_countries[proper] = found_countries.get(proper, 0) + title.count(non_proper)

# Create a new dataframe containing the found countries and their counts
df_countries = pd.DataFrame({'Country': list(found_countries.keys()), 'Count': list(found_countries.values())})


In [2]:
fig = px.choropleth(df_countries, locations='Country', locationmode='country names', color='Count', 
                    title='Country mentions in titles', color_continuous_scale='Viridis',
                    width=1100, height=700) # Adjust the size of the map here

# Display the map
fig.show()

In [3]:
df_countries=df_countries.sort_values(by='Count', ascending=False)
df_countries.reset_index(drop=True)

Unnamed: 0,Country,Count
0,UK,203
1,United States,168
2,Ukraine,88
3,Russia,55
4,Israel,28
...,...,...
69,North Macedonia,1
70,Romania,1
71,Bangladesh,1
72,Sri Lanka,1


In [4]:
df_countries.to_csv('countries.csv',index=False)