# 1. Import data and libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import os
import folium
import json

In [None]:
%matplotlib inline

In [None]:
# Step 1: Load the country boundaries ('countries.geojson' must be in the working directory).
# The encoding is named explicitly: JSON/GeoJSON text is expected to be UTF-8, and
# relying on the platform default can garble non-ASCII country names.
with open('countries.geojson', 'r', encoding='utf-8') as f:
    country_geo = json.load(f)

In [None]:
# Collect the country name of every GeoJSON feature. The name is read from the
# 'ADMIN' key of each feature's 'properties' dictionary.
geo_countries = [
    geo_feature['properties']['ADMIN']
    for geo_feature in country_geo['features']
]

In [None]:
# Load the CWUR ranking table from the project's original-data folder.
# NOTE(review): `path` is not defined in any cell shown here; it must already
# point at the project root before this cell runs.
cwur_csv = os.path.join(path, '02 Data', 'Original Data', 'cwurData.csv')
df_cwur = pd.read_csv(cwur_csv, index_col=False)

In [None]:
# Distinct country names found in the ranking data (expects a 'country' column)
cwur_country_column = df_cwur['country']
df_cwur_countries = cwur_country_column.unique().tolist()

In [None]:
# Step 4: Compare the two lists of countries.
# Membership is tested against sets so each lookup is constant-time; the list
# comprehensions still report the names in their original order.
geo_country_set = set(geo_countries)
cwur_country_set = set(df_cwur_countries)

# Names used in the ranking data that have no matching GeoJSON feature
missing_countries = [country for country in df_cwur_countries if country not in geo_country_set]
# GeoJSON features that never appear in the ranking data
extra_countries = [country for country in geo_countries if country not in cwur_country_set]

In [None]:
# Report ranking-data countries that have no GeoJSON counterpart
if missing_countries:
    print(f"Countries missing in GeoJSON file: {missing_countries}")
else:
    print("All countries from df_cwur are present in the GeoJSON file.")

# Report GeoJSON countries that never occur in the ranking data
if extra_countries:
    print(f"Extra countries in the GeoJSON file: {extra_countries}")
else:
    print("No extra countries in the GeoJSON file.")

In [None]:
# Pairs of (spelling used in the ranking data, replacement spelling); the
# replacements are presumably the GeoJSON 'ADMIN' names - verify against the file.
_cwur_to_geo_names = (
    ("USA", "United States of America"),
    ("Hong Kong", "Hong Kong S.A.R."),
    ("Slovak Republic", "Slovakia"),
    ("Serbia", "Republic of Serbia"),
)
country_name_mapping = dict(_cwur_to_geo_names)


In [None]:
# Rewrite the ranking data's country names using country_name_mapping;
# names without an entry in the mapping are left unchanged.
harmonised_countries = df_cwur['country'].replace(country_name_mapping)
df_cwur['country'] = harmonised_countries


In [None]:
# Show the distinct country names currently stored in the ranking data
distinct_countries = df_cwur['country'].unique()
print(distinct_countries)

In [None]:
# Preview the first rows of the ranking data
df_cwur.head()

In [None]:
# (number of rows, number of columns) of the ranking data
df_cwur.shape

# 2.  Plotting a choropleth

In [None]:
# Build the table for the world-rank choropleth: one row per country.
# df_cwur can hold several rows per country (it is grouped by country further
# down), but a choropleth colours each country with a single value, so keep the
# best (numerically lowest) world_rank found for each country.
data_to_plot = df_cwur.groupby('country', as_index=False)['world_rank'].min()
data_to_plot.head()

In [None]:
# Setup a folium map at a high-level zoom.
# The centre is [latitude, longitude]; latitude is only valid within [-90, 90],
# so the world view is centred on [20, 0].
# NOTE: `map` shadows the Python builtin; the name is kept so that any other cell
# referring to this variable keeps working.
map = folium.Map(location=[20, 0], zoom_start=1.5)

# Choropleth maps bind Pandas Data Frames and json geometries. Rows of data_to_plot
# are matched to GeoJSON features by comparing the 'country' column with each
# feature's properties.ADMIN value; 'world_rank' drives the fill colour.
folium.Choropleth(
    geo_data=country_geo,
    data=data_to_plot,
    columns=['country', 'world_rank'],
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr', fill_opacity=0.6, line_opacity=0.1,
    legend_name="Ranking").add_to(map)
folium.LayerControl().add_to(map)
map

In [None]:
# Average score per country, with 'country' restored as a regular column
score_by_country = df_cwur.groupby('country')['score']
aggregated_data = score_by_country.mean().reset_index()

# Preview the aggregated table
print(aggregated_data.head())


In [None]:
# Setup the folium map.
# The centre is [latitude, longitude]; latitude is only valid within [-90, 90],
# so the world view is centred on [20, 0].
# NOTE: `map` shadows the Python builtin; the name is kept so that any other cell
# referring to this variable keeps working.
map = folium.Map(location=[20, 0], zoom_start=1.5)

# Choropleth map: the 'country' column of aggregated_data is matched against each
# feature's properties.ADMIN value and 'score' drives the fill colour.
folium.Choropleth(
    geo_data=country_geo,
    data=aggregated_data,  # Data with average scores
    columns=['country', 'score'],
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr', fill_opacity=0.6, line_opacity=0.1,
    legend_name="Average Score"  # Legend label
).add_to(map)

folium.LayerControl().add_to(map)

# Show map
map

In [None]:
# Mean world rank per country, with 'country' restored as a regular column
mean_rank_by_country = df_cwur.groupby('country')['world_rank'].mean()
aggregated_ranks = mean_rank_by_country.reset_index()

# Take the reciprocal so that better (numerically lower) ranks become larger values
aggregated_ranks['rank_inverted'] = 1 / aggregated_ranks['world_rank']

# Preview the aggregated table
print(aggregated_ranks.head())


In [None]:
# Setup the folium map.
# The centre is [latitude, longitude]; latitude is only valid within [-90, 90],
# so the world view is centred on [20, 0].
# NOTE: `map` shadows the Python builtin; the name is kept so that any other cell
# referring to this variable keeps working.
map = folium.Map(location=[20, 0], zoom_start=1.5)

# Choropleth map: the 'country' column of aggregated_ranks is matched against each
# feature's properties.ADMIN value and 'rank_inverted' drives the fill colour.
folium.Choropleth(
    geo_data=country_geo,
    data=aggregated_ranks,
    columns=['country', 'rank_inverted'],  # 'country' and 'rank_inverted' columns
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr', fill_opacity=0.6, line_opacity=0.1,
    legend_name="Inverted Rank"
).add_to(map)

folium.LayerControl().add_to(map)

# Show map
map

In [None]:
# Keep only the rows whose world_rank is 100 or better
is_top_100 = df_cwur['world_rank'] <= 100
top_100_universities = df_cwur[is_top_100]


In [None]:
# Number of top-100 rows for every (country, year) combination
rows_by_country_and_year = top_100_universities.groupby(['country', 'year'])
top_100_by_country = rows_by_country_and_year.size()


In [None]:
# Number of top-100 rows per year, across all countries
rows_by_year = top_100_universities.groupby('year')
total_top_100_global = rows_by_year.size()


In [None]:
# Share of each year's top-100 rows held by each country, as a percentage.
# The per-(country, year) counts are divided by the per-year totals; pandas is
# expected to align the two Series on their shared 'year' level.
share_of_yearly_total = top_100_by_country / total_top_100_global
top_100_percent_per_country = (share_of_yearly_total * 100).reset_index(name='percentage')

# Preview the resulting table
print(top_100_percent_per_country.head())

In [None]:
# Keep only the percentage rows for the year 2015
is_2015 = top_100_percent_per_country['year'] == 2015
top_100_percentage_2015 = top_100_percent_per_country[is_2015]


In [None]:
# Build a {country name: percentage} dictionary from the 2015 rows
percentage_by_country_2015 = top_100_percentage_2015.set_index('country')['percentage']
country_percentage_map_2015 = percentage_by_country_2015.to_dict()


In [None]:
# Set up the Folium map.
# The centre is [latitude, longitude]; latitude is only valid within [-90, 90],
# so the world view is centred on [20, 0].
map_2015 = folium.Map(location=[20, 0], zoom_start=2)

# Create the choropleth layer for 2015. `data` is the country -> percentage
# dictionary, so no `columns` argument is passed; its keys are matched against
# each feature's properties.ADMIN value.
folium.Choropleth(
    geo_data=country_geo,
    data=country_percentage_map_2015,
    key_on='feature.properties.ADMIN',
    fill_color='YlOrBr',  # Color scale for the choropleth map
    fill_opacity=0.6,
    line_opacity=0.1,
    legend_name="Top 100 Universities (%) - 2015",
).add_to(map_2015)

# Display the map
map_2015

# Using geographic analysis, we can answer the following questions:
•Which countries have the most highly-ranked universities?
According to the latest map, the US has the largest percentage of highly-ranked (top-100) universities in 2015.

•How do different regions (e.g., North America, Europe, Asia) perform in global university rankings? According to the map, North America and Europe appear to dominate the rankings.

