In [89]:
# -----------------------------
# Data manipulation
# -----------------------------
import pandas as pd
import numpy as np
import json
import ast

# -----------------------------
# Plotting
# -----------------------------
import matplotlib.pyplot as plt
import seaborn as sns

import requests
from io import StringIO

In [90]:
# URL for rank_by_year.csv
url_rank = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-09-09/rank_by_year.csv"

# Fetch CSV via requests (disable SSL verification temporarily)
response = requests.get(url_rank, verify=False)
csv_data = StringIO(response.text)

# Read into pandas DataFrame
rank_by_year = pd.read_csv(csv_data)

# Quick check
print("Rank by Year:")
print(rank_by_year)





Rank by Year:
     code                country       region  rank  visa_free_count  year
0      AF            Afghanistan         ASIA   116               26  2021
1      AF            Afghanistan         ASIA   106               26  2020
2      AF            Afghanistan         ASIA   106               30  2018
3      AF            Afghanistan         ASIA   104               24  2017
4      AF            Afghanistan         ASIA   104               25  2016
...   ...                    ...          ...   ...              ...   ...
3945   PS  Palestinian Territory  MIDDLE EAST   102               37  2019
3946   PS  Palestinian Territory  MIDDLE EAST   105               37  2022
3947   PS  Palestinian Territory  MIDDLE EAST   103               38  2023
3948   PS  Palestinian Territory  MIDDLE EAST    98               40  2024
3949   PS  Palestinian Territory  MIDDLE EAST    93               39  2025

[3950 rows x 6 columns]


In [91]:
data.sort_values(by = ["country", "year"], ascending = True, inplace = True)

import plotly.express as px
import pandas as pd


# Create an animated choropleth map
fig = px.choropleth(
    data,
    locations = "country",
    locationmode = "country names",
    color = "visa_free_count",
    animation_frame = "year",   # 👈 this adds the time slider
    color_continuous_scale = "blues",
    projection = "natural earth",
    title = "Global Map with Time Slider"
)

fig.update_layout(
    geo = dict(showframe = False, showcoastlines = True),
    margin = dict(l = 0, r = 0, t = 30, b = 0)
)

fig.show()


The library used by the *country names* `locationmode` option is changing in an upcoming version. Country names in existing plots may not work in the new version. To ensure consistent behavior, consider setting `locationmode` to *ISO-3*.



In [92]:
max_year = rank_by_year['year'].max()
print(f"Max year in rank_by_year: {max_year}")

# Filter for the latest year
latest_rankings = rank_by_year[rank_by_year['year'] == max_year]
latest_rankings = latest_rankings.sort_values(by='rank')
print(latest_rankings)


Max year in rank_by_year: 2025
     code      country       region  rank  visa_free_count  year
2977   SG    Singapore         ASIA     1              193  2025
3097   KR  South Korea         ASIA     2              190  2025
1669   JP        Japan         ASIA     2              190  2025
3135   ES        Spain       EUROPE     3              189  2025
1249   DE      Germany       EUROPE     3              189  2025
...   ...          ...          ...   ...              ...   ...
3793   YE        Yemen  MIDDLE EAST    95               32  2025
2579   PK     Pakistan         ASIA    95               32  2025
1569   IQ         Iraq  MIDDLE EAST    96               30  2025
3335   SY        Syria  MIDDLE EAST    97               27  2025
19     AF  Afghanistan         ASIA    98               25  2025

[199 rows x 6 columns]


In [93]:
# URL for country_lists.csv
url_country = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-09-09/country_lists.csv"

# Fetch CSV via requests (disable SSL verification temporarily)
response = requests.get(url_country, verify=False)
csv_data = StringIO(response.text)

# Read into pandas DataFrame
country_lists = pd.read_csv(csv_data)

# Quick check
print("Country Lists:")
country_lists.head()





Country Lists:


Unnamed: 0,code,country,visa_required,visa_online,visa_on_arrival,visa_free_access,electronic_travel_authorisation
0,PS,Palestinian Territory,"[[{""code"":""AF"",""name"":""Afghanistan""},{""code"":""...","[[{""code"":""AG"",""name"":""Antigua and Barbuda""},{...","[[{""code"":""BD"",""name"":""Bangladesh""},{""code"":""B...","[[{""code"":""BO"",""name"":""Bolivia""},{""code"":""CK"",...","[[{""code"":""LK"",""name"":""Sri Lanka""},{""code"":""KE..."
1,AD,Andorra,"[[{""code"":""AF"",""name"":""Afghanistan""},{""code"":""...","[[{""code"":""AO"",""name"":""Angola""},{""code"":""AZ"",""...","[[{""code"":""BH"",""name"":""Bahrain""},{""code"":""BD"",...","[[{""code"":""JP"",""name"":""Japan""},{""code"":""AL"",""n...","[[{""code"":""AU"",""name"":""Australia""},{""code"":""CA..."
2,VA,Vatican City,"[[{""code"":""AF"",""name"":""Afghanistan""},{""code"":""...","[[{""code"":""AZ"",""name"":""Azerbaijan""},{""code"":""B...","[[{""code"":""BH"",""name"":""Bahrain""},{""code"":""BD"",...","[[{""code"":""AL"",""name"":""Albania""},{""code"":""AD"",...","[[{""code"":""AU"",""name"":""Australia""},{""code"":""CA..."
3,SM,San Marino,"[[{""code"":""AF"",""name"":""Afghanistan""},{""code"":""...","[[{""code"":""AZ"",""name"":""Azerbaijan""},{""code"":""B...","[[{""code"":""BH"",""name"":""Bahrain""},{""code"":""BD"",...","[[{""code"":""JP"",""name"":""Japan""},{""code"":""AL"",""n...","[[{""code"":""AU"",""name"":""Australia""},{""code"":""CA..."
4,MC,Monaco,"[[{""code"":""AF"",""name"":""Afghanistan""},{""code"":""...","[[{""code"":""AZ"",""name"":""Azerbaijan""},{""code"":""B...","[[{""code"":""BH"",""name"":""Bahrain""},{""code"":""BD"",...","[[{""code"":""JP"",""name"":""Japan""},{""code"":""AL"",""n...","[[{""code"":""AU"",""name"":""Australia""},{""code"":""CA..."


In [94]:
json_cols = [
    'visa_required',
    'visa_online',
    'visa_on_arrival',
    'visa_free_access',
    'electronic_travel_authorisation'
]

import json

def clean_json_field(text):
    """Convert the messy stringified JSON fields into clean Python lists of dicts."""
    if pd.isna(text):
        return []
    try:
        data = json.loads(text)
        # Many fields are [[{...}]] — unwrap the extra list
        if isinstance(data, list) and len(data) == 1 and isinstance(data[0], list):
            data = data[0]
        return data
    except Exception as e:
        # Optional: print which rows failed to parse
        # print("Error parsing:", text[:80], e)
        return []
    
for col in json_cols:
    country_lists[col] = country_lists[col].apply(clean_json_field)

# Quick check
print("Cleaned Country Lists:")
country_lists.head()


Cleaned Country Lists:


Unnamed: 0,code,country,visa_required,visa_online,visa_on_arrival,visa_free_access,electronic_travel_authorisation
0,PS,Palestinian Territory,"[{'code': 'AF', 'name': 'Afghanistan'}, {'code...","[{'code': 'AG', 'name': 'Antigua and Barbuda'}...","[{'code': 'BD', 'name': 'Bangladesh'}, {'code'...","[{'code': 'BO', 'name': 'Bolivia'}, {'code': '...","[{'code': 'LK', 'name': 'Sri Lanka'}, {'code':..."
1,AD,Andorra,"[{'code': 'AF', 'name': 'Afghanistan'}, {'code...","[{'code': 'AO', 'name': 'Angola'}, {'code': 'A...","[{'code': 'BH', 'name': 'Bahrain'}, {'code': '...","[{'code': 'JP', 'name': 'Japan'}, {'code': 'AL...","[{'code': 'AU', 'name': 'Australia'}, {'code':..."
2,VA,Vatican City,"[{'code': 'AF', 'name': 'Afghanistan'}, {'code...","[{'code': 'AZ', 'name': 'Azerbaijan'}, {'code'...","[{'code': 'BH', 'name': 'Bahrain'}, {'code': '...","[{'code': 'AL', 'name': 'Albania'}, {'code': '...","[{'code': 'AU', 'name': 'Australia'}, {'code':..."
3,SM,San Marino,"[{'code': 'AF', 'name': 'Afghanistan'}, {'code...","[{'code': 'AZ', 'name': 'Azerbaijan'}, {'code'...","[{'code': 'BH', 'name': 'Bahrain'}, {'code': '...","[{'code': 'JP', 'name': 'Japan'}, {'code': 'AL...","[{'code': 'AU', 'name': 'Australia'}, {'code':..."
4,MC,Monaco,"[{'code': 'AF', 'name': 'Afghanistan'}, {'code...","[{'code': 'AZ', 'name': 'Azerbaijan'}, {'code'...","[{'code': 'BH', 'name': 'Bahrain'}, {'code': '...","[{'code': 'JP', 'name': 'Japan'}, {'code': 'AL...","[{'code': 'AU', 'name': 'Australia'}, {'code':..."


In [95]:
visa_cols = [
    'visa_required',
    'visa_online',
    'visa_on_arrival',
    'visa_free_access',
    'electronic_travel_authorisation'
]

from pandas import json_normalize

flat_frames = []

for col in visa_cols:
    temp = country_lists[['code', 'country', col]].explode(col)
    temp = temp.dropna(subset=[col])
    temp['visa_type'] = col
    temp['to_code'] = temp[col].apply(lambda x: x.get('code') if isinstance(x, dict) else None)
    temp['to_name'] = temp[col].apply(lambda x: x.get('name') if isinstance(x, dict) else None)
    temp = temp.drop(columns=[col])
    flat_frames.append(temp)

# Combine all into one DataFrame
flat_df = pd.concat(flat_frames, ignore_index=True)

# Remove rows missing target country codes
flat_df = flat_df.dropna(subset=['to_code'])

# Drop duplicates if any
flat_df = flat_df.drop_duplicates(subset=['code', 'to_code', 'visa_type']).reset_index(drop=True)

# Optional: rename columns for clarity
flat_df.rename(columns={
    'code': 'from_code',
    'country': 'from_country'
}, inplace=True)


# Quick check
print("Flattened Visa Data:")
flat_df.head()

Flattened Visa Data:


Unnamed: 0,from_code,from_country,visa_type,to_code,to_name
0,PS,Palestinian Territory,visa_required,AF,Afghanistan
1,PS,Palestinian Territory,visa_required,DZ,Algeria
2,PS,Palestinian Territory,visa_required,AD,Andorra
3,PS,Palestinian Territory,visa_required,AO,Angola
4,PS,Palestinian Territory,visa_required,AI,Anguilla


In [96]:
visa_summary = flat_df.groupby('visa_type').size().reset_index(name='count')
print(visa_summary)


                         visa_type  count
0  electronic_travel_authorisation   1382
1                 visa_free_access  15066
2                  visa_on_arrival   5316
3                      visa_online   5817
4                    visa_required  17392


In [97]:
visa_by_country = (
    flat_df.groupby(['from_country', 'visa_type'])
    .size()
    .reset_index(name='destination_count')
    .sort_values(['from_country', 'visa_type'])
)

visa_pivot = visa_by_country.pivot(
    index='from_country',
    columns='visa_type',
    values='destination_count'
).fillna(0).astype(int)
visa_pivot = visa_pivot.reset_index()
print("Visa Counts by Country:")
visa_pivot.head()

Visa Counts by Country:


visa_type,from_country,electronic_travel_authorisation,visa_free_access,visa_on_arrival,visa_online,visa_required
0,Afghanistan,3,6,16,43,158
1,Albania,6,88,29,29,74
2,Algeria,2,26,27,39,132
3,Andorra,16,120,35,23,32
4,Angola,2,26,20,37,141


In [98]:
# --- Define visa weights ---
visa_weights = {
    'visa_required': 0,
    'visa_online': 1,
    'visa_on_arrival': 2,
    'electronic_travel_authorisation': 3,
    'visa_free_access': 4
}

# --- Ensure all visa type columns exist ---
for visa_type in visa_weights:
    if visa_type not in visa_pivot.columns:
        visa_pivot[visa_type] = 0

# --- Compute total score ---
visa_pivot['total_score'] = sum(visa_pivot[vt] * weight for vt, weight in visa_weights.items())

# --- Compute normalized score (average per destination) ---
visa_types = list(visa_weights.keys())
visa_pivot['num_destinations'] = visa_pivot[visa_types].sum(axis=1)
visa_pivot['avg_score'] = visa_pivot['total_score'] / visa_pivot['num_destinations']

# --- Keep only necessary columns ---
passport_scores = visa_pivot[['from_country', 'total_score', 'num_destinations', 'avg_score']].copy()

# --- Sort by avg_score descending ---
passport_scores = passport_scores.sort_values('avg_score', ascending=False).reset_index(drop=True)

# --- Quick check ---
print(passport_scores.head())


visa_type from_country  total_score  num_destinations  avg_score
0            Singapore          720               226   3.185841
1              Finland          707               226   3.128319
2               France          705               226   3.119469
3              Belgium          705               226   3.119469
4              Denmark          705               226   3.119469


In [99]:

# --- Merge passport scores with latest rankings to get region ---
passport_scores_region = passport_scores.merge(
    latest_rankings[['country', 'region']],
    left_on='from_country',
    right_on='country',
    how='left'
)

# --- Drop duplicate country column (keep 'from_country') ---
passport_scores_region = passport_scores_region.drop(columns=['country'])

# --- Quick check ---
print(passport_scores_region.head())



  from_country  total_score  num_destinations  avg_score  region
0    Singapore          720               226   3.185841    ASIA
1      Finland          707               226   3.128319  EUROPE
2       France          705               226   3.119469  EUROPE
3      Belgium          705               226   3.119469  EUROPE
4      Denmark          705               226   3.119469  EUROPE


In [100]:
import plotly.express as px

# --- Treemap ---
fig = px.treemap(
    passport_scores_region,
    path=['region', 'from_country'],   # hierarchy: region -> country
    values='total_score',              # size of rectangle
    color='avg_score',                 # color by average score
    hover_data={
        'total_score': True,
        'num_destinations': True,
        'avg_score': ':.2f'
    },
    color_continuous_scale='Viridis',
    title='Passport Strength by Country and Region'
)

fig.update_layout(
    margin=dict(l=20, r=20, t=50, b=20),
    height=800,
    width=1200
)

fig.show()


In [101]:
# Group by region
region_stats = latest_rankings.groupby('region').agg(
    avg_visa_free=('visa_free_count', 'mean'),   # average visa-free count
    max_visa_free=('visa_free_count', 'max'),    # strongest passport in the region
    top_country=('country', lambda x: latest_rankings.loc[x.index, 'visa_free_count'].idxmax())
).reset_index()

# Sort by strongest passport
region_stats = region_stats.sort_values(by='max_visa_free', ascending=False)

print(region_stats)


        region  avg_visa_free  max_visa_free  top_country
2         ASIA      86.000000            193         2977
4       EUROPE     165.265306            189         3135
6      OCEANIA     124.357143            187         2439
1     AMERICAS     136.545455            184          599
5  MIDDLE EAST      77.733333            184         3633
3    CARIBBEAN     126.230769            163          279
0       AFRICA      62.259259            156         2937


In [102]:
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px

# --- Example Data ---
# latest_rankings = pd.DataFrame({
#     'country': ['Singapore', 'Finland', 'France', 'Belgium', 'Denmark'],
#     'region': ['ASIA', 'EUROPE', 'EUROPE', 'EUROPE', 'EUROPE'],
#     'visa_free_count': [180, 175, 174, 173, 173]
# })

# --- Color Mapping ---
regions = latest_rankings['region'].unique()
colors = px.colors.qualitative.Plotly[:len(regions)]
region_colors = dict(zip(regions, colors))

# --- Box Plot per Region ---
box_traces = []
for region in regions:
    df = latest_rankings[latest_rankings['region'] == region]
    box = go.Box(
        y=df['visa_free_count'],
        name=region,
        boxpoints='outliers',
        marker=dict(size=7, color=region_colors[region]),
        line=dict(color=region_colors[region], width=2),
        fillcolor='rgba(0,0,0,0)',
        boxmean='sd',
        customdata=df['country'],
        hovertemplate=(
            "<b>Region:</b> " + region +
            "<br><b>Country:</b> %{customdata}" +
            "<br><b>Visa-Free:</b> %{y}<extra></extra>"
        )
    )
    box_traces.append(box)

# --- Regional Average Bar ---
region_stats = latest_rankings.groupby('region', as_index=False).agg(avg_visa_free=('visa_free_count','mean'))
bar_trace = go.Bar(
    x=region_stats['region'],
    y=region_stats['avg_visa_free'],
    name='Regional Average',
    marker_color=[region_colors[r] for r in region_stats['region']],
    text=region_stats['avg_visa_free'].round(1),
    textposition='outside',
    hovertemplate="Region: %{x}<br>Avg Visa-Free: %{y}<extra></extra>"
)

# --- Country-level Bars per Region ---
region_bar_traces = []
for region in regions:
    df = latest_rankings[latest_rankings['region']==region].sort_values('visa_free_count', ascending=False)
    trace = go.Bar(
        x=df['country'],
        y=df['visa_free_count'],
        name=f'{region} Countries',
        marker_color=region_colors[region],
        text=df['visa_free_count'],
        textposition='outside',
        hovertemplate="Country: %{x}<br>Visa-Free: %{y}<extra></extra>"
    )
    region_bar_traces.append(trace)

# --- Combine Traces ---
fig = go.Figure(data=box_traces + [bar_trace] + region_bar_traces)

# Only box plots visible initially
for i in range(len(box_traces), len(fig.data)):
    fig.data[i].visible = False

# --- Buttons ---
buttons = []

# Box Plot
buttons.append(dict(
    label='📦 Box Plot',
    method='update',
    args=[{'visible':[True]*len(box_traces)+[False]*(1+len(region_bar_traces))},
          {'title':'Visa-Free Distribution by Region',
           'yaxis':{'type':'log','title':'Visa-Free Destinations'}}]
))

# Regional Average
visibility = [False]*len(box_traces) + [True] + [False]*len(region_bar_traces)
buttons.append(dict(
    label='🌍 Regional Average',
    method='update',
    args=[{'visible':visibility},
          {'title':'Average Visa-Free by Region',
           'yaxis':{'type':'log','title':'Visa-Free Destinations'}}]
))

# Country-level per Region
for i, region in enumerate(regions):
    visibility = [False]*(len(box_traces)+1) + [False]*len(region_bar_traces)
    visibility[len(box_traces)+1+i] = True
    buttons.append(dict(
        label=f'🏳️ {region}',
        method='update',
        args=[{'visible':visibility},
              {'title':f'Visa-Free Access by Country in {region}',
               'yaxis':{'type':'log','title':'Visa-Free Destinations'}}]
    ))

# --- Layout ---
fig.update_layout(
    width=1400,
    height=800,
    updatemenus=[dict(
        type='dropdown',
        buttons=buttons,
        x=1.02,           # move to top-right
        xanchor='left',
        y=1.15,
        yanchor='top'
    )],
    template='plotly_white',
    margin=dict(t=120, l=80, r=100, b=100),
    font=dict(size=15, family='Arial'),
    plot_bgcolor='rgba(245,245,245,1)',
    paper_bgcolor='white',
    showlegend=False,
    title=dict(
        text='Visa-Free Access Analysis by Region and Country',
        font=dict(size=22, family='Arial Black'),
        x=0.5
    ),
    xaxis=dict(showgrid=True, gridcolor='lightgrey', linecolor='grey', ticks='outside'),
    yaxis=dict(showgrid=True, gridcolor='lightgrey', linecolor='grey', ticks='outside', type='log')
)

fig.show()

