# Day 3: Polygons

Choropleth maps of Google Trends data: the share of searches for "cat" versus "dog" in each country.

In [2]:
import pandas as pd
import pycountry

## Clean data

In [17]:
# Clean country names
def clean_country(name):
    """Return the country name without surrounding whitespace.

    Missing values (anything ``pd.isna`` reports as missing) become an
    empty string.
    """
    return "" if pd.isna(name) else name.strip()

# Match to ISO alpha-3 codes
# Fallback alpha-3 codes, consulted only when the pycountry lookup fails.
# Keys are matched exactly (case- and punctuation-sensitive). Defined once
# here instead of being rebuilt on every failed lookup.
_MANUAL_ALPHA_3 = {
    "Congo - Brazzaville": "COG",
    "Congo - Kinshasa": "COD",
    "Czechia": "CZE",
    "Myanmar (Burma)": "MMR",
    "Palestine": "PSE",
    "São Tomé & Príncipe": "STP",
    "St Kitts & Nevis": "KNA",
    "St Lucia": "LCA",
    "St Vincent & the Grenadines": "VCT",
    "St Pierre & Miquelon": "SPM",
    "St Martin": "MAF",
    "St Barthélemy": "BLM",
    "US Virgin Islands": "VIR",
    "British Virgin Islands": "VGB",
    "US Outlying Islands": "UMI",
    "Caribbean Netherlands": "BES",
    "Åland Islands": "ALA",
    "Western Sahara": "ESH",
    "French Guiana": "GUF",
    "French Polynesia": "PYF",
    "French Southern Territories": "ATF",
    "Faroe Islands": "FRO",
    "Guadeloupe": "GLP",
    "Martinique": "MTQ",
    "Mayotte": "MYT",
    "Réunion": "REU",
    "Hong Kong": "HKG",
    "Macao": "MAC",
    "Taiwan": "TWN",
    "Kosovo": "XKX",
    "Vatican City": "VAT",
    "Pitcairn Islands": "PCN",
    "Heard & McDonald Islands": "HMD",
    "South Georgia & South Sandwich Islands": "SGS",
    "Cocos (Keeling) Islands": "CCK",
    "Christmas Island": "CXR",
    "British Indian Ocean Territory": "IOT",
    "Antarctica": "ATA",
    "Niue": "NIU",
    "Tokelau": "TKL",
    "Norfolk Island": "NFK",
    "Wallis & Futuna": "WLF",
    "New Caledonia": "NCL",
    "Cook Islands": "COK",
    "Montserrat": "MSR",
    "Bonaire": "BES",
    "Sint Maarten": "SXM",
    "Curacao": "CUW",
    "Russia": "RUS",
    "Brunei": "BRN",
    "Falkland Islands (Islas Malvinas)": "FLK",
    "St Helena": "SHN",
    "Trinidad & Tobago": "TTO",
    "Turks & Caicos Islands": "TCA",
    "Svalbard & Jan Mayen": "SJM",
    "Antigua & Barbuda": "ATG",
    "Micronesia": "FSM",
    "Bosnia & Herzegovina": "BIH",
    "Cape Verde": "CPV",
    "Côte d’Ivoire": "CIV",
}


def get_alpha_3(name):
    """Return the alpha-3 country code for ``name``, or ``None`` if unknown.

    The name is first resolved with ``pycountry.countries.lookup``. If that
    raises ``LookupError``, the manual fallback table is tried with an exact
    key match.

    Parameters
    ----------
    name : str
        Country name to resolve (e.g. the output of ``clean_country``).

    Returns
    -------
    str or None
        The three-letter code, or ``None`` when neither pycountry nor the
        fallback table knows the name (this includes the empty string).
    """
    try:
        # Direct match against the pycountry database.
        return pycountry.countries.lookup(name).alpha_3
    except LookupError:
        # Names pycountry rejected: use the manual table, else None.
        return _MANUAL_ALPHA_3.get(name)

In [None]:
# Load the export (skipping its first two lines), then add a trimmed country
# name and the matching ISO alpha-3 code as new columns.
df = (
    pd.read_csv("cat_dog_all.csv", skiprows=2)
    .assign(Country_clean=lambda frame: frame["Country"].apply(clean_country))
    .assign(ISO_alpha_3=lambda frame: frame["Country_clean"].apply(get_alpha_3))
)

# Optional: save to new CSV
# df.to_csv("geoMap_with_ISO.csv", index=False)

          Country ISO_alpha_3
0         Romania         ROU
1        Kiribati         KIR
2  United Kingdom         GBR
3   United States         USA
4       Australia         AUS


In [30]:
# Inspect the frame, which now carries the Country_clean and ISO_alpha_3 columns.
df

Unnamed: 0,Country,cat: (2025),dog: (2025),Country_clean,ISO_alpha_3
0,Romania,91%,9%,Romania,ROU
1,Kiribati,15%,85%,Kiribati,KIR
2,United Kingdom,37%,63%,United Kingdom,GBR
3,United States,37%,63%,United States,USA
4,Australia,37%,63%,Australia,AUS
...,...,...,...,...,...
245,Pitcairn Islands,,,Pitcairn Islands,PCN
246,Tokelau,,,Tokelau,TKL
247,Heard & McDonald Islands,,,Heard & McDonald Islands,HMD
248,Nauru,,,Nauru,NRU


## Explore