In [583]:
# Libraries
import pandas as pd
import numpy as np
import io
import json
import os

In [584]:
# Load the raw aggregated Excel export (Europe & Central Asia) into `df`.
# The path is relative to the notebook's working directory.
df = pd.read_excel('./raw_data/Europe-Central-Asia_aggregated_data_up_to-2026-01-10.xlsx')

In [585]:
# Parse WEEK into datetimes, then derive the calendar YEAR of every record from it.
df = (
    df
    .assign(WEEK=pd.to_datetime(df['WEEK']))
    .assign(YEAR=lambda frame: frame['WEEK'].dt.year)
)

#### Remove countries not present in the project

In [586]:
# Territories that appear in the source file but are excluded from the project.
countries = [
    'Akrotiri and Dhekelia',
    'Bailiwick of Guernsey',
    'Bailiwick of Jersey',
    'Isle of Man',
    'Greenland',
]

# Keep every row whose COUNTRY is not in the exclusion list and renumber the index from 0.
is_excluded = df['COUNTRY'].isin(countries)
df_filtrato = df.loc[~is_excluded].reset_index(drop=True)

#### Add the ISO column

In [587]:
# Retrieve the list of unique countries left in the filtered dataframe
# (printed so the ISO dictionary in the next cell can be checked against it).
list_countries = df_filtrato['COUNTRY'].unique().tolist()
print(list_countries)

['Albania', 'Andorra', 'Armenia', 'Austria', 'Azerbaijan', 'Belarus', 'Belgium', 'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Faroe Islands', 'Finland', 'France', 'Georgia', 'Germany', 'Gibraltar', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Italy', 'Kazakhstan', 'Kosovo', 'Kyrgyzstan', 'Latvia', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Malta', 'Moldova', 'Monaco', 'Montenegro', 'Netherlands', 'North Macedonia', 'Norway', 'Poland', 'Portugal', 'Romania', 'Russia', 'San Marino', 'Serbia', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'Tajikistan', 'Turkmenistan', 'Ukraine', 'United Kingdom', 'Uzbekistan', 'Vatican City']


In [588]:
# Country name (as spelled in the COUNTRY column) -> three-letter code stored in the ISO column.
# The same code is later used to build the `<ISO>.geojson` file name when the
# regional GeoJSON files are read, so it has to match those file names.
countries_iso3 = {
    'Albania': 'ALB',
    'Andorra': 'AND',
    'Armenia': 'ARM',
    'Austria': 'AUT',
    'Azerbaijan': 'AZE',
    'Belarus': 'BLR',
    'Belgium': 'BEL',
    'Bosnia and Herzegovina': 'BIH',
    'Bulgaria': 'BGR',
    'Croatia': 'HRV',
    'Cyprus': 'CYP',
    'Czech Republic': 'CZE',
    'Denmark': 'DNK',
    'Estonia': 'EST',
    'Faroe Islands': 'FRO',
    'Finland': 'FIN',
    'France': 'FRA',
    'Georgia': 'GEO',
    'Germany': 'DEU',
    'Gibraltar': 'GIB',
    'Greece': 'GRC',
    'Hungary': 'HUN',
    'Iceland': 'ISL',
    'Ireland': 'IRL',
    'Italy': 'ITA',
    'Kazakhstan': 'KAZ',
    'Kosovo': 'XKO',
    'Kyrgyzstan': 'KGZ',
    'Latvia': 'LVA',
    'Liechtenstein': 'LIE',
    'Lithuania': 'LTU',
    'Luxembourg': 'LUX',
    'Malta': 'MLT',
    'Moldova': 'MDA',
    'Monaco': 'MCO',
    'Montenegro': 'MNE',
    'Netherlands': 'NLD',
    'North Macedonia': 'MKD',
    'Norway': 'NOR',
    'Poland': 'POL',
    'Portugal': 'PRT',
    'Romania': 'ROU',
    'Russia': 'RUS',
    'San Marino': 'SMR',
    'Serbia': 'SRB',
    'Slovakia': 'SVK',
    'Slovenia': 'SVN',
    'Spain': 'ESP',
    'Sweden': 'SWE',
    'Switzerland': 'CHE',
    'Tajikistan': 'TJK',
    'Turkmenistan': 'TKM',
    'Ukraine': 'UKR',
    'United Kingdom': 'GBR',
    'Uzbekistan': 'UZB',
    'Vatican City': 'VAT'
}

In [589]:
# Attach the three-letter code of each country; names missing from the dictionary become NaN.
df_filtrato['ISO'] = df_filtrato['COUNTRY'].map(countries_iso3)

# Rows that received no code, kept for inspection.
missing_countries = df_filtrato.loc[df_filtrato['ISO'].isna()]

# Report the offending country names only when something is actually missing.
if len(missing_countries) > 0:
    print(missing_countries['COUNTRY'].unique())

#### Cleaning of the dataframe

In [590]:
# Discard 'Strategic developments' records and the ID column in a single chain.
df_filtrato = (
    df_filtrato
    .loc[df_filtrato['EVENT_TYPE'] != 'Strategic developments']
    .drop(columns=['ID'])
)

## Fix Regions for All Countries (ADMIN1)

In [591]:
# Create the GID_1 column with an empty-string placeholder.
# `riempi_gid_1` (next cells) recomputes GID_1 for every row and replaces these values.
df_filtrato['GID_1'] = ""

In [592]:
def riempi_gid_1(df, geojson_folder_path):
    """Return a copy of ``df`` whose ``GID_1`` column is filled from per-country GeoJSON files.

    For every distinct non-null ``ISO`` value in ``df`` the file
    ``<geojson_folder_path>/<ISO>.geojson`` is read and each feature's
    ``properties.NAME_1`` / ``properties.GID_1`` pair is recorded (pairs with an
    empty or missing name or id are ignored). A row receives the ``GID_1`` whose
    ``NAME_1`` is exactly equal to its ``ADMIN1``; every other row (missing or
    empty ``ADMIN1``, unknown region, missing or unreadable file) receives NaN.

    Args:
        df: DataFrame with at least the columns ``ISO`` and ``ADMIN1``. It is not modified.
        geojson_folder_path: Folder holding one ``<ISO>.geojson`` file per country.

    Returns:
        A copy of ``df`` with the ``GID_1`` column added or replaced.
    """
    # (ISO, NAME_1) -> GID_1 for every region found in the GeoJSON files.
    gid_lookup = {}

    # Null ISO codes (countries without a code) are skipped: they would only
    # produce a pointless lookup of "nan.geojson".
    for iso_code in df['ISO'].dropna().unique():
        file_name = f"{iso_code}.geojson"
        file_path = os.path.join(geojson_folder_path, file_name)

        if not os.path.exists(file_path):
            print(f"Attenzione: File {file_name} non trovato nella cartella specificata.")
            continue

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            for feature in data.get('features', []):
                props = feature.get('properties', {})
                name_1 = props.get('NAME_1')
                gid_1 = props.get('GID_1')

                if name_1 and gid_1:
                    gid_lookup[(iso_code, name_1)] = gid_1

        except Exception as e:
            # Best effort: one unreadable file must not stop the other countries.
            print(f"Errore nella lettura di {file_name}: {e}")

    print("Done")

    df_result = df.copy()
    # Plain dictionary lookups over the two columns replace a row-wise
    # DataFrame.apply: same matches, no per-row Series construction, and an
    # empty frame simply yields an empty GID_1 column.
    df_result['GID_1'] = [
        gid_lookup.get((iso, region), np.nan)
        for iso, region in zip(df_result['ISO'], df_result['ADMIN1'])
    ]

    return df_result

In [593]:

# Folder holding one `<ISO>.geojson` file per country.
cartella_geojson = "../../data/final/geojson/countries_choropleth"

# Stop here when the folder is missing: running the lookup anyway would only
# print one "file not found" warning per country and leave every GID_1 empty.
if not os.path.exists(cartella_geojson):
    raise FileNotFoundError(
        f"Error: The specified folder '{cartella_geojson}' does not exist. Please check the path."
    )

# Replace df_filtrato with a copy whose GID_1 column holds the exact NAME_1 matches.
df_filtrato = riempi_gid_1(df_filtrato, cartella_geojson)


Done


# Number of GeoJSON regions == number of dataset regions

## ALB

In [594]:
# Albania (ALB): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# Names missing from this table are looked up unchanged by the processing loop.
ALB_fix = {
    'Diber': 'Dibër',
    'Durres': 'Durrës',
    'Gjirokaster': 'Gjirokastër',
    'Korce': 'Korçë',
    'Kukes': 'Kukës',
    'Lezhe': 'Lezhë',
    'Shkoder': 'Shkodër',
    'Tirana': 'Tiranë',
    'Vlore': 'Vlorë'
}

## ARM

In [595]:
# Armenia (ARM): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
ARM_fix = {
    'Vayots Dzor': 'VayotsDzor',
    'Yerevan': 'Erevan'
}

## AUT

In [596]:
# Austria (AUT): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling (umlauts restored).
AUT_fix = {
    'Karnten': 'Kärnten',
    'Niederosterreich': 'Niederösterreich',
    'Oberosterreich': 'Oberösterreich'
}

## BEL

In [597]:
# Belgium (BEL): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
BEL_fix = {
    'Brussels': 'Bruxelles'
}

## BGR

In [598]:
# Bulgaria (BGR): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling (written without spaces).
BGR_fix = {
    'Sofia City': 'GradSofiya',
    'Stara Zagora': 'StaraZagora',
    'Veliko Tarnovo': 'VelikoTarnovo'
}

## BIH

In [599]:
# Bosnia and Herzegovina (BIH): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# NOTE(review): 'RepuplikaSrpska' presumably mirrors the spelling stored in the GeoJSON;
# check the file before "correcting" it, otherwise the lookup will stop matching.
BIH_fix = {
    'Brcko': 'Brčko',
    'Federation of Bosnia and Herzegovina': 'FederacijaBosnaiHercegovina',
    'Republika Srpska': 'RepuplikaSrpska'
}

## CZE

In [600]:
# Czech Republic (CZE): dataset ADMIN1 (English names) -> GeoJSON NAME_1 (Czech names).
CZE_fix = {
    'Central Bohemia': 'Středočeský',
    'Hradec Kralove': 'Královéhradecký',
    'Karlovy Vary': 'Karlovarský',
    'Liberec': 'Liberecký',
    'Moravian-Silesian': 'Moravskoslezský',
    'Olomouc': 'Olomoucký',
    'Pardubice': 'Pardubický',
    'Plzen': 'Plzeňský',
    'South Bohemia': 'Jihočeský',
    'South Moravian': 'Jihomoravský',
    'Usti nad Labem': 'Ústecký',
    'Vysocina': 'KrajVysočina',
    'Zlin': 'Zlínský'
}

## EST

In [601]:
# Estonia (EST): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling (diacritics restored).
EST_fix = {
    'Jarva': 'Järva',
    'Jogeva': 'Jõgeva',
    'Laane': 'Lääne',
    'Laane-Viru': 'Lääne-Viru',
    'Parnu': 'Pärnu',
    'Voru': 'Võru'
}

## HRV

In [602]:
# Croatia (HRV): dataset ADMIN1 (English-style names) -> GeoJSON NAME_1 (Croatian names).
HRV_fix = {
    'Bjelovar-Bilogora': 'Bjelovarska-Bilogorska',
    'Brod-Posavina': 'Brodsko-Posavska',
    'City of Zagreb': 'GradZagreb',
    'Dubrovnik-Neretva': 'Dubrovacko-Neretvanska',
    'Istria': 'Istarska',
    'Karlovac': 'Karlovacka',
    'Koprivnica-Krizevci': 'Koprivničko-Križevačka',
    'Krapina-Zagorje': 'Krapinsko-Zagorska',
    'Lika-Senj': 'Licko-Senjska',
    'Medimurje': 'Medimurska',
    'Osijek-Baranja': 'Osjecko-Baranjska',
    'Pozega-Slavonia': 'Požeško-Slavonska',
    'Primorje-Gorski Kotar': 'Primorsko-Goranska',
    'Sibenik-Knin': 'Šibensko-Kninska',
    'Sisak-Moslavina': 'Sisacko-Moslavacka',
    'Split-Dalmatia': 'Splitsko-Dalmatinska',
    'Varazdin': 'Varaždinska',
    'Virovitica-Podravina': 'Viroviticko-Podravska',
    'Vukovar-Sirmium': 'Vukovarsko-Srijemska',
    'Zadar': 'Zadarska',
    'Zagreb': 'Zagrebačka'
}

## HUN

In [603]:
# Hungary (HUN): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling (accents restored).
HUN_fix = {
    'Bacs-Kiskun': 'Bács-Kiskun',
    'Bekes': 'Békés',
    'Borsod-Abauj-Zemplen': 'Borsod-Abaúj-Zemplén',
    'Csongrad': 'Csongrád',
    'Fejer': 'Fejér',
    'Hajdu-Bihar': 'Hajdú-Bihar',
    'Jasz-Nagykun-Szolnok': 'Jász-Nagykun-Szolnok',
    'Komarom-Esztergom': 'Komárom-Esztergom',
    'Nograd': 'Nógrád',
    'Szabolcs-Szatmar-Bereg': 'Szabolcs-Szatmár-Bereg',
    'Veszprem': 'Veszprém'
}

## IRL

In [604]:
# Ireland (IRL): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
IRL_fix = {
    'Limerick City and County': 'Limerick',
    'Waterford City and County': 'Waterford',
}

## ITA

In [605]:
# Italy (ITA): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
ITA_fix = {
    'Friuli-Venezia Giulia': 'Friuli-VeneziaGiulia',
    'Puglia': 'Apulia',
    'Sicilia': 'Sicily',
    'Trentino-Alto Adige': 'Trentino-AltoAdige',
    "Valle d'Aosta": "Valled'Aosta"
}

## KGZ

In [606]:
# Kyrgyzstan (KGZ): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
KGZ_fix = {
    'Bishkek': 'Biškek',
    'Chui': 'Chüy',
    'Issyk-Kul': 'Ysyk-Köl',
    'Osh City': 'Osh(city)'
}

## PRT

In [607]:
# Portugal (PRT): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
PRT_fix = {
    'Braganca': 'Bragança',
    'Castelo Branco': 'CasteloBranco',
    'Evora': 'Évora',
    'Santarem': 'Santarém',
    'Setubal': 'Setúbal',
    'Viana do Castelo': 'VianadoCastelo',
    'Vila Real': 'VilaReal'
}

## SRB

In [608]:
# Serbia (SRB): dataset ADMIN1 (English-style names) -> GeoJSON NAME_1 (Serbian names).
SRB_fix = {
    'Belgrade': 'GradBeograd',
    'Bor': 'Borski',
    'Branicevo': 'Braničevski',
    'Central Banat': 'Srednje-Banatski',
    'Jablanica': 'Jablanički',
    'Kolubara': 'Kolubarski',
    'Macva': 'Mačvanski',
    'Moravica': 'Moravički',
    'Nisava': 'Nišavski',
    'North Backa': 'Severno-Bački',
    'North Banat': 'Severno-Banatski',
    'Pcinja': 'Pčinjski',
    'Pirot': 'Pirotski',
    'Podunavlje': 'Podunavski',
    'Pomoravlje': 'Pomoravski',
    'Rasina': 'Rasinski',
    'Raska': 'Raški',
    'South Backa': 'Južno-Bački',
    'South Banat': 'Južno-Banatski',
    'Srem': 'Sremski',
    'Sumadija': 'Šumadijski',
    'Toplica': 'Toplički',
    'West Backa': 'Zapadno-Bački',
    'Zajecar': 'Zaječarski',
    'Zlatibor': 'Zlatiborski'
}

## SVK

In [609]:
# Slovakia (SVK): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling (diacritics restored).
SVK_fix = {
    'Banskobystricky': 'Banskobystrický',
    'Bratislavsky': 'Bratislavský',
    'Kosicky': 'Košický',
    'Presovsky': 'Prešovský',
    'Trenciansky': 'Trenčiansky',
    'Trnavsky': 'Trnavský',
    'Zilinsky': 'Žilinský'
}

## TJK

In [610]:
# Tajikistan (TJK): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# NOTE(review): 'DistrictsofRepublicanSubordin' looks truncated; presumably the GeoJSON
# stores it that way - verify against the file.
TJK_fix = {
    'Districts of Republican Subordination': 'DistrictsofRepublicanSubordin',
    'Gorno-Badakhshan Autonomous': 'Gorno-Badakhshan'
}

## UZB

In [611]:
# Uzbekistan (UZB): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
UZB_fix = {
    'Andizhan': 'Andijon',
    'Bukhara': 'Buxoro',
    'Dzhizak': 'Jizzax',
    'Fergana': "Farg'ona",
    'Kashkadarya': 'Qashqadaryo',
    'Khorezm': 'Xorazm',
    'Navoi': 'Navoiy',
    'Republic of Karakalpakstan': 'Qaraqalpaqstan',
    # NOTE(review): the trailing apostrophe in "Samarqand'" looks like a typo; if the GeoJSON
    # NAME_1 is "Samarqand", these rows never match and keep a null GID_1 - confirm.
    'Samarkand': "Samarqand'",
    'Surkhandarya': 'Surxondaryo',
    'Tashkent': 'Toshkent',
    'Tashkent City': 'ToshkentShahri'
}

## XKO

In [612]:
# Kosovo (XKO): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
XKO_fix = {
    'Ferizaj': 'Uroševac',
    'Gjakova': 'Đakovica',
    'Gjilan': 'Gnjilane',
    'Mitrovica': 'KosovskaMitrovica',
    'Peja': 'Pećki',
    'Prizreni': 'Prizren'
}

# Number of GeoJSON regions > number of dataset regions

## CHE

In [613]:
# Switzerland (CHE): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
CHE_fix = {
    'Appenzell Ausserrhoden': 'AppenzellAusserrhoden',
    'Geneve': 'Genève',
    'Graubunden': 'Graubünden',
    'Luzern': 'Lucerne',
    'Neuchatel': 'Neuchâtel',
    'St. Gallen': 'SanktGallen',
    'Zurich': 'Zürich'
}

## NLD

In [614]:
# Netherlands (NLD): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
NLD_fix = {
    'Friesland': 'Fryslân'
}


## TKM

In [615]:
# Turkmenistan (TKM): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# 'Ashgabat' is assigned to a differently named region ('Ahal'), not merely respelled.
TKM_fix = {
    'Ashgabat': 'Ahal',
    'Dasoguz': 'Daşoguz'
}

## MKD

In [616]:
# North Macedonia (MKD): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
MKD_fix = {
    'Bogdantsi': 'Bogdanci',
    'Brvenitsa': 'Brvenica',
    'Chashka': 'Čaška',
    'Cheshinovo-Obleshevo': 'Češinovo-Obleševo',
    'Chucher-Sandevo': 'ČučerSandevo',
    'Debrtsa': 'Debarca',
    'Delchevo': 'Delčevo',
    'Demir Hisar': 'DemirHisar',
    'Demir Kapija': 'DemirKapija',
    'Dojran': 'StarDojran',
    'Karbintsi': 'Karbinci',
    'Kichevo': 'Kičevo',
    'Kisela Voda': 'KiselaVoda',
    'Kochani': 'Kočani',
    'Kriva Palanka': 'KrivaPalanka',
    'Krushevo': 'Kruševo',
    'Makedonska Kamenitsa': 'MakedonskaKamenica',
    'Makedonski Brod': 'MakedonskiBrod',
    'Mavrovo and Roshtushe': 'MavrovoandRostuša',
    'Novo Selo': 'NovoSelo',
    'Pehchevo': 'Pehčevo',
    'Radovish': 'Radoviš',
    'Shtip': 'Štip',
    'Studenichani': 'Studeničani',
    'Sveti Nikole': 'SvetiNikole',
    'Teartse': 'Tearce',
    'Zhelino': 'Želino'
}

# Number of dataset regions > number of GeoJSON regions

## GBR

In [617]:
# United Kingdom (GBR): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
GBR_fix = {
    'Northern Ireland': 'NorthernIreland',
}

## AZE

In [618]:
# Azerbaijan (AZE): many-to-one table - several dataset ADMIN1 values are folded
# into the same GeoJSON NAME_1 region. Entries that map a name to itself are
# redundant for the lookup (unmapped names are tried unchanged) but harmless.
AZE_fix = {
    'Baku': 'Absheron',
    'Sumqayit': 'Absheron',
    'Absheron': 'Absheron',
    'Khizi': 'Absheron',
    'Astara': 'Lankaran',
    'Jalilabad': 'Lankaran',
    'Lankaran': 'Lankaran',
    'Lerik': 'Lankaran',
    'Masally': 'Lankaran',
    'Yardymli': 'Lankaran',
    'Babek': 'Nakhchivan',
    'Julfa': 'Nakhchivan',
    'Nakhchivan': 'Nakhchivan',
    'Ordubad': 'Nakhchivan',
    'Sadarak': 'Nakhchivan',
    'Shakhbuz': 'Nakhchivan',
    'Sharur': 'Nakhchivan',
    'Kangarli': 'Nakhchivan',
    'Agdash': 'Aran',
    'Barda': 'Aran',
    'Beylagan': 'Aran',
    'Bilasuvar': 'Aran',
    'Goychay': 'Aran',
    'Imishli': 'Aran',
    'Kurdamir': 'Aran',
    'Mingachevir': 'Aran',
    'Neftchala': 'Aran',
    'Saatly': 'Aran',
    'Salyan': 'Aran',
    'Yevlakh': 'Aran',
    'Zardab': 'Aran',
    'Hajigabul': 'Aran',
    'Aghjabadi': 'Aran',
    'Agstafa': 'Ganja-Qazakh',
    'Dashkasan': 'Ganja-Qazakh',
    'Gadabay': 'Ganja-Qazakh',
    'Ganja': 'Ganja-Qazakh',
    'Goranboy': 'Ganja-Qazakh',
    'Goygol': 'Ganja-Qazakh',
    'Naftalan': 'Ganja-Qazakh',
    'Qazakh': 'Ganja-Qazakh',
    'Shamkir': 'Ganja-Qazakh',
    'Samukh': 'Ganja-Qazakh',
    'Tovuz': 'Ganja-Qazakh',
    'Balakan': 'Shaki-Zaqatala',
    'Oghuz': 'Shaki-Zaqatala',
    'Qabala': 'Shaki-Zaqatala',
    'Qakh': 'Shaki-Zaqatala',
    'Shaki': 'Shaki-Zaqatala',
    'Zaqatala': 'Shaki-Zaqatala',
    'Khachmaz': 'Quba-Khachmaz',
    'Quba': 'Quba-Khachmaz',
    'Qusar': 'Quba-Khachmaz',
    'Shabran': 'Quba-Khachmaz',
    'Siazan': 'Quba-Khachmaz',
    'Kelbecer': 'Kalbajar-Lachin',
    'Lacin': 'Kalbajar-Lachin',
    'Qubadli': 'Kalbajar-Lachin',
    'Zengilan': 'Kalbajar-Lachin',
    'Agdam': 'Yukhari-Karabakh',
    'Fizuli': 'Yukhari-Karabakh',
    'Jabrayil': 'Yukhari-Karabakh',
    'Khojavend': 'Yukhari-Karabakh',
    'Shusha': 'Yukhari-Karabakh',
    'Stepanakert': 'Yukhari-Karabakh',
    'Tartar': 'Yukhari-Karabakh',
    'Xocali': 'Yukhari-Karabakh'
}


## BLR

In [619]:
# Belarus (BLR): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
BLR_fix = {
    'Minsk city': 'Minsk',
}

## DEU

In [620]:
# Germany (DEU): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling (umlauts restored).
DEU_fix = {
    'Baden-Wurttemberg': 'Baden-Württemberg',
    'Thuringen': 'Thüringen'
}

## DNK

In [621]:
# Denmark (DNK): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# Offshore labels (oil/gas fields, sea basin) are assigned to a land region.
DNK_fix = {
    'Sjaelland': 'Sjælland',
    'Halfdan Oil and Gas Field': 'Syddanmark',
    'Roar Oil and Gas Field': 'Syddanmark',
    'Baltic Sea - Bornholm Basin': 'Hovedstaden'
}


## ESP

In [622]:
# Spain (ESP): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# 'Ceuta' and 'Melilla' share the single region 'CeutayMelilla'.
ESP_fix = {
    'Andalucia': 'Andalucía',
    'Aragon': 'Aragón',
    'Asturias': 'PrincipadodeAsturias',
    'Canarias': 'IslasCanarias',
    'Castilla y Leon': 'CastillayLeón',
    'Castilla-La Mancha': 'Castilla-LaMancha',
    'Cataluna': 'Cataluña',
    'Ceuta': 'CeutayMelilla',
    'Melilla': 'CeutayMelilla',
    'Islas Baleares': 'IslasBaleares',
    'La Rioja': 'LaRioja',
    'Madrid': 'ComunidaddeMadrid',
    'Murcia': 'RegióndeMurcia',
    'Navarra': 'ComunidadForaldeNavarra',
    'Pais Vasco': 'PaísVasco',
    'Valencia': 'ComunidadValenciana'
}

## FIN

In [623]:
# Finland (FIN): many-to-one table - the dataset's ADMIN1 regions are folded into
# the larger GeoJSON NAME_1 areas.
# 'Kanta-Hame' used to be listed twice (SouthernFinland, then WesternFinland); in a
# dict literal the last duplicate wins, so the region silently ended up in
# WesternFinland. Only the SouthernFinland entry is kept.
FIN_fix = {
    'Uusimaa': 'SouthernFinland',
    'Kanta-Hame': 'SouthernFinland',
    'Paijat-Hame': 'SouthernFinland',
    'Kymenlaakso': 'SouthernFinland',
    'Etela-Karjala': 'SouthernFinland',
    'Varsinais-Suomi': 'WesternFinland',
    'Satakunta': 'WesternFinland',
    'Pirkanmaa': 'WesternFinland',
    'Keski-Suomi': 'WesternFinland',
    'Etela-Pohjanmaa': 'WesternFinland',
    'Pohjanmaa': 'WesternFinland',
    'Keski-Pohjanmaa': 'WesternFinland',
    'Etela-Savo': 'EasternFinland',
    'Pohjois-Savo': 'EasternFinland',
    'Pohjois-Karjala': 'EasternFinland',
    'Pohjois-Pohjanmaa': 'Oulu',
    'Kainuu': 'Oulu',
    'Lappi': 'Lapland',
    # Sea-area label assigned to a land region.
    'Gulf of Finland': 'SouthernFinland',
    'Ahvenanmaa': 'WesternFinland'
}


## FRA

In [624]:
# France (FRA): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# The Bay of Biscay sea-area label is assigned to a land region.
FRA_fix = {
    'Auvergne-Rhone-Alpes': 'Auvergne-Rhône-Alpes',
    'Bourgogne-Franche-Comte': 'Bourgogne-Franche-Comté',
    'Centre-Val de Loire': 'Centre-ValdeLoire',
    'Grand Est': 'GrandEst',
    'Ile-de-France': 'Île-de-France',
    'Pays de la Loire': 'PaysdelaLoire',
    "Provence-Alpes-Cote d'Azur": "Provence-Alpes-Côted'Azur",
    'Atlantic Ocean - Bay of Biscay': 'Nouvelle-Aquitaine'
}

## GEO

In [625]:
# Georgia (GEO): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# 'Tskhinvali', 'Znaur' and 'Leningor' are folded into an existing GeoJSON region.
GEO_fix = {
    'Autonomous Republic of Abkhazia': 'Abkhazia',
    'Autonomous Republic of Adjara': 'Ajaria',
    'Kvemo Kartli': 'KvemoKartli',
    'Racha-Lechkhumi and Kvemo Svaneti': 'Racha-Lechkhumi-KvemoSvaneti',
    'Samegrelo-Zemo Svaneti': 'Samegrelo-ZemoSvaneti',
    'Shida Kartli': 'ShidaKartli',
    'Tskhinvali': 'ShidaKartli',
    'Znaur': 'ShidaKartli',
    'Leningor': 'Mtskheta-Mtianeti'
}


## GRC

In [626]:
# Greece (GRC): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# Sea-area labels are assigned to a land region. The key 'Lybian Sea' has to stay
# exactly as the dataset spells it - presumably that is the source spelling; verify.
GRC_fix = {
    'Athonite State': 'Athos',
    'Epirus-Western Macedonia': 'EpirusandWesternMacedonia',
    'Macedonia-Thrace': 'MacedoniaandThrace',
    'Peloponnese-Western Greece-Ionian Islands': 'Peloponnese,WesternGreeceand',
    'Thessaly-Central Greece': 'ThessalyandCentralGreece',
    'South Aegean': 'Aegean',
    'Aegean Sea': 'Aegean',
    'Ionian Sea': 'Peloponnese,WesternGreeceand',
    'Lybian Sea': 'Crete'
}

## ISL

In [627]:
# Iceland (ISL): many-to-one table - dataset ADMIN1 values are folded into the
# GeoJSON NAME_1 regions.
ISL_fix = {
    'Reykjavikurborg': 'Höfuðborgarsvæði',
    'Hafnarfjardarkaupstadur': 'Höfuðborgarsvæði',
    'Gardabaer': 'Höfuðborgarsvæði',
    'Seltjarnarnesbaer': 'Höfuðborgarsvæði',
    'Reykjanesbaer': 'Suðurnes',
    'Grindavikurbaer': 'Suðurnes',
    'Sudurnesjabaer': 'Suðurnes',
    'Akraneskaupstadur': 'Vesturland',
    'Borgarbyggd': 'Vesturland',
    'Grundarfjardarbaer': 'Vesturland',
    'Snaefellsbaer': 'Vesturland',
    'Stykkisholmsbaer': 'Vesturland',
    'Isafjardarbaer': 'Vestfirðir',
    'Vesturbyggd': 'Vestfirðir',
    'Sveitarfelagid Skagafjordur': 'Norðurlandvestra',
    'Sveitarfelagid Skagastrond': 'Norðurlandvestra',
    'Akureyrarbaer': 'Norðurlandeystra',
    'Nordurthing': 'Norðurlandeystra',
    'Dalvikurbyggd': 'Norðurlandeystra',
    'Fjardabyggd': 'Austurland',
    'Mulathing': 'Austurland',
    'Sveitarfelagid Arborg': 'Suðurland',
    'Sveitarfelagid Olfus': 'Suðurland'
}


## KAZ

In [628]:
# Kazakhstan (KAZ): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# The last four entries fold dataset regions/cities into an existing GeoJSON region.
KAZ_fix = {
    'Aqtobe': 'Aqtöbe',
    'East Kazakhstan': 'EastKazakhstan',
    'Kostanay': 'Qostanay',
    'Kyzylorda': 'Qyzylorda',
    'Mangystau': 'Mangghystau',
    'North Kazakhstan': 'NorthKazakhstan',
    'West Kazakhstan': 'WestKazakhstan',
    'Turkistan': 'SouthKazakhstan',
    'Shymkent': 'SouthKazakhstan',
    'Astana City': 'Aqmola',
    'Almaty City': 'Almaty'
}


## LTU

In [629]:
# Lithuania (LTU): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# The 'Coast of Klaipeda' label is assigned to a land region.
LTU_fix = {
    'Alytus': 'Alytaus',
    'Siauliu': 'Šiauliai',
    'Telsiu': 'Telšiai',
    'Coast of Klaipeda': 'Klaipedos'
}

## LUX

In [630]:
# Luxembourg (LUX): many-to-one table - dataset ADMIN1 values are folded into the
# GeoJSON NAME_1 regions.
LUX_fix = {
    'Capellen': 'Luxembourg',
    'Esch-sur-Alzette': 'Luxembourg',
    'Clervaux': 'Diekirch',
    'Echternach': 'Grevenmacher',
    'Remich': 'Grevenmacher'
}


## LVA

In [631]:
# Latvia (LVA): many-to-one table - dataset ADMIN1 values are folded into the
# GeoJSON NAME_1 regions.
LVA_fix = {
    'Kuldigas': 'Kurzeme',
    'Liepaja': 'Kurzeme',
    'Saldus': 'Kurzeme',
    'Talsu': 'Kurzeme',
    'Tukuma': 'Kurzeme',
    'Ventspils': 'Kurzeme',
    'Kandavas': 'Kurzeme',
    'Rojas': 'Kurzeme',
    'Skrundas': 'Kurzeme',
    'Daugavpils': 'Latgale',
    'Kraslavas': 'Latgale',
    'Rezekne': 'Latgale',
    'Rezeknes': 'Latgale',
    'Zilupes': 'Latgale',
    'Gulbenes': 'Vidzeme',
    'Limbazu': 'Vidzeme',
    'Ogres': 'Vidzeme',
    'Valkas': 'Vidzeme',
    'Valmiera': 'Vidzeme',
    'Keguma': 'Vidzeme',
    'Ropazu': 'Vidzeme',
    'Saulkrastu': 'Vidzeme',
    'Strencu': 'Vidzeme',
    'Bauskas': 'Zemgale',
    'Dobeles': 'Zemgale',
    'Jelgava': 'Zemgale',
    'Aizkraukles': 'Zemgale',
    'Jekabpils': 'Zemgale'
}


## MDA

In [632]:
# Moldova (MDA): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# The last five entries fold several dataset regions into 'Transnistria'.
MDA_fix = {
    'Anenii Noi': 'AneniiNoi',
    'Balti': 'Bălţi',
    'Chisinau': 'Chişinău',
    'Cimislia': 'Cimişlia',
    'Dubasari': 'Dubăsari',
    'Edinet': 'Edineţ',
    'Falesti': 'Făleşti',
    'Floresti': 'Floreşti',
    'Gagauzia': 'Găgăuzia',
    'Hincesti': 'Hîncesti',
    'Ocnita': 'Ocniţa',
    'Riscani': 'Rîşcani',
    'Singerei': 'Sîngerei',
    'Soldanesti': 'Şoldăneşti',
    'Stefan Voda': 'ŞtefanVoda',
    'Straseni': 'Străşeni',
    'Telenesti': 'Teleneşti',
    'Camenca': 'Transnistria',
    'Grigoriopol': 'Transnistria',
    'Ribnita': 'Transnistria',
    'Slobozia': 'Transnistria',
    'Tiraspol': 'Transnistria'
}


## MLT

In [633]:
# Malta (MLT): dataset ADMIN1 (English names) -> GeoJSON NAME_1 (Maltese names).
MLT_fix = {
    'Central': 'Ċentrali',
    'Gozo': 'Għawdex',
    'Northern': 'Tramuntana',
    'South Eastern': 'Xlokk',
    'Southern': 'Nofsinhar',
}


## MNE

In [634]:
# Montenegro (MNE): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# The last three entries fold dataset regions into an existing GeoJSON region.
# (A repeated 'Herceg Novi' key was removed; the mapping itself is unchanged.)
MNE_fix = {
    'Bijelo Polje': 'BijeloPolje',
    'Herceg Novi': 'HercegNovi',
    'Kolasin': 'Kolašin',
    'Niksic': 'Nikšic',
    'Pluzine': 'Plužine',
    'Rozaje': 'Rožaje',
    'Savnik': 'Šavnik',
    'Zabljak': 'Žabljak',
    'Petnjica': 'Berane',
    'Gusinje': 'Plav',
    'Tuzi': 'Podgorica'
}

## NOR

In [635]:
# Norway (NOR): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
NOR_fix = {
    'More og Romsdal': 'MøreogRomsdal',
}

## POL

In [636]:
# Poland (POL): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# (A repeated 'Lodzkie' key was removed; that mapping is unchanged.)
POL_fix = {
    'Lodzkie': 'Łódzkie',
    'Malopolskie': 'Małopolskie',
    'Slaskie': 'Śląskie',
    'Swietokrzyskie': 'Świętokrzyskie',
    'Warminsko-Mazurskie': 'Warmińsko-Mazurskie',
    # Sea-area label assigned to the coastal voivodeship on the Bay of Gdansk.
    # It was previously 'Kujawsko-Pomorskie', which is landlocked.
    # Assumes the GeoJSON NAME_1 is spelled 'Pomorskie' - TODO confirm against POL.geojson.
    'Bay of Gdansk': 'Pomorskie'
}

## ROU

In [637]:
# Romania (ROU): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling (diacritics restored).
# The 'Western Black Sea' sea-area label is assigned to a land region.
ROU_fix = {
    'Arges': 'Argeș',
    'Bacau': 'Bacău',
    'Bistrita-Nasaud': 'Bistrița-Năsăud',
    'Botosani': 'Botoșani',
    'Braila': 'Brăila',
    'Brasov': 'Brașov',
    'Buzau': 'Buzău',
    'Calarasi': 'Călărași',
    'Caras-Severin': 'Caraș-Severin',
    'Constanta': 'Constanța',
    'Dambovita': 'Dâmbovița',
    'Galati': 'Galați',
    'Ialomita': 'Ialomița',
    'Iasi': 'Iași',
    'Maramures': 'Maramureș',
    'Mehedinti': 'Mehedinți',
    'Mures': 'Mureș',
    'Neamt': 'Neamț',
    'Salaj': 'Sălaj',
    'Satu Mare': 'SatuMare',
    'Timis': 'Timiș',
    'Valcea': 'Vâlcea',
    'Western Black Sea': 'Tulcea'
}


## RUS

In [638]:
# Russia (RUS): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# Watch the Moscow pair: the city maps to 'MoscowCity', the oblast to 'Moskva'.
# The last two entries assign sea-area labels to a land region.
RUS_fix = {
    'Moscow': 'MoscowCity',
    'Moscow Oblast': 'Moskva',
    'Altai': 'Altay',
    'Republic of Altai': 'Gorno-Altay',
    'Saint Petersburg': 'CityofSt.Petersburg',
    'Republic of Adygea': 'Adygey',
    'Republic of Bashkortostan': 'Bashkortostan',
    'Republic of Buryatia': 'Buryat',
    'Republic of Chechnya': 'Chechnya',
    'Republic of Chuvash': 'Chuvash',
    'Republic of Dagestan': 'Dagestan',
    'Republic of Ingushetia': 'Ingush',
    'Republic of Kabardino-Balkaria': 'Kabardin-Balkar',
    'Republic of Kalmykia': 'Kalmyk',
    'Republic of Karachay-Cherkessia': 'Karachay-Cherkess',
    'Republic of Karelia': 'Karelia',
    'Republic of Khakassia': 'Khakass',
    'Republic of Komi': 'Komi',
    'Republic of Mari El': 'Mariy-El',
    'Republic of Mordovia': 'Mordovia',
    'Republic of North Ossetia-Alania': 'NorthOssetia',
    'Republic of Sakha': 'Sakha',
    'Republic of Tatarstan': 'Tatarstan',
    'Republic of Tuva': 'Tuva',
    'Udmurt Republic': 'Udmurt',
    'Arkhangelsk': "Arkhangel'sk",
    'Astrakhan': "Astrakhan'",
    'Chukotka': 'Chukot',
    'Jewish Autonomous Oblast': 'Yevrey',
    'Khanty-Mansi': 'Khanty-Mansiy',
    'Nizhny Novgorod': 'Nizhegorod',
    'Oryol': 'Orel',
    'Perm': "Perm'",
    'Primorskiy': "Primor'ye",
    'Ryazan': "Ryazan'",
    'Stavropol': "Stavropol'",
    'Tver': "Tver'",
    'Tyumen': "Tyumen'",
    'Ulyanovsk': "Ul'yanovsk",
    'Yamalo-Nenets': 'Yamal-Nenets',
    'Yaroslavl': "Yaroslavl'",
    'Zabaykalskiy': "Zabaykal'ye",
    'Gulf of Finland': 'CityofSt.Petersburg',
    'Southern Barents Sea': "Murmansk"
}


## SVN

In [639]:
# Slovenia (SVN): many-to-one table - dataset ADMIN1 values are folded into the
# GeoJSON NAME_1 regions.
SVN_fix = {
    'Ljubljana': 'Osrednjeslovenska',
    'Grosuplje': 'Osrednjeslovenska',
    'Kamnik': 'Osrednjeslovenska',
    'Maribor': 'Podravska',
    'Ptuj': 'Podravska',
    'Slovenska Bistrica': 'Podravska',
    'Hoce-Slivnica': 'Podravska',
    'Lenart': 'Podravska',
    'Markovci': 'Podravska',
    'Poljcane': 'Podravska',
    'Sredisce ob Dravi': 'Podravska',
    'Zavrc': 'Podravska',
    'Sentilj': 'Podravska',
    'Cirkulane': 'Podravska',
    'Celje': 'Savinjska',
    'Velenje': 'Savinjska',
    'Zalec': 'Savinjska',
    'Sentjur': 'Savinjska',
    'Slovenske Konjice': 'Savinjska',
    'Sostanj': 'Savinjska',
    'Vojnik': 'Savinjska',
    'Solcava': 'Savinjska',
    'Braslovce': 'Savinjska',
    'Kranj': 'Gorenjska',
    'Bled': 'Gorenjska',
    'Jesenice': 'Gorenjska',
    'Radovljica': 'Gorenjska',
    'Skofja Loka': 'Gorenjska',
    'Trzic': 'Gorenjska',
    'Kranjska Gora': 'Gorenjska',
    'Gorje': 'Gorenjska',
    'Gorenja vas-Poljane': 'Gorenjska',
    'Koper': 'Obalno-kraška',
    'Izola': 'Obalno-kraška',
    'Piran': 'Obalno-kraška',
    'Ankaran': 'Obalno-kraška',
    'Hrpelje-Kozina': 'Obalno-kraška',
    'Sezana': 'Obalno-kraška',
    'Nova Gorica': 'Goriška',
    'Sempeter-Vrtojba': 'Goriška',
    'Kanal': 'Goriška',
    'Novo mesto': 'JugovzhodnaSlovenija',
    'Kocevje': 'JugovzhodnaSlovenija',
    'Crnomelj': 'JugovzhodnaSlovenija',
    'Metlika': 'JugovzhodnaSlovenija',
    'Ribnica': 'JugovzhodnaSlovenija',
    'Sentjernej': 'JugovzhodnaSlovenija',
    'Sentrupert': 'JugovzhodnaSlovenija',
    'Kostel': 'JugovzhodnaSlovenija',
    'Murska Sobota': 'Pomurska',
    'Lendava': 'Pomurska',
    'Ljutomer': 'Pomurska',
    'Gornja Radgona': 'Pomurska',
    'Radenci': 'Pomurska',
    'Slovenj Gradec': 'Koroška',
    'Mislinja': 'Koroška',
    'Krsko': 'Spodnjeposavska',
    'Brezice': 'Spodnjeposavska',
    'Radece': 'Spodnjeposavska',
    'Trbovlje': 'Zasavska',
    'Zagorje ob Savi': 'Zasavska',
    'Hrastnik': 'Zasavska',
    'Ilirska Bistrica': 'Notranjsko-kraška'
}


## SWE

In [640]:
# Sweden (SWE): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# The Baltic Sea sea-area label is assigned to a land region.
SWE_fix = {
    'Dalarnas': 'Dalarna',
    'Gavleborgs': 'Gävleborg',
    'Gotlands': 'Gotland',
    'Hallands': 'Halland',
    'Jamtlands': 'Jämtland',
    'Jonkopings': 'Jönköping',
    'Kronobergs': 'Kronoberg',
    'Norrbottens': 'Norrbotten',
    'Ostergotlands': 'Östergötland',
    'Skane': 'Skåne',
    'Sodermanlands': 'Södermanland',
    'Stockholms': 'Stockholm',
    'Varmlands': 'Värmland',
    'Vasterbottens': 'Västerbotten',
    'Vasternorrlands': 'Västernorrland',
    'Vastmanlands': 'Västmanland',
    'Vastra Gotalands': 'VästraGötaland',
    'Baltic Sea - Gotland Basin': 'Gotland'
}

## UKR

In [641]:
# Ukraine (UKR): dataset ADMIN1 spelling -> GeoJSON NAME_1 spelling.
# The last five entries assign coastal and sea-area labels to a land region.
UKR_fix = {
    'Kyiv': 'Kiev',
    'Kyiv City': 'KievCity',
    'Odesa': 'Odessa',
    'Lviv': "L'viv",
    'Dnipropetrovsk': "Dnipropetrovs'k",
    'Donetsk': "Donets'k",
    'Ivano-Frankivsk': "Ivano-Frankivs'k",
    'Khmelnytskyi': "Khmel'nyts'kyy",
    'Luhansk': "Luhans'k",
    'Mykolaiv': "Mykolayiv",
    'Ternopil': "Ternopil'",
    'Vinnytsia': "Vinnytsya",
    'Coast of Sevastopol': "Sevastopol'",
    'Cape Fiolent': "Sevastopol'",
    'Western Black Sea': 'Odessa',
    'Sea of Azov': "Crimea",
    'Eastern Black Sea': "Crimea"
}


# Special case

## FRO

In [642]:
# Faroe Islands (FRO): dataset ADMIN1 -> single country-wide name.
# 'FRO' is also listed in `special_isos`, so the processing loop assigns its
# placeholder before this table would be consulted.
FRO_fix = {
    'Streymoyar':'FaroeIslands'
}

## GIB

In [643]:
# Gibraltar (GIB): identity mapping kept for completeness.
# 'GIB' is also listed in `special_isos`, so the processing loop assigns its
# placeholder before this table would be consulted.
GIB_fix = {
    'Gibraltar': 'Gibraltar'
}

## MCO

In [644]:
# Monaco (MCO): identity mapping kept for completeness.
# 'MCO' is also listed in `special_isos`, so the processing loop assigns its
# placeholder before this table would be consulted.
MCO_fix = {
    'Monaco': 'Monaco'
}

## SMR

In [645]:
# San Marino (SMR): every dataset ADMIN1 value -> single country-wide name.
# 'SMR' is also listed in `special_isos`, so the processing loop assigns its
# placeholder before this table would be consulted.
SMR_fix = {
    'Acquaviva': 'SanMarino',
    'Borgo Maggiore': 'SanMarino',
    'Serravalle': 'SanMarino'
}

## AND

In [646]:
# Andorra (AND): every dataset ADMIN1 value -> single country-wide name.
# 'AND' is also listed in `special_isos`, so the processing loop assigns its
# placeholder before this table would be consulted.
AND_fix = {
    'Andorra la Vella':'Andorra',
    'Canillo':'Andorra',
    'Ordino':'Andorra'
}

## LIE

In [647]:
# Liechtenstein (LIE): every dataset ADMIN1 value -> single country-wide name.
# 'LIE' is also listed in `special_isos`, so the processing loop assigns its
# placeholder before this table would be consulted.
LIE_fix = {
    'Eschen':'Liechtenstein',
    'Mauren':'Liechtenstein',
    'Ruggell':'Liechtenstein',
    'Triesen':'Liechtenstein',
    'Vaduz':'Liechtenstein'
}

# Processing

In [648]:
# ISO code -> per-country ADMIN1 correction table defined in the cells above.
# Keys have to match the values of the ISO column: the processing loop looks up
# `mappings[iso]` and skips countries that have no entry here.
mappings = {
    'AZE': AZE_fix,
    'ESP': ESP_fix,
    'FIN': FIN_fix,
    'FRA': FRA_fix,
    'GEO': GEO_fix,
    'GRC': GRC_fix,
    'ISL': ISL_fix,
    'KAZ': KAZ_fix,
    'LTU': LTU_fix,
    'LUX': LUX_fix,
    'LVA': LVA_fix,
    'MDA': MDA_fix,
    'MLT': MLT_fix,
    'MNE': MNE_fix,
    'NOR': NOR_fix,
    'POL': POL_fix,
    'ROU': ROU_fix,
    'RUS': RUS_fix,
    'SVN': SVN_fix,
    'SWE': SWE_fix,
    'UKR': UKR_fix,
    'ALB': ALB_fix,
    'AND': AND_fix,
    'ARM': ARM_fix,
    'AUT': AUT_fix,
    'BEL': BEL_fix,
    'BGR': BGR_fix,
    'BIH': BIH_fix,
    'BLR': BLR_fix,
    'CHE': CHE_fix,
    'CZE': CZE_fix,
    'DEU': DEU_fix,
    'DNK': DNK_fix,
    'EST': EST_fix,
    'FRO': FRO_fix,
    'GBR': GBR_fix,
    'GIB': GIB_fix,
    'HRV': HRV_fix,
    'HUN': HUN_fix,
    'IRL': IRL_fix,
    'ITA': ITA_fix,
    'KGZ': KGZ_fix,
    'LIE': LIE_fix,
    'MCO': MCO_fix,
    'MKD': MKD_fix,
    'NLD': NLD_fix,
    'PRT': PRT_fix,
    'SMR': SMR_fix,
    'SRB': SRB_fix,
    'SVK': SVK_fix,
    'TJK': TJK_fix,
    'TKM': TKM_fix,
    'UZB': UZB_fix,
    'XKO': XKO_fix
}

# ISO codes that the processing loop does not resolve through the GeoJSON files:
# rows of these countries whose GID_1 is still missing receive a fixed placeholder.
# 'VAT' has no entry in `mappings`; the loop checks this list first, so none is needed.
special_isos = ['GIB', 'FRO', 'MCO', 'LIE', 'AND', 'SMR', 'VAT']

In [649]:
# NOTE: this rebinds `df` (until now the raw Excel frame) to the very same object as
# `df_filtrato` - no copy is made, so the in-place writes of the processing loop
# below also change `df_filtrato`.
df = df_filtrato

In [650]:
def get_geojson_gid_map(iso_code, geojson_folder="../../data/final/geojson/countries_choropleth"):
    """Read ``<geojson_folder>/<iso_code>.geojson`` and return its NAME_1 -> GID_1 table.

    Args:
        iso_code: Country code used as the GeoJSON file name (without extension).
        geojson_folder: Folder containing the per-country GeoJSON files. The
            default is the project's choropleth folder, so existing one-argument
            calls behave as before.

    Returns:
        A dict mapping each feature's ``properties['NAME_1']`` to its
        ``properties['GID_1']`` (features lacking either key are skipped).
        An empty dict is returned, after printing a message, when the file is
        missing or cannot be read/parsed.
    """
    filename = f"{geojson_folder}/{iso_code}.geojson"
    if not os.path.exists(filename):
        print(f"File '{filename}' non trovato. GID_1 non recuperabili per {iso_code}.")
        return {}

    try:
        with open(filename, 'r', encoding='utf-8') as f:
            data = json.load(f)

        gid_map = {}
        for feature in data['features']:
            props = feature['properties']
            if 'NAME_1' in props and 'GID_1' in props:
                gid_map[props['NAME_1']] = props['GID_1']
        return gid_map
    except Exception as e:
        # Best effort: an unreadable or malformed file yields an empty table
        # instead of aborting the whole processing loop.
        print(f"Errore nella lettura di {filename}: {e}")
        return {}

print("Inizio elaborazione...")

# NAME_1 -> GID_1 table per ISO code, loaded the first time a country is met.
geojson_cache = {}

# Only the rows whose GID_1 is still missing are revisited.
mask_null = df['GID_1'].isnull()
indices_to_process = df[mask_null].index

count_fixed_special = 0
count_fixed_dict = 0

for idx in indices_to_process:
    iso, admin1 = df.at[idx, 'ISO'], df.at[idx, 'ADMIN1']

    # Special ISO codes get a fixed placeholder and skip the GeoJSON lookup entirely.
    # NOTE(review): the placeholder is the literal string "ISO", not the row's ISO
    # code - confirm that this is what the consumers of GID_1 expect.
    if iso in special_isos:
        df.at[idx, 'GID_1'] = "ISO"
        count_fixed_special += 1
        continue

    # Countries without a correction table are left unresolved.
    if iso not in mappings:
        continue

    # Translate the dataset name; names without an entry are tried unchanged.
    mapped_name = mappings[iso].get(admin1, admin1)

    gid_by_name = geojson_cache.get(iso)
    if gid_by_name is None:
        gid_by_name = geojson_cache[iso] = get_geojson_gid_map(iso)

    if mapped_name in gid_by_name:
        df.at[idx, 'GID_1'] = gid_by_name[mapped_name]
        count_fixed_dict += 1

# Summary of what the pass resolved and what is still missing.
separator = "-" * 30
print(separator)
print("REPORT LAVORO:")
print(f"Micro-stati risolti (ISO placeholder): {count_fixed_special}")
print(f"Record risolti tramite Dizionari + GeoJSON: {count_fixed_dict}")
print(f"Rimanenti NULL: {df['GID_1'].isnull().sum()}")
print(separator)

Inizio elaborazione...


------------------------------
REPORT LAVORO:
Micro-stati risolti (ISO placeholder): 107
Record risolti tramite Dizionari + GeoJSON: 48964
Rimanenti NULL: 299
------------------------------


# Data Export

In [651]:
# Total EVENTS per country, year and event (sub-)type, returned as a flat table.
df_country_summary = df.groupby(
    ['ISO', 'COUNTRY', 'YEAR', 'SUB_EVENT_TYPE', 'EVENT_TYPE'],
    as_index=False,
)['EVENTS'].sum()

In [652]:
# Total EVENTS per country, region (GID_1), year and event (sub-)type.
# groupby() drops rows whose key is NaN by default (dropna=True), so records whose
# GID_1 is still unresolved are left out of this summary.
df_admin_summary = df.groupby(['ISO', 'COUNTRY', 'GID_1', 'YEAR', 'SUB_EVENT_TYPE', 'EVENT_TYPE'])['EVENTS'].sum().reset_index()

In [653]:
# Write both summaries as JSON arrays with one object per row (orient='records').
# The target folder must already exist; to_json does not create it.
df_country_summary.to_json('../../data/final/choropleth/df_country_summary_v5.json', orient='records')
df_admin_summary.to_json('../../data/final/choropleth/df_admin_summary_v5.json', orient='records')