# In [3]:
import pandas as pd
import os


# Lookup from three-letter country code to continent name.
# The codes are grouped per continent below and flattened into a single
# {code: continent} dict. Codes that are not listed here are simply absent
# from the mapping.
country_continent_map = {
    code: continent
    for continent, codes in {
        'Africa': '''
            DZA AGO BEN BWA BFA BDI
            CPV CMR CAF TCD COM COG
            COD CIV DJI EGY GNQ ERI
            SWZ ETH GAB GMB GHA GIN
            GNB KEN LSO LBR LBY MDG
            MWI MLI MRT MUS MAR MOZ
            NAM NER NGA RWA STP SEN
            SYC SLE SOM ZAF SSD SDN
            TZA TGO TUN UGA ZMB ZWE
        ''',
        'Americas': '''
            ARG BOL BRA CAN CHL
            COL CRI CUB DOM ECU
            SLV GTM HND HTI JAM
            MEX NIC PAN PRY PER
            SUR TTO USA URY VEN
        ''',
        'Asia': '''
            AFG ARM AZE BHR BGD BTN BRN
            CHN GEO IND IDN IRN IRQ ISR
            JPN JOR KAZ KWT KGZ LBN MYS
            MDV MNG MMR NPL OMN PAK PHL
            QAT SAU SGP KOR LKA SYR TWN
            THA TUR TKM ARE UZB VNM YEM
        ''',
        'Europe': '''
            ALB AUT BEL BGR HRV CZE
            DNK EST FIN FRA DEU GRC
            HUN ISL IRL ITA LVA LTU
            LUX MLT NLD NOR POL PRT
            ROU RUS SRB SVK SVN ESP
            SWE CHE UKR GBR
        ''',
        'Oceania': '''
            AUS NZL PNG FJI SLB
        ''',
    }.items()
    for code in codes.split()
}

# Process wide-format files
def process_wide_file(filepath, value_name):
    """Load a wide (one column per year) CSV and reshape it to long form.

    The first four lines of the file are skipped before the header is read.
    The 'Indicator Name', 'Indicator Code' and 'Unnamed: 68' columns are
    removed when present; every remaining column other than the two country
    identifiers is unpivoted into a (Year, value) pair.

    Args:
        filepath: Path of the CSV file to read.
        value_name: Name given to the column that holds the melted values.

    Returns:
        A DataFrame with the columns 'Country Name', 'Country Code', 'Year'
        (integer) and ``value_name``. Rows whose column label could not be
        parsed as a number are discarded.
    """
    raw = pd.read_csv(filepath, skiprows=4)

    # errors='ignore' drops only those labels that actually exist.
    trimmed = raw.drop(
        columns=['Indicator Name', 'Indicator Code', 'Unnamed: 68'],
        errors='ignore',
    )

    tidy = pd.melt(
        trimmed,
        id_vars=['Country Name', 'Country Code'],
        var_name='Year',
        value_name=value_name,
    )

    # Non-numeric column labels become NaN, are dropped, and the survivors
    # are cast to whole-number years.
    tidy = (
        tidy.assign(Year=pd.to_numeric(tidy['Year'], errors='coerce'))
        .dropna(subset=['Year'])
        .astype({'Year': int})
    )
    return tidy

# Process long-format files
def process_long_file(filepath, value_name):
    """Load a long (one row per country and year) CSV and normalise it.

    A 'Value' column, when present, is renamed to ``value_name``. Rows whose
    'Year' cannot be parsed as a number are discarded and the remaining
    years are cast to integers.

    Args:
        filepath: Path of the CSV file to read.
        value_name: Name of the measurement column in the returned frame.

    Returns:
        A DataFrame restricted to the columns 'Country Name', 'Country Code',
        'Year' and ``value_name``, in that order.
    """
    table = pd.read_csv(filepath)

    # rename() leaves the frame unchanged when there is no 'Value' column.
    table = table.rename(columns={'Value': value_name})

    years = pd.to_numeric(table['Year'], errors='coerce')
    has_year = years.notna()
    table = table.loc[has_year].assign(Year=years[has_year].astype(int))

    wanted = ['Country Name', 'Country Code', 'Year', value_name]
    return table[wanted]

# Files
# Output column name -> CSV file, for the inputs read with process_wide_file.
files_wide = dict(
    GDP='API_NY.GDP.PCAP.KD_DS2_en_csv_v2_2624.csv',
    Life_Expectancy='API_SP.DYN.LE00.IN_DS2_en_csv_v2_2505.csv',
    Population_Growth='API_SP.POP.GROW_DS2_en_csv_v2_2477.csv',
)

# Output column name -> CSV file, for the inputs read with process_long_file.
files_long = dict(
    CO2='API_EN.ATM.CO2E.PC_DS2_en_csv_v2.csv',
)

# Load every indicator file that is present on disk. Absent files are recorded
# so that a column missing from the output can be traced back to its input.
dfs = []
missing_files = []

for name, file in files_wide.items():
    if os.path.exists(file):
        dfs.append(process_wide_file(file, name))
    else:
        missing_files.append(file)

for name, file in files_long.items():
    if os.path.exists(file):
        dfs.append(process_long_file(file, name))
    else:
        missing_files.append(file)

if missing_files:
    print('Skipped missing input files: ' + ', '.join(missing_files))

# With no table loaded there is nothing to merge; fail with an explicit message
# instead of an IndexError on dfs[0].
if not dfs:
    raise FileNotFoundError(
        'None of the expected input files were found in ' + os.getcwd()
    )

# Merge datasets: the outer join keeps country/year rows that appear in only
# some of the inputs.
master_df = dfs[0]
for df in dfs[1:]:
    master_df = pd.merge(master_df, df, on=['Country Name', 'Country Code', 'Year'], how='outer')

# Filter years. The explicit copy makes the result an independent frame, so the
# Region assignment below does not write to a slice of the merged table.
master_df = master_df[(master_df['Year'] >= 1960) & (master_df['Year'] <= 2024)].copy()

# Add region. Every code absent from country_continent_map is labelled
# 'Aggregates', including any real country that the map does not list.
master_df['Region'] = master_df['Country Code'].map(country_continent_map).fillna('Aggregates')

# Sort and save
master_df = master_df.sort_values(by=['Country Name', 'Year'])
master_df.to_csv('Master_Dataset_Final.csv', index=False)
