In [1]:
import pandas as pd

# Load the carbon-intensity series and attach the population table's 2024 column.
df = pd.read_csv("carbon-intensity-electricity.csv")
pop = pd.read_csv("world_population.csv").rename(
    columns={"Country Code": "Code", "2024": "Population"}
)
df = pd.merge(df, pop[["Code", "Population"]], on="Code", how="left")

# Reduce to the most recent year available for each entity.
df = df.sort_values(by=["Entity", "Year"]).groupby("Entity").tail(1)

# Set aside entities that have no code; only coded entities remain in `df`.
df_without_code = df[df["Code"].isna()]
df = df[df["Code"].notna()]
df.head()

Unnamed: 0,Entity,Code,Year,Carbon intensity of electricity - gCO2/kWh,Population
48,Afghanistan,AFG,2023,123.711334,42647492.0
121,Albania,ALB,2023,24.417313,2714617.0
145,Algeria,DZA,2023,633.64484,46814308.0
169,American Samoa,ASM,2023,647.05884,46765.0
193,Angola,AGO,2023,167.22408,37885849.0


In [2]:
# Preview the rows that were split off because their Code is null.
df_without_code.head()

Unnamed: 0,Entity,Code,Year,Carbon intensity of electricity - gCO2/kWh,Population
24,ASEAN (Ember),,2024,569.94794,
72,Africa,,2023,545.90845,
97,Africa (Ember),,2024,542.41693,
314,Asia,,2023,593.83777,
339,Asia (Ember),,2024,573.0421,


In [3]:
# Coded entities for which the merge found no population value
df.loc[df["Population"].isna()]

Unnamed: 0,Entity,Code,Year,Carbon intensity of electricity - gCO2/kWh,Population
1189,Cook Islands,COK,2022,250.0,
1797,Falkland Islands,FLK,2022,1000.0,
1917,French Guiana,GUF,2022,204.08163,
2230,Guadeloupe,GLP,2022,493.90244,
2833,Kosovo,OWID_KOS,2024,958.7156,
3339,Martinique,MTQ,2022,516.7785,
3529,Montserrat,MSR,2022,1000.0,
4334,Reunion,REU,2022,525.22253,
4431,Saint Helena,SHN,2022,1000.0,
4500,Saint Pierre and Miquelon,SPM,2022,600.0,


In [4]:
# Keep entities whose population exceeds 5 million, ordered by name.
# Rows with a missing population fail the comparison and are excluded too.
big_countries = df.loc[df["Population"].gt(5_000_000)].sort_values("Entity").copy()
big_countries

Unnamed: 0,Entity,Code,Year,Carbon intensity of electricity - gCO2/kWh,Population
48,Afghanistan,AFG,2023,123.711334,42647492.0
145,Algeria,DZA,2023,633.644840,46814308.0
193,Angola,AGO,2023,167.224080,37885849.0
242,Argentina,ARG,2024,358.948240,45696159.0
364,Australia,AUS,2024,551.589840,27204809.0
...,...,...,...,...,...
5604,Venezuela,VEN,2023,180.250780,28405543.0
5629,Vietnam,VNM,2024,471.158570,100987686.0
5688,Yemen,YEM,2023,586.319200,40583164.0
5712,Zambia,ZMB,2023,110.996925,21314956.0


In [5]:
import pycountry
import pytz

def get_alpha2(country):
    """
    Resolve a country identifier to its ISO alpha-2 code.

    country: either alpha-3 code (e.g. 'FRA'), or full name ('France'),
             or short name ('United States'), etc. Surrounding whitespace
             is ignored.
    Returns: ISO alpha-2 code (e.g. 'FR'), or None when the input is not a
             non-empty string or no matching country is found.
    """
    # Missing values (None, or a float NaN coming out of pandas) cannot be
    # resolved; report "not found" instead of failing on len().
    if not isinstance(country, str):
        return None
    country = country.strip()
    if not country:
        return None

    # If this looks like an alpha-3 code
    if len(country) == 3 and country.isalpha():
        try:
            c = pycountry.countries.get(alpha_3=country.upper())
            if c:
                return c.alpha_2
        except KeyError:
            # A miss shows up either as a falsy result or as a KeyError;
            # in both cases fall through to the general lookup below.
            pass

    # Fall back to pycountry's general lookup
    # (NOTE(review): confirm which fields it searches in the pycountry docs).
    try:
        c = pycountry.countries.lookup(country)
        return c.alpha_2
    except LookupError:
        return None


In [6]:
def country_timezones(country):
    """
    Return the pytz timezone names registered for a country.

    country: any identifier accepted by get_alpha2.
    Returns: the non-empty value pytz.country_timezones holds for the
             country's alpha-2 code.
    Raises:  ValueError if no alpha-2 code is found, or if pytz has no
             timezone entry for that code.
    """
    alpha2_code = get_alpha2(country)
    if alpha2_code:
        zone_names = pytz.country_timezones.get(alpha2_code)
        if zone_names:
            return zone_names
        raise ValueError(f"No tz found for {country}")
    raise ValueError(f"No alpha2 found for {country}")


In [7]:
# Map each country code to its timezone list, keeping only the countries
# for which more than one timezone is returned.
countries_with_multiple_timezones = {
    code: zones
    for code in big_countries["Code"]
    if len(zones := country_timezones(code)) > 1
}

In [8]:
# Hand-picked timezone per country code. country_timezone() returns the entry
# from this table when a code is listed here, instead of the first timezone
# reported by pytz.
# NOTE(review): the name suggests each value is the zone covering the
# country's capital -- confirm when adding entries.
capitals_timezones_dict = {
    "ARG": "America/Argentina/Buenos_Aires",
    "AUS": "Australia/Sydney",
    "BRA": "America/Sao_Paulo",
    "CAN": "America/Toronto",
    "CHL": "America/Santiago",
    "CHN": "Asia/Shanghai",
    "COD": "Africa/Kinshasa",
    "ECU": "America/Guayaquil",
    "DEU": "Europe/Berlin",
    "IDN": "Asia/Jakarta",
    "KAZ": "Asia/Almaty",
    "MYS": "Asia/Kuala_Lumpur",
    "MEX": "America/Mexico_City",
    "PNG": "Pacific/Port_Moresby",
    "PRT": "Europe/Lisbon",
    "RUS": "Europe/Moscow",
    "ESP": "Europe/Madrid",
    "UKR": "Europe/Kyiv",
    "USA": "America/New_York",
    "UZB": "Asia/Tashkent"
}

In [9]:
# Sanity-check the overrides: every hand-picked timezone must be one of the
# timezones recorded for that country code in countries_with_multiple_timezones.
# Each failure names the offending entry so it can be fixed directly.
for code, timezone in capitals_timezones_dict.items():
    known_timezones = countries_with_multiple_timezones.get(code)
    assert known_timezones is not None, (
        f"{code} has an override but is not in countries_with_multiple_timezones"
    )
    assert timezone in known_timezones, (
        f"{timezone} is not a known timezone for {code}: {known_timezones}"
    )

In [10]:
def country_timezone(country_code: str):
    """
    Pick a single timezone name for a country code.

    Codes listed in capitals_timezones_dict use the timezone stored there;
    any other code gets the first entry returned by country_timezones.
    """
    return (
        capitals_timezones_dict[country_code]
        if country_code in capitals_timezones_dict
        else country_timezones(country_code)[0]
    )

In [11]:
# Derive one timezone per row from its country code.
big_countries["Timezone"] = big_countries["Code"].map(country_timezone)

In [12]:
# Display the frame, which now includes the Timezone column.
big_countries

Unnamed: 0,Entity,Code,Year,Carbon intensity of electricity - gCO2/kWh,Population,Timezone
48,Afghanistan,AFG,2023,123.711334,42647492.0,Asia/Kabul
145,Algeria,DZA,2023,633.644840,46814308.0,Africa/Algiers
193,Angola,AGO,2023,167.224080,37885849.0,Africa/Luanda
242,Argentina,ARG,2024,358.948240,45696159.0,America/Argentina/Buenos_Aires
364,Australia,AUS,2024,551.589840,27204809.0,Australia/Sydney
...,...,...,...,...,...,...
5604,Venezuela,VEN,2023,180.250780,28405543.0,America/Caracas
5629,Vietnam,VNM,2024,471.158570,100987686.0,Asia/Ho_Chi_Minh
5688,Yemen,YEM,2023,586.319200,40583164.0,Asia/Aden
5712,Zambia,ZMB,2023,110.996925,21314956.0,Africa/Lusaka


In [13]:
# Every cell of big_countries must be populated.
has_nulls = big_countries.isna().to_numpy().any()
assert not has_nulls, "Null values detected in big_countries"

In [14]:
# Write everything except the Population column, without the index.
export_frame = big_countries.drop(columns="Population")
export_frame.to_csv("countries_elec_carbon_intensity_and_timezone.csv", index=False)

In [15]:
# automatically generate Countries class attributes
for country in big_countries["Entity"]:
    # Build the attribute name outside the f-string: reusing double quotes
    # inside a replacement field is only valid syntax on Python 3.12+.
    # Any character that is not a letter or digit (space, apostrophe, hyphen,
    # ...) becomes "_" so the printed name is a usable Python identifier.
    attribute_name = "".join(ch if ch.isalnum() else "_" for ch in country).upper()
    print(f'{attribute_name} = country_generator_from_csv("{country}")')

AFGHANISTAN = country_generator_from_csv("Afghanistan")
ALGERIA = country_generator_from_csv("Algeria")
ANGOLA = country_generator_from_csv("Angola")
ARGENTINA = country_generator_from_csv("Argentina")
AUSTRALIA = country_generator_from_csv("Australia")
AUSTRIA = country_generator_from_csv("Austria")
AZERBAIJAN = country_generator_from_csv("Azerbaijan")
BANGLADESH = country_generator_from_csv("Bangladesh")
BELARUS = country_generator_from_csv("Belarus")
BELGIUM = country_generator_from_csv("Belgium")
BENIN = country_generator_from_csv("Benin")
BOLIVIA = country_generator_from_csv("Bolivia")
BRAZIL = country_generator_from_csv("Brazil")
BULGARIA = country_generator_from_csv("Bulgaria")
BURKINA_FASO = country_generator_from_csv("Burkina Faso")
BURUNDI = country_generator_from_csv("Burundi")
CAMBODIA = country_generator_from_csv("Cambodia")
CAMEROON = country_generator_from_csv("Cameroon")
CANADA = country_generator_from_csv("Canada")
CENTRAL_AFRICAN_REPUBLIC = country_generator_from_csv(