In [71]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import os
from IPython.display import display
import glob

In [90]:
files1 = [
    "Annual_Surface_Temperature_Change.csv",
    "Climate-related_Disasters_Frequency.csv"
]

# Descriptive columns that stay fixed while every other column is unpivoted
# into (year, value) pairs.
id_vars = ['Country', 'ISO2', 'ISO3', 'Indicator', 'Unit', 'Source', 'CTS_Code', 'CTS_Name',
           'CTS_Full_Descriptor']

# Read each file exactly once; the untouched wide frames are reused for the
# NaN report at the bottom of the cell instead of hitting the disk again.
raw_frames = {file: pd.read_csv(file) for file in files1}

dfs = []
for file, raw in raw_frames.items():
    df = pd.melt(raw.drop(columns=['ObjectId']), id_vars=id_vars,
                 var_name='year', value_name='value')
    # Keep only the digit run of each former column label.
    # The pattern is a raw string because '\d' in a plain literal is an invalid
    # escape sequence; expand=False makes extract() return a Series.
    df['year'] = df['year'].str.extract(r'(\d+)', expand=False).astype(int)
    # Put 'year' and 'Country' first, remaining columns in their existing order.
    cols = ['year', 'Country'] + [col for col in df.columns if col not in ['year', 'Country']]
    df = df[cols]
    dfs.append(df)

surface_temperature, disasters_frequency = dfs

display(surface_temperature.head())
display(disasters_frequency.head())

# Row-level missingness of the original wide tables (ObjectId still included).
for file, raw in raw_frames.items():
    total_rows = len(raw)
    nan_rows = raw.isna().any(axis=1).sum()
    proportion = nan_rows / total_rows if total_rows > 0 else 0
    print(f"{file}: {nan_rows} rows contain NaN ({proportion:.2%})")

Unnamed: 0,year,Country,ISO2,ISO3,Indicator,Unit,Source,CTS_Code,CTS_Name,CTS_Full_Descriptor,value
0,1961,"Afghanistan, Islamic Rep. of",AF,AFG,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",-0.113
1,1961,Albania,AL,ALB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",0.627
2,1961,Algeria,DZ,DZA,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",0.164
3,1961,American Samoa,AS,ASM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",0.079
4,1961,"Andorra, Principality of",AD,AND,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate Indicator...",0.736


Unnamed: 0,year,Country,ISO2,ISO3,Indicator,Unit,Source,CTS_Code,CTS_Name,CTS_Full_Descriptor,value
0,1980,"Afghanistan, Islamic Rep. of",AF,AFG,"Climate related disasters frequency, Number of...",Number of,"The Emergency Events Database (EM-DAT) , Centr...",ECCD,Climate Related Disasters Frequency,"Environment, Climate Change, Climate Indicator...",
1,1980,"Afghanistan, Islamic Rep. of",AF,AFG,"Climate related disasters frequency, Number of...",Number of,"The Emergency Events Database (EM-DAT) , Centr...",ECCD,Climate Related Disasters Frequency,"Environment, Climate Change, Climate Indicator...",
2,1980,"Afghanistan, Islamic Rep. of",AF,AFG,"Climate related disasters frequency, Number of...",Number of,"The Emergency Events Database (EM-DAT) , Centr...",ECCD,Climate Related Disasters Frequency,"Environment, Climate Change, Climate Indicator...",1.0
3,1980,"Afghanistan, Islamic Rep. of",AF,AFG,"Climate related disasters frequency, Number of...",Number of,"The Emergency Events Database (EM-DAT) , Centr...",ECCD,Climate Related Disasters Frequency,"Environment, Climate Change, Climate Indicator...",
4,1980,"Afghanistan, Islamic Rep. of",AF,AFG,"Climate related disasters frequency, Number of...",Number of,"The Emergency Events Database (EM-DAT) , Centr...",ECCD,Climate Related Disasters Frequency,"Environment, Climate Change, Climate Indicator...",


Annual_Surface_Temperature_Change.csv: 69 rows contain NaN (30.67%)
Climate-related_Disasters_Frequency.csv: 956 rows contain NaN (98.56%)


In [75]:
files2 = [
    "Biofuels.csv",
    "Coal and coke.csv",
    "Electricity.csv",
    "Emissions.csv",
    "Hydrocarbon gas liquids.csv",
    "Natural gas.csv",
    "Petroleum and other liquids.csv",
    "Primary energy(quadrillion BTU).csv"
]

# For every file, report how many rows hold at least one missing cell.
for file in files2:
    df = pd.read_csv(file)
    total_rows = df.shape[0]
    # A row counts as soon as any of its columns is NaN.
    num_rows_with_nan = df.isna().any(axis="columns").sum()
    print(f"{file}: {num_rows_with_nan} / {total_rows} rows with NaN values")

Biofuels.csv: 1242 / 1664 rows with NaN values
Coal and coke.csv: 577 / 4439 rows with NaN values
Electricity.csv: 6151 / 17204 rows with NaN values
Emissions.csv: 3299 / 3323 rows with NaN values
Hydrocarbon gas liquids.csv: 1983 / 2219 rows with NaN values
Natural gas.csv: 2824 / 3929 rows with NaN values
Petroleum and other liquids.csv: 7769 / 7769 rows with NaN values
Primary energy(quadrillion BTU).csv: 565 / 4431 rows with NaN values


In [93]:
# This is the first cell, in document order, that uses `files21`. The list is
# declared here so the notebook survives Restart Kernel -> Run All; a later
# cell assigns the same list again, which is harmless.
files21 = [
    "processed_Primary energy(quadrillion BTU).csv",
    "processed_Petroleum and other liquids.csv",
    "processed_Natural gas.csv",
    "processed_Hydrocarbon gas liquids.csv",
    "processed_Emissions.csv",
    "processed_Electricity.csv",
    "processed_Coal and coke.csv",
    "processed_Biofuels.csv"
]

# Share of rows per processed file that contain at least one NaN cell.
for file in files21:
    df = pd.read_csv(file)
    total_rows = len(df)
    nan_rows = df.isna().any(axis=1).sum()
    # Guard against an empty file so the ratio never divides by zero.
    proportion = nan_rows / total_rows if total_rows > 0 else 0
    print(f"{file}: {nan_rows} rows contain NaN ({proportion:.2%})")

processed_Primary energy(quadrillion BTU).csv: 565 rows contain NaN (12.75%)
processed_Petroleum and other liquids.csv: 7768 rows contain NaN (100.00%)
processed_Natural gas.csv: 2823 rows contain NaN (71.87%)
processed_Hydrocarbon gas liquids.csv: 1982 rows contain NaN (89.36%)
processed_Emissions.csv: 3298 rows contain NaN (99.28%)
processed_Electricity.csv: 6151 rows contain NaN (35.76%)
processed_Coal and coke.csv: 577 rows contain NaN (13.00%)
processed_Biofuels.csv: 1242 rows contain NaN (74.68%)


In [97]:
# Load the processed emissions table that the following statements annotate.
emissions_df = pd.read_csv(filepath_or_buffer='processed_Emissions.csv')
def map_emissions_type(value):
    """Return ``value`` if it is one of the known emissions labels, else ``None``.

    The comparison is exact: the leading spaces in the labels below are part
    of the match, so a stripped label such as ``'Emissions'`` is not recognised.
    """
    emissions_labels = (
        '    Emissions',
        '        CO2 emissions (MMtonnes CO2)',
        '            Coal and coke (MMtonnes CO2)',
        '            Consumed natural gas (MMtonnes CO2)',
        '            Petroleum and other liquids (MMtonnes CO2)',
    )
    return value if value in emissions_labels else None

# Label each row with its emissions type (None where the row label is not a
# recognised emissions label), then move that column to position 1.
type_of_emissions = emissions_df['Unnamed: 1'].apply(map_emissions_type)
emissions_df['Type of Emissions'] = type_of_emissions
columns = [name for name in emissions_df.columns if name != 'Type of Emissions']
columns[1:1] = ['Type of Emissions']
emissions_df = emissions_df[columns]

In [98]:
files21 = [
    "processed_Primary energy(quadrillion BTU).csv",
    "processed_Petroleum and other liquids.csv",
    "processed_Natural gas.csv",
    "processed_Hydrocarbon gas liquids.csv",
    "processed_Emissions.csv",
    "processed_Electricity.csv",
    "processed_Coal and coke.csv",
    "processed_Biofuels.csv"
]

# Remove the 'API' column wherever it exists, show the frame, and write the
# result back over the same file. errors='ignore' makes the drop a no-op for
# files that have no such column.
for file in files21:
    df21 = pd.read_csv(file).drop(columns=['API'], errors='ignore')
    display(df21)
    df21.to_csv(file, index=False)


Unnamed: 0,Unnamed: 1,1980,1981,1982,1983,1984,1985,1986,1987,1988,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Coal (quad Btu),15.974465135754876,15.686072870597357,16.431467059160603,16.462883846026156,16.814796294305236,17.095813338864456,17.101171578290174,16.932104637775158,16.71542382858122,...,5.9448520158886735,5.6696794887424415,5.606485891714504,5.226383027905209,5.171614540387495,4.900326685622359,4.227527308822338,3.5124028996754344,3.7910848645634534,3.7194833709394253
1,Natural gas (quad Btu),6.3089346,6.1652379,5.78354,5.9876936,6.0493976,6.0777928,5.8725963,5.9611368,5.5869213,...,4.343589419072742,3.682310547682472,3.134111727565398,3.0899756308110917,2.9129224182608775,2.6000335068444027,2.2941567134598384,1.8147159963953359,1.6770850203207792,1.5504517936445783
2,Petroleum and other liquids (quad Btu),0,0,0,0,1.3186148767404968,1.3654111786397638,1.4233188047462118,1.4674480909122611,1.484790624942737,...,1.1746042263503438,1.1640954705865265,1.116777625464159,0.9813564864084638,0.9511727830139859,0.9270477437310991,0.8627297074049656,0.8605941557450906,0.8244073804780659,0.7662997249338349
3,"Nuclear, renewables, and other (quad Btu)",3.139073067996473,3.7803065325036975,4.019633696821149,4.582789704219429,5.6893350519171975,6.5982247004393475,7.110388936359894,7.396137378977874,7.939506658586123,...,12.146841132730799,12.402824805580755,12.205701318253546,12.055126999602225,12.046482556718674,12.314891212233503,12.447336199461244,11.931399964417507,12.542071497760258,11.31914608319376
4,Nuclear (quad Btu),2.01755652510157,2.6802033891115076,2.9365320014698155,3.4871658405621866,4.561760587910947,5.466393697750254,6.023479532302228,6.252353645147975,6.696693991206567,...,7.934289984556186,7.996855036651479,7.747812737889196,7.556485521003166,7.478659205554019,7.5067783647622734,7.549267345544007,6.713497604836161,7.227046355634952,6.03349764760203
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4425,Natural gas (quad Btu),19.907600094,19.699155047,18.319024764,16.593389291,18.007932632,16.980380424,16.5408009,17.135819011,17.598597009,...,24.859072419,26.718072525,28.066881906,27.576023261,28.289335056,31.88214802,35.187183375,35.06214088,35.80685922,37.661731595
4426,Petroleum and other liquids (quad Btu),20.473757967,20.412303121,20.458149338,20.528111329,21.082043165,21.196174616,20.4887396,19.853719522,19.509620491,...,19.319145507,22.61521608,24.173302598,23.191447081,24.533705626,28.534879995,31.955943876,30.379749192,30.499986736,32.452393637
4427,"Nuclear, renewables, and other (quad Btu)",6.184547018,6.523510617,6.876944163,7.27171313,7.65665123,8.093736604,8.350582044,8.528376112,9.409650598,...,14.805215887,15.173147656,15.182813299,15.615169987,15.923563022,16.182262741,16.204669646,15.716277104,15.938213973,16.368361653
4428,Nuclear (quad Btu),2.739168687,3.007588734,3.131148173,3.202548979,3.552530566,4.0755629,4.380108667,4.753933331,5.586968249,...,8.244433126,8.337559005,8.336886237,8.426753002,8.418968232,8.438068207,8.451851513,8.251074602,8.130913481,8.061020419


Unnamed: 0,Unnamed: 1,1973,1974,1975,1976,1977,1978,1979,1980,1981,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,Austria,,,,,,,,30,28,...,31.574990410958904,29.36100218579235,27.186649315068493,26.92278904109589,26.442115068493152,24.4670043715847,24.902112195569963,23.946440004527354,23.10385494558548,
1,Belgium,,,,,,,,8,8,...,21.057,19.9088,19.6025,18.8857,24.3528,23.6427,24.79236143224496,24.6058295771095,24.939280789220742,
2,Bulgaria,,,,,,,,3,3,...,3.9468,4.2034,6.0457,6.0647,6.0875,5.9982,6.142786379499914,6.119306322579973,6.1601399615128765,
3,Croatia,,,,,,,,--,--,...,17.77737808219178,17.554367213114755,17.96093698630137,17.97708082191781,17.614609589041095,15.8241956284153,15.50214060005226,15.09152099938159,14.475327420282191,
4,Cyprus,,,,,,,,0,0,...,0,0.01,0,0,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7763,Turkiye,,,,,,,,37,44,...,,,,,,,,,,
7764,U.S. Territories,,,,,,,,,,...,,,,,,,,,,
7765,U.S. Virgin Islands,,,,,,,,,,...,,,,,,,,,,
7766,United Kingdom,,,,,,,,289,271,...,,,,,,,,,,


Unnamed: 0,Unnamed: 1,1980,1981,1982,1983,1984,1985,1986,1987,1988,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Afghanistan,,,,,,,,,,...,5.636274,5.636274,6.674535,,,,,,,
1,Albania,,,,,,,,,,...,0.670985,1.13008,1.236025,,,,,,,
2,Algeria,,,,,,,,,,...,6338.68935,6594.15806,6491.74456,,,,,,,
3,American Samoa,,,,,,,,,,...,0,0,0,,,,,,,
4,Angola,,,,,,,,,,...,379.9894,369.04175,372.255415,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3923,Turkiye,0.5,0.49,0.55,0.545,1.01,0.6,0.49,0.46,1.03,...,0.218,0.241,0.218,0.177,0.131,0.18,0.17,0.134,0.134,
3924,U.S. Territories,,,,,,,,,,...,,,,,,,,,,
3925,U.S. Virgin Islands,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3926,United Kingdom,25,24.8,26,25.4,25.1,27.8,33,33.4,22,...,8.687,8.616,8.502,7.254,7.319,6.38,6.38,6.38,6.38,


Unnamed: 0,Unnamed: 1,1980,1981,1982,1983,1984,1985,1986,1987,1988,...,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
0,Austria,,,,,2.4,2.9,2,2.4,1.4,...,3.4,1.6,2.3,3.1,2.9,2.8,3.2,2.2,2.3,
1,Belgium,16.64989152,15,13,12,12,14,17,17,16,...,15,13,15,16,14,17,17,15,18,
2,Bulgaria,,,,,,,3.1,3.3,3.1,...,3.5,4.2,4.3,5.1,5.1,4,3.9,4,,
3,Croatia,--,--,--,--,--,--,--,--,--,...,15,18,12,12,13,11,9.6,8.9,,
4,Cyprus,,,,,,,0.7,0.7,1,...,0,0,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2213,Turkiye,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2214,U.S. Territories,,,,,,,,,,...,,,,,,,,,,
2215,U.S. Virgin Islands,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,,
2216,United Kingdom,0,0,0,0,0,0,0,0,0,...,58,53,50,55,48,42,35,33,35,39


Unnamed: 0,Unnamed: 1,1949,1950,1951,1952,1953,1954,1955,1956,1957,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,CO2 emissions (MMtonnes CO2),,,,,,,,,,...,3233.894895138,3095.50335549,3151.795757334,3158.350350425,3229.106490498,3173.837904551,3050.515706002,2674.174407197,2861.862122568,2810.895446026
1,Coal and coke (MMtonnes CO2),,,,,,,,,,...,966.178590520027,928.0586723010746,924.3504438731959,881.395946525172,885.8347672131258,837.4372789408834,686.1986177198614,553.60432220144,625.2556464438163,638.1503754676833
2,Consumed natural gas (MMtonnes CO2),,,,,,,,,,...,748.812141588,665.44437301,693.833401501,733.071018948,771.853227747,760.421619714,785.818991333,750.968713961,788.586325812,672.944252201
3,Petroleum and other liquids (MMton...,,,,,,,,,,...,1518.9041630282206,1502.0003101813543,1533.6119119599196,1543.883384950255,1571.418495536,1575.979005895,1578.4980969494302,1369.6013710344841,1448.0201503162598,1499.8008183615273
4,Austria,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3317,Emissions,,,,,,,,,,...,,,,,,,,,,
3318,CO2 emissions (MMtonnes CO2),2206.690829,2382.046176,2526.687327,2473.373964,2536.892888,2422.252560,2684.786146,2777.042758,2756.562392,...,5359.016761,5414.028471,5262.200304,5169.044195,5131.529178,5277.882471,5146.992588,4583.690108,4904.569835,4941.235774
3319,Coal and coke (MMtonnes CO2),1117.538985,1151.600346,1166.858975,1052.386511,1057.069608,904.191799,1037.883229,1054.597765,1003.746034,...,1717.844544,1713.367094,1481.89792,1355.200263,1317.951988,1262.555409,1077.519881,876.195454,1002.650633,938.649433
3320,Consumed natural gas (MMtonnes CO2),269.537229,312.826758,369.725933,396.144693,414.852766,437.259069,472.164274,504.395476,534.718185,...,1408.379797,1437.536155,1478.884313,1489.851251,1470.964449,1626.90757,1684.623741,1653.000683,1656.119591,1742.143174


Unnamed: 0,Unnamed: 1,1980,1981,1982,1983,1984,1985,1986,1987,1988,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Austria,40.574,41.233,41.458,41.191,40.97,43.033,43.295,38.842,47.491,...,62.453,59.3411,59.9674,63.4448,65.4933,62.8535,70.3772,69.631,68.6257,59.7065553
1,Belgium,50.572,47.537,47.25,49.041,50.813,53.112,54.576,59.154,61.423,...,79.22,68.877,66.3918,83.5751,84.5753,73.6327,91.9311,87.1998,97.3069,90.8934796
2,Bulgaria,32.854,34.868,38.112,40.185,42.08,39.184,39.354,40.916,42.409,...,41.7996,45.763,47.5884,43.3792,43.8073,45.5374,42.8087,39.2078,46.3552,50.180099
3,Croatia,--,--,--,--,--,--,--,--,--,...,13.739,13.181,11.117,12.1545,11.4109,13.0298,12.0825,12.8142,15.0062,14.1458618
4,Cyprus,0.968,0.993,1.071,1.148,1.175,1.24,1.338,1.421,1.568,...,4.293,4.347,4.5346,4.8876,5.0044,5.0605,5.1415,4.8491,5.1164,5.2607329
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17198,Turkiye,2.824,2.931,3.317,3.422,3.74,3.945,5.447,5.518,6.308,...,37.134,37.331,36.528,35.0161,34.705,31.5049,31.2602,29.1264,30.553,30.4520407
17199,U.S. Territories,,,,,,,,,,...,,,,,,,,,,
17200,U.S. Virgin Islands,0.05243,0.05537,0.05607,0.05761,0.05789,0.05922,0.06055,0.06335,0.06384,...,0.06,0.06,0.06,0.065,0.05,0.051,0.051,0.05,0.05,0.05
17201,United Kingdom,21.534,20.123,20.479,21.206,23.055,24.603,25.065,26.692,23.754,...,26.55,27.448,28.607,26.3305,26.835,25.7043,25.4496,26.3227,25.3407,26.125


Unnamed: 0,Unnamed: 1,1980,1981,1982,1983,1984,1985,1986,1987,1988,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Austria,3158.11815,3374.17091,3634.31607,3352.12471,3227.56368,3396.21711,3272.75839,3071.03566,2346.81799,...,0,0,0,0,0,0,0,0,0,0
1,Belgium,6971.00844,6818.88966,7248.79056,6745.03489,6990.85002,6875.10747,6200.49375,4817.0947,2741.44497,...,0,0,0,0,0,0,0,0,0,0
2,Bulgaria,33304.09203,32232.64671,35510.91665,35703.8209,37099.34536,34039.3328,38826.66513,40585.95189,37640.57957,...,31549.21451,34467.02908,39527.73429,34426.24361,37784.25686,33359.1854838,30865.38988764,24579.8374888,31183.54300908,39149.6640062
3,Croatia,--,--,--,--,--,--,--,--,--,...,0,0,0,0,0,0,0,0,0,0
4,Cyprus,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4433,Turkiye,,,,,,,,,,...,9592.30162,9592.30162,12514.52543,12514.52543,12514.52543,12704.12275,12704.12275,12704.12275,12704.12275,12704.12275
4434,U.S. Territories,,,,,,,,,,...,,,,,,,,,,0
4435,U.S. Virgin Islands,,,,,,,,,,...,0,0,0,0,0,0,0,0,0,0
4436,United Kingdom,,,,,,,,,,...,291.00984,77.1617,77,77,77,28.66006,28.66006,28.66006,28.66006,28.66006


Unnamed: 0,Unnamed: 1,1980,1981,1982,1983,1984,1985,1986,1987,1988,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Austria,0,0,0,0,0,0,0,0,0,...,5.57762,9.26792,10.43141,9.65249,8.69962,8.61626,9.44374,8.130170,9.283590,10.052630
1,Belgium,0,0,0,0,0,0,0,0,0,...,10.64739,13.44532,9.50842,9.18943,10.93742,10.63548,11.37241,12.110400,11.302530,10.981650
2,Bulgaria,0,0,0,0,0,0,0,0,0,...,1.03788,1.51039,1.37187,1.67608,2.11202,2.65782,3.45351,2.798020,4.011000,5.481710
3,Croatia,--,--,--,--,--,--,--,--,--,...,0.59977,0.63215,0.31343,0.02027,0.00678,0.00742,0.00472,0.003040,0.006970,0.007150
4,Cyprus,0,0,0,0,0,0,0,0,0,...,0.03326,0,0.01341,0.0087,0.02609,0,0,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1658,Turkiye,,,,,,,,,,...,0,0,0,0,0.00223079,0.00018084,0.05759437,0.326402,0.265227,0.265227
1659,U.S. Territories,,,,,,,,,,...,,,,,,,,,,
1660,U.S. Virgin Islands,,,,,,,,,,...,0,0,0,0,0,0,0,,,
1661,United Kingdom,,,,,,,,,,...,4.64965142,2.49703502,2.58313968,6.02732591,8.09383766,8.61046559,9.12709353,7.749419,8.179942,8.610466


In [99]:
# Preview the first five rows to check the new 'Type of Emissions' column.
emissions_df.head(5)

Unnamed: 0,Unnamed: 1,Type of Emissions,1949,1950,1951,1952,1953,1954,1955,1956,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,CO2 emissions (MMtonnes CO2),CO2 emissions (MMtonnes CO2),,,,,,,,,...,3233.894895138,3095.50335549,3151.795757334,3158.350350425,3229.106490498,3173.837904551,3050.515706002,2674.174407197,2861.862122568,2810.895446026
1,Coal and coke (MMtonnes CO2),Coal and coke (MMtonnes CO2),,,,,,,,,...,966.178590520027,928.0586723010746,924.350443873196,881.395946525172,885.8347672131258,837.4372789408834,686.1986177198614,553.60432220144,625.2556464438163,638.1503754676833
2,Consumed natural gas (MMtonnes CO2),Consumed natural gas (MMtonnes CO2),,,,,,,,,...,748.812141588,665.44437301,693.833401501,733.071018948,771.853227747,760.421619714,785.818991333,750.968713961,788.586325812,672.944252201
3,Petroleum and other liquids (MMton...,Petroleum and other liquids (MMton...,,,,,,,,,...,1518.9041630282206,1502.0003101813545,1533.6119119599196,1543.883384950255,1571.418495536,1575.979005895,1578.4980969494302,1369.601371034484,1448.0201503162598,1499.8008183615273
4,Austria,,,,,,,,,,...,,,,,,,,,,


In [100]:
# Row labels that are matched exactly (leading spaces included) and blanked
# out of the name column below.
values_to_replace = [
    '    Emissions',
    '        CO2 emissions (MMtonnes CO2)',
    '            Coal and coke (MMtonnes CO2)',
    '            Consumed natural gas (MMtonnes CO2)',
    '            Petroleum and other liquids (MMtonnes CO2)'
]

# Blank the listed labels so the column keeps only the remaining names.
# The column is deliberately NOT cast with astype(str): that cast turns real
# missing values into the literal string 'nan', so the isna() report below
# could never count them.
emissions_df['Unnamed: 1'] = emissions_df['Unnamed: 1'].replace(values_to_replace, '')
emissions_df = emissions_df.rename(columns={'Unnamed: 1': 'Country'})

# Prepend a header row naming the first group of series.
new_df = pd.concat([pd.DataFrame({'Country': ['European Union']}), emissions_df], ignore_index=True)

# Share of rows whose Country is genuinely missing. Labels blanked above are
# empty strings, not NaN, and are therefore not counted here.
total_rows = len(new_df)
nan_rows = new_df['Country'].isna().sum()
proportion_nan = nan_rows / total_rows if total_rows > 0 else 0
print(f"Proportion of NaN values: {proportion_nan:.2%}")

display(new_df.head())

Proportion of NaN values: 0.00%


Unnamed: 0,Country,Type of Emissions,1949,1950,1951,1952,1953,1954,1955,1956,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,European Union,,,,,,,,,,...,,,,,,,,,,
1,,CO2 emissions (MMtonnes CO2),,,,,,,,,...,3233.894895138,3095.50335549,3151.795757334,3158.350350425,3229.106490498,3173.837904551,3050.515706002,2674.174407197,2861.862122568,2810.895446026
2,,Coal and coke (MMtonnes CO2),,,,,,,,,...,966.178590520027,928.0586723010746,924.350443873196,881.395946525172,885.8347672131258,837.4372789408834,686.1986177198614,553.60432220144,625.2556464438163,638.1503754676833
3,,Consumed natural gas (MMtonnes CO2),,,,,,,,,...,748.812141588,665.44437301,693.833401501,733.071018948,771.853227747,760.421619714,785.818991333,750.968713961,788.586325812,672.944252201
4,,Petroleum and other liquids (MMton...,,,,,,,,,...,1518.9041630282206,1502.0003101813545,1533.6119119599196,1543.883384950255,1571.418495536,1575.979005895,1578.4980969494302,1369.601371034484,1448.0201503162598,1499.8008183615273


In [72]:
# One-off conversion of the Excel workbook to CSV (row index not written).
df = pd.read_excel(io="Extreme Weather Events.xlsx")
df.to_csv(path_or_buf="Extreme Weather Events.csv", index=False)

In [101]:
df = pd.read_csv("Extreme Weather Events.csv")

# Cell-level missingness: NaN cells over all cells (rows x columns).
total_cells = df.shape[0] * df.shape[1]
nan_cells = int(df.isna().to_numpy().sum())
if total_cells > 0:
    proportion_nan = nan_cells / total_cells
else:
    # Empty table: report zero instead of dividing by zero.
    proportion_nan = 0
print(f"Proportion of NaN values: {proportion_nan:.2%}")
df.head()

Proportion of NaN values: 33.18%


Unnamed: 0,DisNo.,Historic,Classification Key,Disaster Group,Disaster Subgroup,Disaster Type,Disaster Subtype,External IDs,Event Name,ISO,...,Reconstruction Costs ('000 US$),"Reconstruction Costs, Adjusted ('000 US$)",Insured Damage ('000 US$),"Insured Damage, Adjusted ('000 US$)",Total Damage ('000 US$),"Total Damage, Adjusted ('000 US$)",CPI,Admin Units,Entry Date,Last Update
0,1999-9388-DJI,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,DJI,...,,,,,,,58.111474,"[{""adm1_code"":1093,""adm1_name"":""Ali Sabieh""},{...",2006-03-01,2023-09-25
1,1999-9388-SDN,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,SDN,...,,,,,,,56.514291,"[{""adm1_code"":2757,""adm1_name"":""Northern Darfu...",2006-03-08,2023-09-25
2,1999-9388-SOM,No,nat-cli-dro-dro,Natural,Climatological,Drought,Drought,,,SOM,...,,,,,,,56.514291,"[{""adm1_code"":2691,""adm1_name"":""Bay""},{""adm1_c...",2006-03-08,2023-09-25
3,2000-0002-AGO,No,nat-hyd-flo-riv,Natural,Hydrological,Flood,Riverine flood,,,AGO,...,,,,,10000.0,17695.0,56.514291,"[{""adm2_code"":4214,""adm2_name"":""Baia Farta""},{...",2005-02-03,2023-09-25
4,2000-0003-BGD,No,nat-met-ext-col,Natural,Meteorological,Extreme temperature,Cold wave,,,BGD,...,,,,,,,56.514291,"[{""adm1_code"":575,""adm1_name"":""Barisal""},{""adm...",2003-07-01,2023-09-25


In [102]:

# low_memory=False makes pandas parse the file in one pass rather than in chunks.
dfC = pd.read_csv(
    "country_hsproduct4digit_year.csv",
    low_memory=False,
)
# Lookup from three-letter location code to a display name, plus 'WLD' for the
# world aggregate. Every code appears exactly once: the repeated 'SWZ' entry
# was removed.
code_to_country = {
    'WLD': 'World',
    'AFG': 'Afghanistan',
    'ALB': 'Albania',
    'DZA': 'Algeria',
    'ASM': 'American Samoa',
    'AND': 'Andorra',
    'AGO': 'Angola',
    'AIA': 'Anguilla',
    'ATA': 'Antarctica',
    'ATG': 'Antigua and Barbuda',
    'ARG': 'Argentina',
    'ARM': 'Armenia',
    'ABW': 'Aruba',
    'AUS': 'Australia',
    'AUT': 'Austria',
    'AZE': 'Azerbaijan',
    'BHS': 'Bahamas (the)',
    'BHR': 'Bahrain',
    'BGD': 'Bangladesh',
    'BRB': 'Barbados',
    'BLR': 'Belarus',
    'BEL': 'Belgium',
    'BLZ': 'Belize',
    'BEN': 'Benin',
    'BMU': 'Bermuda',
    'BTN': 'Bhutan',
    'BOL': 'Bolivia (Plurinational State of)',
    'BES': 'Bonaire, Sint Eustatius and Saba',
    'BIH': 'Bosnia and Herzegovina',
    'BWA': 'Botswana',
    'BVT': 'Bouvet Island',
    'BRA': 'Brazil',
    'IOT': 'British Indian Ocean Territory (the)',
    'BRN': 'Brunei Darussalam',
    'BGR': 'Bulgaria',
    'BFA': 'Burkina Faso',
    'BDI': 'Burundi',
    'CPV': 'Cabo Verde',
    'KHM': 'Cambodia',
    'CMR': 'Cameroon',
    'CAN': 'Canada',
    'CYM': 'Cayman Islands (the)',
    'CAF': 'Central African Republic (the)',
    'TCD': 'Chad',
    'CHL': 'Chile',
    'CHN': 'China',
    'CXR': 'Christmas Island',
    'CCK': 'Cocos (Keeling) Islands (the)',
    'COL': 'Colombia',
    'COM': 'Comoros (the)',
    'COD': 'Congo (the Democratic Republic of the)',
    'COG': 'Congo (the)',
    'COK': 'Cook Islands (the)',
    'CRI': 'Costa Rica',
    'HRV': 'Croatia',
    'CUB': 'Cuba',
    'CUW': 'Curaçao',
    'CYP': 'Cyprus',
    'CZE': 'Czechia',
    'CIV': "Côte d'Ivoire",
    'DNK': 'Denmark',
    'DJI': 'Djibouti',
    'DMA': 'Dominica',
    'DOM': 'Dominican Republic (the)',
    'ECU': 'Ecuador',
    'EGY': 'Egypt',
    'SLV': 'El Salvador',
    'GNQ': 'Equatorial Guinea',
    'ERI': 'Eritrea',
    'EST': 'Estonia',
    'SWZ': 'Eswatini',
    'ETH': 'Ethiopia',
    'FLK': 'Falkland Islands (the) [Malvinas]',
    'FRO': 'Faroe Islands (the)',
    'FJI': 'Fiji',
    'FIN': 'Finland',
    'FRA': 'France',
    'GUF': 'French Guiana',
    'PYF': 'French Polynesia',
    'ATF': 'French Southern Territories (the)',
    'GAB': 'Gabon',
    'GMB': 'Gambia (the)',
    'GEO': 'Georgia',
    'DEU': 'Germany',
    'GHA': 'Ghana',
    'GIB': 'Gibraltar',
    'GRC': 'Greece',
    'GRL': 'Greenland',
    'GRD': 'Grenada',
    'GLP': 'Guadeloupe',
    'GUM': 'Guam',
    'GTM': 'Guatemala',
    'GGY': 'Guernsey',
    'GIN': 'Guinea',
    'GNB': 'Guinea-Bissau',
    'GUY': 'Guyana',
    'HTI': 'Haiti',
    'HMD': 'Heard Island and McDonald Islands',
    'VAT': 'Holy See (the)',
    'HND': 'Honduras',
    'HKG': 'Hong Kong',
    'HUN': 'Hungary',
    'ISL': 'Iceland',
    'IND': 'India',
    'IDN': 'Indonesia',
    'IRN': 'Iran (Islamic Republic of)',
    'IRQ': 'Iraq',
    'IRL': 'Ireland',
    'IMN': 'Isle of Man',
    'ISR': 'Israel',
    'ITA': 'Italy',
    'JAM': 'Jamaica',
    'JPN': 'Japan',
    'JEY': 'Jersey',
    'JOR': 'Jordan',
    'KAZ': 'Kazakhstan',
    'KEN': 'Kenya',
    'KIR': 'Kiribati',
    'PRK': "Korea (the Democratic People's Republic of)",
    'KOR': 'Korea (the Republic of)',
    'KWT': 'Kuwait',
    'KGZ': 'Kyrgyzstan',
    'LAO': "Lao People's Democratic Republic (the)",
    'LVA': 'Latvia',
    'LBN': 'Lebanon',
    'LSO': 'Lesotho',
    'LBR': 'Liberia',
    'LBY': 'Libya',
    'LIE': 'Liechtenstein',
    'LTU': 'Lithuania',
    'LUX': 'Luxembourg',
    'MAC': 'Macao',
    'MDG': 'Madagascar',
    'MWI': 'Malawi',
    'MYS': 'Malaysia',
    'MDV': 'Maldives',
    'MLI': 'Mali',
    'MLT': 'Malta',
    'MHL': 'Marshall Islands (the)',
    'MTQ': 'Martinique',
    'MRT': 'Mauritania',
    'MUS': 'Mauritius',
    'MYT': 'Mayotte',
    'MEX': 'Mexico',
    'FSM': 'Micronesia (Federated States of)',
    'MDA': 'Moldova (the Republic of)',
    'MCO': 'Monaco',
    'MNG': 'Mongolia',
    'MNE': 'Montenegro',
    'MSR': 'Montserrat',
    'MAR': 'Morocco',
    'MOZ': 'Mozambique',
    'MMR': 'Myanmar',
    'NAM': 'Namibia',
    'NRU': 'Nauru',
    'NPL': 'Nepal',
    'NLD': 'Netherlands (the)',
    'NCL': 'New Caledonia',
    'NZL': 'New Zealand',
    'NIC': 'Nicaragua',
    'NER': 'Niger (the)',
    'NGA': 'Nigeria',
    'NIU': 'Niue',
    'NFK': 'Norfolk Island',
    'MNP': 'Northern Mariana Islands (the)',
    'NOR': 'Norway',
    'OMN': 'Oman',
    'PAK': 'Pakistan',
    'PLW': 'Palau',
    'PSE': 'Palestine, State of',
    'PAN': 'Panama',
    'PNG': 'Papua New Guinea',
    'PRY': 'Paraguay',
    'PER': 'Peru',
    'PHL': 'Philippines (the)',
    'PCN': 'Pitcairn',
    'POL': 'Poland',
    'PRT': 'Portugal',
    'PRI': 'Puerto Rico',
    'QAT': 'Qatar',
    'MKD': 'Republic of North Macedonia',
    'ROU': 'Romania',
    'RUS': 'Russian Federation (the)',
    'RWA': 'Rwanda',
    'REU': 'Réunion',
    'BLM': 'Saint Barthélemy',
    'SHN': 'Saint Helena, Ascension and Tristan da Cunha',
    'KNA': 'Saint Kitts and Nevis',
    'LCA': 'Saint Lucia',
    'MAF': 'Saint Martin (French part)',
    'SPM': 'Saint Pierre and Miquelon',
    'VCT': 'Saint Vincent and the Grenadines',
    'WSM': 'Samoa',
    'SMR': 'San Marino',
    'STP': 'Sao Tome and Principe',
    'SAU': 'Saudi Arabia',
    'SEN': 'Senegal',
    'SRB': 'Serbia',
    'SYC': 'Seychelles',
    'SLE': 'Sierra Leone',
    'SGP': 'Singapore',
    'SXM': 'Sint Maarten (Dutch part)',
    'SVK': 'Slovakia',
    'SVN': 'Slovenia',
    'SLB': 'Solomon Islands',
    'SOM': 'Somalia',
    'ZAF': 'South Africa',
    'SGS': 'South Georgia and the South Sandwich Islands',
    'SSD': 'South Sudan',
    'ESP': 'Spain',
    'LKA': 'Sri Lanka',
    'SDN': 'Sudan (the)',
    'SUR': 'Suriname',
    'SJM': 'Svalbard and Jan Mayen',
    'SWE': 'Sweden',
    'CHE': 'Switzerland',
    'SYR': 'Syrian Arab Republic',
    'TWN': 'Taiwan (Province of China)',
    'TJK': 'Tajikistan',
    'TZA': 'Tanzania, United Republic of',
    'THA': 'Thailand',
    'TLS': 'Timor-Leste',
    'TGO': 'Togo',
    'TKL': 'Tokelau',
    'TON': 'Tonga',
    'TTO': 'Trinidad and Tobago',
    'TUN': 'Tunisia',
    'TUR': 'Turkey',
    'TKM': 'Turkmenistan',
    'TCA': 'Turks and Caicos Islands (the)',
    'TUV': 'Tuvalu',
    'UGA': 'Uganda',
    'UKR': 'Ukraine',
    'ARE': 'United Arab Emirates',
    'GBR': 'United Kingdom of Great Britain and Northern Ireland',
    'USA': 'United States of America (the)',
    'UMI': 'United States Minor Outlying Islands',
    'URY': 'Uruguay',
    'UZB': 'Uzbekistan',
    'VUT': 'Vanuatu',
    'VEN': 'Venezuela (Bolivarian Republic of)',
    'VNM': 'Viet Nam',
    'VGB': 'Virgin Islands (British)',
    'VIR': 'Virgin Islands (U.S.)',
    'WLF': 'Wallis and Futuna',
    'ESH': 'Western Sahara',
    'YEM': 'Yemen',
    'ZMB': 'Zambia',
    'ZWE': 'Zimbabwe',
}

# Translate location codes to names; codes missing from the lookup become NaN.
country_names = dfC['location_code'].map(code_to_country)
dfC['Country'] = country_names
dfC = dfC.rename(columns={'year': 'Year'})

# 'Year' and 'Country' lead; every other column keeps its relative order.
leading = ['Year', 'Country']
cols = leading + [col for col in dfC.columns if col not in leading]
dfC = dfC[cols]

# Cell-level missingness over the whole table.
total_cells = dfC.shape[0] * dfC.shape[1]
nan_cells = int(dfC.isna().to_numpy().sum())
if total_cells > 0:
    proportion_nan = nan_cells / total_cells
else:
    proportion_nan = 0
print(f"Proportion of NaN values: {proportion_nan:.2%}")
print("First few rows of the updated DataFrame:")

dfC.head()

Proportion of NaN values: 8.22%
First few rows of the updated DataFrame:


Unnamed: 0,Year,Country,location_id,product_id,export_value,import_value,export_rca,product_status,cog,distance,normalized_distance,normalized_cog,normalized_pci,export_rpop,is_new,hs_eci,hs_coi,pci,location_code,hs_product_code
0,1994,Aruba,0,650,0.0,4007.0,0.0,,0.000991,0.993055,0.140437,-0.345626,0.058587,0.0,0,0.580963,-0.604102,0.062836,ABW,101
1,1995,Aruba,0,650,18008.0,7199.0,0.13323,,0.001376,0.987581,1.270676,-0.197988,0.031559,0.0,0,-0.502999,-0.713013,0.040311,ABW,101
2,1996,Aruba,0,650,0.0,4021.0,0.0,,0.000861,0.992854,1.147078,-0.338379,-0.047561,0.0,0,-0.681487,-0.70999,-0.042549,ABW,101
3,1997,Aruba,0,650,0.0,0.0,0.0,,0.001408,0.993152,0.181889,-0.172005,-0.014992,0.0,0,-1.290819,-0.824577,-0.006854,ABW,101
4,1998,Aruba,0,650,0.0,0.0,0.0,,0.00153,0.987481,0.819724,-0.072847,0.101849,0.0,0,0.197883,-0.726349,0.107328,ABW,101


In [51]:
# For every file in fileAC, report how many entries of its 'Date' column are
# missing, or say so when the file has no 'Date' column at all.
for file in fileAC:
    df = pd.read_csv(file)

    if 'Date' not in df.columns:
        print(f"'Date' column not found in '{file}'.")
        continue

    nan_rows_count = df['Date'].isna().sum()
    print(f"Number of rows with NaN values in 'Date' column in '{file}': {nan_rows_count}")

Number of rows with NaN values in 'Date' column in 'Atmospheric_CO%E2%82%82_Concentrations.csv': 0
Number of rows with NaN values in 'Date' column in 'Change_in_Mean_Sea_Levels.csv': 0


In [103]:


# Files that carry a 'Date' column. Order matters: the loop below chooses the
# year-extraction rule from each file's position in this list.
fileAC = [
    "Atmospheric_CO%E2%82%82_Concentrations.csv",
    "Change_in_Mean_Sea_Levels.csv"
]

for i, file_name in enumerate(fileAC):
    df = pd.read_csv(file_name)

    if 'Date' in df.columns:
        # The first file's dates start with the year (e.g. '1958M03'); the
        # second file's dates end with it (e.g. 'D12/17/1992').
        df['Year'] = df['Date'].astype(str).str[:4] if i == 0 else df['Date'].astype(str).str[-4:]

    if 'ISO3' in df.columns:
        df['Country'] = df['ISO3'].map(code_to_country)

    # Move 'Year'/'Country' to the front. Only columns that actually exist are
    # used, so a file without 'Date' or 'ISO3' no longer raises a KeyError.
    leading = [col for col in ('Year', 'Country') if col in df.columns]
    cols = leading + [col for col in df.columns if col not in leading]
    df = df[cols]

    # Missing-data summary, both per row and per cell (0 for an empty frame).
    total_rows = len(df)
    nan_rows = df.isna().any(axis=1).sum()
    nan_proportion = nan_rows / total_rows if total_rows > 0 else 0
    nan_cells = df.isna().sum().sum()
    total_cells = df.size
    cell_nan_proportion = nan_cells / total_cells if total_cells > 0 else 0

    print(f"\nProcessed {file_name}:")
    print(f"Number of rows with NaN values: {nan_rows}")
    print(f"Proportion of rows with NaN values: {nan_proportion:.2%}")
    print(f"Number of NaN cells: {nan_cells}")
    print(f"Proportion of NaN cells: {cell_nan_proportion:.2%}")

    # Optional: write the modified frame next to the input file.
    #df.to_csv(f"modified_{file_name}", index=False)

    # Nothing is saved here, so the preview header no longer refers to a saved
    # file (the old message used a variable that is never assigned in this cell).
    print("\nFirst few rows of the DataFrame:")
    display(df.head())


Processed Atmospheric_CO%E2%82%82_Concentrations.csv:
Number of rows with NaN values: 1570
Proportion of rows with NaN values: 100.00%
Number of NaN cells: 1570
Proportion of NaN cells: 7.69%

Processed and saved modified_Change_in_Mean_Sea_Levels.csv:


Unnamed: 0,Year,Country,ObjectId,ISO2,ISO3,Indicator,Unit,Source,CTS_Code,CTS_Name,CTS_Full_Descriptor,Date,Value
0,1958,World,1,,WLD,Monthly Atmospheric Carbon Dioxide Concentrations,Parts Per Million,"Dr. Pieter Tans, National Oceanic and Atmosphe...",ECCA,Atmospheric Carbon Dioxide Concentrations,"Environment, Climate Change, Climate and Weath...",1958M03,315.7
1,1958,World,2,,WLD,Monthly Atmospheric Carbon Dioxide Concentrations,Parts Per Million,"Dr. Pieter Tans, National Oceanic and Atmosphe...",ECCA,Atmospheric Carbon Dioxide Concentrations,"Environment, Climate Change, Climate and Weath...",1958M04,317.45
2,1958,World,3,,WLD,Monthly Atmospheric Carbon Dioxide Concentrations,Parts Per Million,"Dr. Pieter Tans, National Oceanic and Atmosphe...",ECCA,Atmospheric Carbon Dioxide Concentrations,"Environment, Climate Change, Climate and Weath...",1958M05,317.51
3,1958,World,4,,WLD,Monthly Atmospheric Carbon Dioxide Concentrations,Parts Per Million,"Dr. Pieter Tans, National Oceanic and Atmosphe...",ECCA,Atmospheric Carbon Dioxide Concentrations,"Environment, Climate Change, Climate and Weath...",1958M06,317.24
4,1958,World,5,,WLD,Monthly Atmospheric Carbon Dioxide Concentrations,Parts Per Million,"Dr. Pieter Tans, National Oceanic and Atmosphe...",ECCA,Atmospheric Carbon Dioxide Concentrations,"Environment, Climate Change, Climate and Weath...",1958M07,315.86



Processed Change_in_Mean_Sea_Levels.csv:
Number of rows with NaN values: 35604
Proportion of rows with NaN values: 100.00%
Number of NaN cells: 35604
Proportion of NaN cells: 7.14%

Processed and saved modified_Change_in_Mean_Sea_Levels.csv:


Unnamed: 0,Year,Country,ObjectId,ISO2,ISO3,Indicator,Unit,Source,CTS_Code,CTS_Name,CTS_Full_Descriptor,Measure,Date,Value
0,1992,World,1,,WLD,Change in mean sea level: Sea level: TOPEX.Pos...,Millimeters,National Oceanic and Atmospheric Administratio...,ECCL,Change in Mean Sea Level,"Environment, Climate Change, Climate Indicator...",Andaman Sea,D12/17/1992,-10.34
1,1992,World,2,,WLD,Change in mean sea level: Sea level: TOPEX.Pos...,Millimeters,National Oceanic and Atmospheric Administratio...,ECCL,Change in Mean Sea Level,"Environment, Climate Change, Climate Indicator...",Arabian Sea,D12/17/1992,-18.46
2,1992,World,3,,WLD,Change in mean sea level: Sea level: TOPEX.Pos...,Millimeters,National Oceanic and Atmospheric Administratio...,ECCL,Change in Mean Sea Level,"Environment, Climate Change, Climate Indicator...",Atlantic Ocean,D12/17/1992,-15.41
3,1992,World,4,,WLD,Change in mean sea level: Sea level: TOPEX.Pos...,Millimeters,National Oceanic and Atmospheric Administratio...,ECCL,Change in Mean Sea Level,"Environment, Climate Change, Climate Indicator...",Baltic Sea,D12/17/1992,196.85
4,1992,World,5,,WLD,Change in mean sea level: Sea level: TOPEX.Pos...,Millimeters,National Oceanic and Atmospheric Administratio...,ECCL,Change in Mean Sea Level,"Environment, Climate Change, Climate Indicator...",Bay Bengal,D12/17/1992,3.27


In [104]:
# Files that already carry a year column; they only need the column label
# normalised and the key columns moved to the front.
file_names = [
    "Carbon_Footprint_of_Bank_Loans.csv",
    "Climate-driven INFORM Risk.csv",
    "Green Debt.csv"
]

for file_name in file_names:
    data_frame = pd.read_csv(file_name)

    # Use the capitalised 'Year' label for the year column.
    if 'year' in data_frame.columns:
        data_frame = data_frame.rename(columns={'year': 'Year'})

    if 'Country' in data_frame.columns:
        # 'Year' is only placed first when it exists; requiring it
        # unconditionally would raise a KeyError for a file without it.
        leading = [col for col in ('Year', 'Country') if col in data_frame.columns]
        cols = leading + [col for col in data_frame.columns if col not in leading]
        data_frame = data_frame[cols]

    # Missing-data summary, both per row and per cell (0 for an empty frame).
    total_rows = len(data_frame)
    nan_rows = data_frame.isna().any(axis=1).sum()
    nan_proportion_rows = nan_rows / total_rows if total_rows > 0 else 0
    nan_cells = data_frame.isna().sum().sum()
    total_cells = data_frame.size
    nan_proportion_cells = nan_cells / total_cells if total_cells > 0 else 0

    print(f"\nProcessed {file_name}:")
    print(f"Number of rows with NaN values: {nan_rows}")
    print(f"Proportion of rows with NaN values: {nan_proportion_rows:.2%}")
    print(f"Number of NaN cells: {nan_cells}")
    print(f"Proportion of NaN cells: {nan_proportion_cells:.2%}")

    # Optional: save the modified DataFrame.
    #data_frame.to_csv(f"modified_{file_name}", index=False)

    # Label the preview instead of repeating the "Processed ..." header.
    print("\nFirst few rows of the DataFrame:")
    display(data_frame.head())


Processed Carbon_Footprint_of_Bank_Loans.csv:
Number of rows with NaN values: 568
Proportion of rows with NaN values: 25.36%
Number of NaN cells: 568
Proportion of NaN cells: 2.11%

Processed Carbon_Footprint_of_Bank_Loans.csv:


Unnamed: 0,Year,Country,CTS_Code,CTS_Full_Descriptor,CTS_Name,ISO2,ISO3,Indicator,ObjectId,Source,Unit,value
0,2005,Argentina,ECFLIN,"Environment, Climate Change, Financial and Phy...",Carbon Footprint-Adjusted Loans to Total Loans...,AR,ARG,Carbon Footprint of Bank Loans (Based on emiss...,1,"OECD (2021), OECD Inter-Country Input-Output D...",Ratio,
1,2005,Argentina,ECFLI,"Environment, Climate Change, Financial and Phy...",Carbon Footprint-Adjusted Loans to Total Loans...,AR,ARG,Carbon Footprint of Bank Loans (Based on emiss...,2,"OECD (2021), OECD Inter-Country Input-Output D...",Ratio,
2,2005,Argentina,ECFLMN,"Environment, Climate Change, Financial and Phy...",Carbon Footprint-Adjusted Loans to Total Loans...,AR,ARG,Carbon Footprint of Bank Loans (Based on emiss...,3,"OECD (2021), OECD Inter-Country Input-Output D...",Ratio,
3,2005,Argentina,ECFLM,"Environment, Climate Change, Financial and Phy...",Carbon Footprint-Adjusted Loans to Total Loans...,AR,ARG,Carbon Footprint of Bank Loans (Based on emiss...,4,"OECD (2021), OECD Inter-Country Input-Output D...",Ratio,
4,2005,Belgium,ECFLIN,"Environment, Climate Change, Financial and Phy...",Carbon Footprint-Adjusted Loans to Total Loans...,BE,BEL,Carbon Footprint of Bank Loans (Based on emiss...,5,"OECD (2021), OECD Inter-Country Input-Output D...",Ratio,



Processed Climate-driven INFORM Risk.csv:
Number of rows with NaN values: 160
Proportion of rows with NaN values: 2.09%
Number of NaN cells: 1240
Proportion of NaN cells: 1.35%

Processed Climate-driven INFORM Risk.csv:


Unnamed: 0,Year,Country,CTS Code,CTS Full Descriptor,CTS Name,ISO2,ISO3,Indicator,ObjectId,Source,Unit,value
0,2013,"Afghanistan, Islamic Rep. of",ECFRMH,"Environment, Climate Change, Financial and Phy...",Climate-Driven Hazard & Exposure,AF,AFG,Climate-driven Hazard & Exposure,1,Disaster Risk Management Knowledge Centre (DRM...,Index,6.3
1,2013,"Afghanistan, Islamic Rep. of",ECFRMR,"Environment, Climate Change, Financial and Phy...",Climate-Driven Inform Risk Indicator,AF,AFG,Climate-driven INFORM Risk Indicator,2,Disaster Risk Management Knowledge Centre (DRM...,Index,7.3
2,2013,"Afghanistan, Islamic Rep. of",ECFRMC,"Environment, Climate Change, Financial and Phy...",Index for Risk Management; Lack of Coping Capa...,AF,AFG,Lack of coping capacity,3,Disaster Risk Management Knowledge Centre (DRM...,Index,8.2
3,2013,"Afghanistan, Islamic Rep. of",ECFRMV,"Environment, Climate Change, Financial and Phy...",Index for Risk Management; Vulnerability,AF,AFG,Vulnerability,4,Disaster Risk Management Knowledge Centre (DRM...,Index,7.6
4,2013,Albania,ECFRMH,"Environment, Climate Change, Financial and Phy...",Climate-Driven Hazard & Exposure,AL,ALB,Climate-driven Hazard & Exposure,5,Disaster Risk Management Knowledge Centre (DRM...,Index,4.3



Processed Green Debt.csv:
Number of rows with NaN values: 8770
Proportion of rows with NaN values: 95.02%
Number of NaN cells: 15011
Proportion of NaN cells: 10.84%

Processed Green Debt.csv:


Unnamed: 0,Year,Country,CTS_Code,CTS_Full_Descriptor,CTS_Name,ISO2,ISO3,Indicator,ObjectId,Principal_Currency,Source,Type_of_Issuer,Unit,Use_of_Proceed,value
0,1990,Argentina,ECFFI,"Environment, Climate Change, Financial and Phy...",Green Bonds Issuances,AR,ARG,Green Bond Issuances by Country,1,Not Applicable,Refinitiv. Accessed on 2023-03-29; Country aut...,Not Applicable,Billion US Dollars,Not Applicable,
1,1990,Australia,ECFFI,"Environment, Climate Change, Financial and Phy...",Green Bonds Issuances,AU,AUS,Green Bond Issuances by Country,2,Not Applicable,Refinitiv. Accessed on 2023-03-29; Country aut...,Not Applicable,Billion US Dollars,Not Applicable,
2,1990,Austria,ECFFI,"Environment, Climate Change, Financial and Phy...",Green Bonds Issuances,AT,AUT,Green Bond Issuances by Country,3,Not Applicable,Refinitiv. Accessed on 2023-03-29; Country aut...,Not Applicable,Billion US Dollars,Not Applicable,
3,1990,Austria,ECFF,"Environment, Climate Change, Financial and Phy...",Green Bonds,AT,AUT,Sovereign Green Bond Issuances,4,Not Applicable,Refinitiv. Accessed on 2023-03-29; Country aut...,Not Applicable,Billion US Dollars,Not Applicable,
4,1990,Bangladesh,ECFFI,"Environment, Climate Change, Financial and Phy...",Green Bonds Issuances,BD,BGD,Green Bond Issuances by Country,5,Not Applicable,Refinitiv. Accessed on 2023-03-29; Country aut...,Not Applicable,Billion US Dollars,Not Applicable,


In [108]:
# One "<prefix>_raw_na copy.csv" file per three-letter prefix, in processing order.
filesY = [
    f"{prefix}_raw_na copy.csv"
    for prefix in ('BTZ', 'BTO', 'FSS', 'FCD', 'PSU', 'PRS', 'RMS', 'RCY', 'SNM')
]

def rename_columns_to_years(df):
    """Return a copy of ``df`` with dotted, digit-suffixed column labels shortened.

    A column is renamed when its label contains at least one '.' and the text
    after the last '.' consists only of digits (presumably a year); the new
    label is just that digit suffix. All other columns keep their labels.
    The input frame itself is not modified.
    """
    # rpartition yields ('', '', label) when there is no dot, so an empty
    # separator marks labels that must be left alone.
    split_labels = ((col, col.rpartition('.')) for col in df.columns)
    year_labels = {
        col: suffix
        for col, (_, dot, suffix) in split_labels
        if dot and suffix.isdigit()
    }
    return df.rename(columns=year_labels)

# Summarise missing data for every file in filesY and preview the frame.
for file in filesY:
    # Only the read is guarded, so the "Error reading" message is accurate and
    # a failure in the later steps surfaces instead of being swallowed.
    # OSError covers missing/unreadable files; pandas' parser and empty-data
    # errors, as well as decoding errors, are ValueError subclasses.
    try:
        df = pd.read_csv(file)
    except (OSError, ValueError) as e:
        print(f"Error reading {file}: {e}")
        continue

    modified_df = rename_columns_to_years(df)

    # Missing-data summary, both per row and per cell (0 for an empty frame).
    total_rows = len(modified_df)
    nan_rows = modified_df.isna().any(axis=1).sum()
    nan_proportion_rows = nan_rows / total_rows if total_rows > 0 else 0
    nan_cells = modified_df.isna().sum().sum()
    total_cells = modified_df.size
    nan_proportion_cells = nan_cells / total_cells if total_cells > 0 else 0

    print(f"\nProcessed {file}:")
    print(f"Number of rows with NaN values: {nan_rows}")
    print(f"Proportion of rows with NaN values: {nan_proportion_rows:.2%}")
    print(f"Number of NaN cells: {nan_cells}")
    print(f"Proportion of NaN cells: {nan_proportion_cells:.2%}")

    print("\nFirst few rows of the DataFrame:")
    display(modified_df.head())


Processed BTZ_raw_na copy.csv:
Number of rows with NaN values: 71
Proportion of rows with NaN values: 32.27%
Number of NaN cells: 4970
Proportion of NaN cells: 30.95%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,1950,1951,1952,1953,1954,1955,1956,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,4,AFG,Afghanistan,,,,,,,,...,,,,,,,,,,
1,8,ALB,Albania,0.7688,0.7688,0.7688,0.768452,0.766511,0.767103,0.767404,...,0.653983,0.796816,0.79761,0.797559,0.734199,0.804167,0.758582,0.744352,0.753202,0.752195
2,12,DZA,Algeria,0.295667,0.280603,0.299795,0.284328,0.28101,0.30597,0.306406,...,0.240564,0.200641,0.200786,0.191498,0.191077,0.205029,0.211157,0.200514,0.210681,0.209007
3,20,AND,Andorra,,,,,,,,...,,,,,,,,,,
4,24,AGO,Angola,0.433832,0.531701,0.557969,0.649529,0.650734,0.730928,0.468986,...,0.403239,0.420887,0.424467,0.434922,0.433772,0.355287,0.380418,0.397531,0.386236,0.406875



Processed BTO_raw_na copy.csv:
Number of rows with NaN values: 51
Proportion of rows with NaN values: 23.18%
Number of NaN cells: 3570
Proportion of NaN cells: 22.23%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,1950,1951,1952,1953,1954,1955,1956,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,4,AFG,Afghanistan,,,,,,,,...,,,,,,,,,,
1,8,ALB,Albania,0.7688,0.7688,0.7688,0.7688,0.7688,0.7688,0.7688,...,0.796531,0.797089,0.79761,0.797613,0.735155,0.804167,0.758582,0.73871,0.753203,0.752242
2,12,DZA,Algeria,0.295584,0.280289,0.299501,0.284015,0.280807,0.305654,0.306211,...,0.256522,0.222385,0.219291,0.211598,0.209722,0.209618,0.201776,0.210563,0.21847,0.219109
3,20,AND,Andorra,,,,,,,,...,,,,,,,,,,
4,24,AGO,Angola,0.435833,0.534638,0.558921,0.650964,0.65271,0.735869,0.471598,...,0.31781,0.348157,0.348447,0.371065,0.38062,0.26136,0.305799,0.322919,0.312334,0.272262



Processed FSS_raw_na copy.csv:
Number of rows with NaN values: 102
Proportion of rows with NaN values: 46.36%
Number of NaN cells: 7140
Proportion of NaN cells: 44.46%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,1950,1951,1952,1953,1954,1955,1956,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,4,AFG,Afghanistan,,,,,,,,...,,,,,,,,,,
1,8,ALB,Albania,,,,,,,,...,,,,,,,,,,
2,12,DZA,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.11,0.149985,0.189981,0.2,0.18,0.15,0.149985,0.26,0.290029,0.25
3,20,AND,Andorra,,,,,,,,...,,,,,,,,,,
4,24,AGO,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.82,5.129487,5.39,4.739526,4.939506,5.349465,7.09,6.759324,6.0,3.820382



Processed FCD_raw_na copy.csv:
Number of rows with NaN values: 59
Proportion of rows with NaN values: 26.82%
Number of NaN cells: 4130
Proportion of NaN cells: 25.72%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,1950,1951,1952,1953,1954,1955,1956,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,4,AFG,Afghanistan,,,,,,,,...,,,,,,,,,,
1,8,ALB,Albania,0.134816,0.134816,0.134816,0.134816,0.134816,0.134816,0.134816,...,0.236265,0.236566,0.236721,0.236722,0.222876,0.238368,0.228135,0.223235,0.239848,0.238325
2,12,DZA,Algeria,0.080058,0.082022,0.072062,0.083843,0.086516,0.083039,0.092796,...,0.11172,0.113134,0.113115,0.115036,0.116595,0.117291,0.115425,0.127496,0.133867,0.128939
3,20,AND,Andorra,,,,,,,,...,,,,,,,,,,
4,24,AGO,Angola,0.072122,0.075951,0.08969,0.087664,0.075249,0.067668,0.061556,...,0.079504,0.085128,0.083337,0.085952,0.094812,0.074632,0.074323,0.067178,0.090733,0.090342



Processed PSU_raw_na copy.csv:
Number of rows with NaN values: 56
Proportion of rows with NaN values: 25.45%
Number of NaN cells: 2379
Proportion of NaN cells: 18.02%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,1965,1966,1967,1968,1969,1970,1971,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,4,AFG,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.00986,...,0.0,0.0,0.0,0.0,0.0,0.0,0.02202,0.02202,0.02202,0.02202
1,8,ALB,Albania,8.07366,8.12122,8.48982,10.12838,11.94626,13.43988,15.0533,...,13.66118,13.94386,14.35862,14.86264,15.6171,15.94782,15.28444,14.76248,14.30808,13.08288
2,12,DZA,Algeria,0.12262,0.13118,0.16888,0.1832,0.31448,0.62434,1.11674,...,0.0,0.0,0.19328,0.19468,0.19468,0.19468,0.19468,0.0014,0.0,0.0
3,20,AND,Andorra,,,,,,,,...,,,,,,,,,,
4,24,AGO,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.00656,...,0.2882,0.2882,0.29942,0.29942,0.20814,0.03034,0.03034,0.01912,0.01912,0.01912



Processed PRS_raw_na copy.csv:
Number of rows with NaN values: 0
Proportion of rows with NaN values: 0.00%
Number of NaN cells: 0
Proportion of NaN cells: 0.00%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,2015,2018
0,4,AFG,Afghanistan,1.771012,2.16036
1,8,ALB,Albania,1.486172,1.542041
2,12,DZA,Algeria,1.049052,1.142514
3,20,AND,Andorra,0.201284,0.329801
4,24,AGO,Angola,0.132794,0.098512



Processed RMS_raw_na copy.csv:
Number of rows with NaN values: 72
Proportion of rows with NaN values: 32.73%
Number of NaN cells: 2232
Proportion of NaN cells: 29.84%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,1989,1990,1991,1992,1993,1994,1995,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,4,AFG,Afghanistan,,,,,,,,...,,,,,,,,,,
1,8,ALB,Albania,-0.005597,-0.004843,-0.002695,0.001027,0.005361,0.009943,0.012073,...,0.015386,0.012485,0.009962,0.010516,0.011537,0.013567,0.019766,0.023382,0.023395,0.020991
2,12,DZA,Algeria,-0.000916,-0.00148,-0.001922,-0.002207,-0.002476,-0.002698,-0.003211,...,-0.008768,-0.008384,-0.007282,-0.005561,-0.003389,-0.00113,0.000732,0.002044,0.002593,0.002449
3,20,AND,Andorra,,,,,,,,...,,,,,,,,,,
4,24,AGO,Angola,0.001638,-0.003592,-0.008934,-0.013985,-0.01714,-0.01746,-0.015899,...,0.002779,0.001362,-0.000248,-0.00164,-0.002839,-0.00339,-0.003292,-0.003087,-0.002578,-0.001402



Processed RCY_raw_na copy.csv:
Number of rows with NaN values: 115
Proportion of rows with NaN values: 52.27%
Number of NaN cells: 6254
Proportion of NaN cells: 43.73%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,4,AFG,Afghanistan,0.240149,0.234154,0.212162,0.230196,0.236612,0.2184,0.261943,...,0.488597,0.480885,0.505187,0.467792,0.480772,0.513477,0.502485,0.466749,0.466922,0.497401
1,8,ALB,Albania,0.126671,0.147072,0.145808,0.15552,0.16009,0.196864,0.213828,...,0.742232,0.732016,0.731333,0.707836,0.722891,0.726639,0.752388,0.773541,0.755823,0.77186
2,12,DZA,Algeria,0.091167,0.223836,0.216016,0.13482,0.157635,0.10424,0.162476,...,0.520459,0.403357,0.407199,0.303705,0.296957,0.554528,0.502806,0.437055,0.415074,0.470046
3,20,AND,Andorra,,,,,,,,...,,,,,,,,,,
4,24,AGO,Angola,0.206844,0.203782,0.199973,0.209926,0.213229,0.208054,0.211454,...,0.383865,0.287596,0.300457,0.238109,0.241755,0.267516,0.272603,0.281988,0.282192,0.293627



Processed SNM_raw_na copy.csv:
Number of rows with NaN values: 56
Proportion of rows with NaN values: 25.45%
Number of NaN cells: 2379
Proportion of NaN cells: 18.02%

First few rows of the DataFrame:


Unnamed: 0,code,iso,country,1965,1966,1967,1968,1969,1970,1971,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,4,AFG,Afghanistan,0.91834,0.926166,0.932354,0.93541,0.939422,0.953124,0.966273,...,0.850809,0.826859,0.827125,0.83929,0.838298,0.882097,0.924573,0.933344,0.916546,0.905754
1,8,ALB,Albania,1.045958,1.024855,1.015333,1.01469,1.019563,1.021863,1.037909,...,1.048921,1.039574,1.034436,1.036758,1.038646,1.038099,1.038121,1.040979,1.034101,1.0269
2,12,DZA,Algeria,0.961318,0.980744,0.964646,0.956481,0.958821,0.96322,0.972674,...,0.855739,0.818394,0.828825,0.834559,0.843166,0.855784,0.848197,0.82309,0.808397,0.807832
3,20,AND,Andorra,,,,,,,,...,,,,,,,,,,
4,24,AGO,Angola,0.913652,0.911994,0.91105,0.909695,0.907036,0.903895,0.903129,...,0.881191,0.869942,0.885117,0.896604,0.905854,0.881121,0.887776,0.875268,0.862968,0.853191
