In [2]:
import pandas as pd

# Read the total GHG emissions CSV, skipping the 4 lines that precede
# the header row.
df = pd.read_csv('API_EN.ATM.GHGT.KT.CE_DS2_en_csv_v2_5607282.csv', skiprows=4)

# The wide-format year columns are labelled with the year as a string
# ('1960' ... '2022').
year_columns = [str(year) for year in range(1960, 2023)]

# Keep only the country identifiers and the yearly value columns.
df_years = df[['Country Name', 'Country Code'] + year_columns]

# Reshape wide -> long: one row per (country, year) pair.
df_melted = df_years.melt(id_vars=['Country Name', 'Country Code'],
                          var_name='Year',
                          value_name='Value')

# Drop country/year pairs that have no value and renumber the rows.
df_melted = df_melted.dropna(subset=['Value']).reset_index(drop=True)

# After melt, Year holds the former column labels (strings). Store it as an
# integer, as the EDGAR cells in this notebook do, so that sorting and
# numeric comparisons on Year behave as expected.
df_melted['Year'] = df_melted['Year'].astype(int)

# Preview the first few rows of the reshaped dataset.
print(df_melted.head())


                  Country Name Country Code  Year          Value
0  Africa Eastern and Southern          AFE  1990  798246.454045
1                  Afghanistan          AFG  1990   11630.795060
2   Africa Western and Central          AFW  1990  415342.674031
3                       Angola          AGO  1990   43185.609210
4                      Albania          ALB  1990   11181.074270


In [4]:
# Write the reshaped table to disk. index=False: the index is only the
# 0..n-1 row counter left by reset_index(drop=True); writing it would add an
# unnamed first column that comes back as "Unnamed: 0" when the file is read.
df_melted.to_csv('total-ghg-emissions.csv', index=False)

In [39]:
# Read the EDGAR CH4 workbook; the first 10 rows of the sheet are skipped.
df = pd.read_excel('EDGAR_CH4_1970-2021.xlsx', skiprows=10)

# Yearly value columns are labelled Y_1970 ... Y_2021.
year_columns = [f"Y_{year}" for year in range(1970, 2022)]

# Restrict the frame to the identifier columns plus the yearly values.
df_years = df[['Country_code_A3', "Name", "C_group_IM24_sh", "Substance"] + year_columns]

# Wide -> long, then discard missing values and renumber the rows.
df_melted = (
    df_years
    .melt(id_vars=['Name', 'Country_code_A3', "C_group_IM24_sh", "Substance"],
          var_name='Year',
          value_name='Value')
    .dropna(subset=['Value'])
    .reset_index(drop=True)
)

# Preview; Year still carries its "Y_" prefix at this point.
print(df_melted.head())


# Strip the two-character "Y_" prefix and store Year as an integer.
df_melted['Year'] = df_melted['Year'].str[2:].astype(int)
df_melted

    Name Country_code_A3       C_group_IM24_sh Substance    Year     Value
0  Aruba             ABW  Rest Central America       CH4  Y_1970  0.000007
1  Aruba             ABW  Rest Central America       CH4  Y_1970  0.003669
2  Aruba             ABW  Rest Central America       CH4  Y_1970  0.002518
3  Aruba             ABW  Rest Central America       CH4  Y_1970  0.000145
4  Aruba             ABW  Rest Central America       CH4  Y_1970  0.004200


Unnamed: 0,Name,Country_code_A3,C_group_IM24_sh,Substance,Year,Value
0,Aruba,ABW,Rest Central America,CH4,1970,0.000007
1,Aruba,ABW,Rest Central America,CH4,1970,0.003669
2,Aruba,ABW,Rest Central America,CH4,1970,0.002518
3,Aruba,ABW,Rest Central America,CH4,1970,0.000145
4,Aruba,ABW,Rest Central America,CH4,1970,0.004200
...,...,...,...,...,...,...
203226,Zimbabwe,ZWE,Southern_Africa,CH4,2021,7.776668
203227,Zimbabwe,ZWE,Southern_Africa,CH4,2021,0.503485
203228,Zimbabwe,ZWE,Southern_Africa,CH4,2021,27.797454
203229,Zimbabwe,ZWE,Southern_Africa,CH4,2021,-0.021157


In [46]:
# Read the EDGAR F-gases workbook; the first 10 rows of the sheet are skipped.
df = pd.read_excel('EDGAR_F-gases_1990-2021.xlsx', skiprows=10)

# Yearly value columns are labelled Y_1990 ... Y_2021.
year_columns = [f"Y_{year}" for year in range(1990, 2022)]

# Restrict the frame to the identifier columns plus the yearly values.
df_years = df[['Country_code_A3', "Name", "C_group_IM24_sh", "Substance"] + year_columns]

# Wide -> long, then discard missing values and renumber the rows.
df_melted = (
    df_years
    .melt(id_vars=['Name', 'Country_code_A3', "C_group_IM24_sh", "Substance"],
          var_name='Year',
          value_name='Value')
    .dropna(subset=['Value'])
    .reset_index(drop=True)
)

# Preview; Year still carries its "Y_" prefix at this point.
print(df_melted.head())


# Strip the two-character "Y_" prefix and store Year as an integer.
df_melted['Year'] = df_melted['Year'].str[2:].astype(int)
df_melted

          Name Country_code_A3  C_group_IM24_sh  Substance    Year  \
0  Afghanistan             AFG          India +   HFC-134a  Y_1990   
1  Afghanistan             AFG          India +   HFC-152a  Y_1990   
2       Angola             AGO  Southern_Africa  HCFC-141b  Y_1990   
3       Angola             AGO  Southern_Africa  HCFC-142b  Y_1990   
4       Angola             AGO  Southern_Africa   HFC-134a  Y_1990   

          Value  
0  1.279760e-04  
1  2.897250e-05  
2  2.459310e-08  
3  8.721080e-06  
4  8.694760e-05  


Unnamed: 0,Name,Country_code_A3,C_group_IM24_sh,Substance,Year,Value
0,Afghanistan,AFG,India +,HFC-134a,1990,1.279760e-04
1,Afghanistan,AFG,India +,HFC-152a,1990,2.897250e-05
2,Angola,AGO,Southern_Africa,HCFC-141b,1990,2.459310e-08
3,Angola,AGO,Southern_Africa,HCFC-142b,1990,8.721080e-06
4,Angola,AGO,Southern_Africa,HFC-134a,1990,8.694760e-05
...,...,...,...,...,...,...
55843,Zimbabwe,ZWE,Southern_Africa,HFC-152a,2021,5.536131e-02
55844,Zimbabwe,ZWE,Southern_Africa,HFC-227ea,2021,5.009794e-03
55845,Zimbabwe,ZWE,Southern_Africa,HFC-245fa,2021,1.919408e-02
55846,Zimbabwe,ZWE,Southern_Africa,HFC-32,2021,5.258080e-03


In [47]:
# Read the EDGAR N2O workbook; the first 10 rows of the sheet are skipped.
df = pd.read_excel('EDGAR_N2O_1970-2021.xlsx', skiprows=10)

# Yearly value columns are labelled Y_1970 ... Y_2021.
year_columns = [f"Y_{year}" for year in range(1970, 2022)]

# Restrict the frame to the identifier columns plus the yearly values.
df_years = df[['Country_code_A3', "Name", "C_group_IM24_sh", "Substance"] + year_columns]

# Wide -> long, then discard missing values and renumber the rows.
df_melted = (
    df_years
    .melt(id_vars=['Name', 'Country_code_A3', "C_group_IM24_sh", "Substance"],
          var_name='Year',
          value_name='Value')
    .dropna(subset=['Value'])
    .reset_index(drop=True)
)

# Preview; Year still carries its "Y_" prefix at this point.
print(df_melted.head())


# Strip the two-character "Y_" prefix and store Year as an integer.
df_melted['Year'] = df_melted['Year'].str[2:].astype(int)
df_melted

    Name Country_code_A3       C_group_IM24_sh Substance    Year         Value
0  Aruba             ABW  Rest Central America       N2O  Y_1970  8.958735e-07
1  Aruba             ABW  Rest Central America       N2O  Y_1970  4.891469e-04
2  Aruba             ABW  Rest Central America       N2O  Y_1970  3.208158e-05
3  Aruba             ABW  Rest Central America       N2O  Y_1970  2.227203e-06
4  Aruba             ABW  Rest Central America       N2O  Y_1970  5.676212e-06


Unnamed: 0,Name,Country_code_A3,C_group_IM24_sh,Substance,Year,Value
0,Aruba,ABW,Rest Central America,N2O,1970,8.958735e-07
1,Aruba,ABW,Rest Central America,N2O,1970,4.891469e-04
2,Aruba,ABW,Rest Central America,N2O,1970,3.208158e-05
3,Aruba,ABW,Rest Central America,N2O,1970,2.227203e-06
4,Aruba,ABW,Rest Central America,N2O,1970,5.676212e-06
...,...,...,...,...,...,...
208960,Zimbabwe,ZWE,Southern_Africa,N2O,2021,1.297739e+01
208961,Zimbabwe,ZWE,Southern_Africa,N2O,2021,8.230079e-01
208962,Zimbabwe,ZWE,Southern_Africa,N2O,2021,1.959333e-03
208963,Zimbabwe,ZWE,Southern_Africa,N2O,2021,5.657619e-01


In [63]:
# Read the EDGAR v7.0 GHG workbook; the first 4 rows of the sheet are skipped.
df = pd.read_excel('EDGARv7.0_GHG_AR4_AR5.xlsx', skiprows=4)

# Yearly value columns are selected by integer label (1970 ... 2021).
year_columns = list(range(1970, 2022))

# Restrict the frame to the sector/country identifiers plus the yearly values.
df_years = df[['Sector', "EDGAR Country Code", "Country"] + year_columns]

# Wide -> long, then discard missing values and renumber the rows.
df_melted = (
    df_years
    .melt(id_vars=['Sector', "EDGAR Country Code", "Country"],
          var_name='Year',
          value_name='Value')
    .dropna(subset=['Value'])
    .reset_index(drop=True)
)

# Preview the first few rows of the reshaped dataset.
print(df_melted.head())


# Unlike the per-gas EDGAR sheets, the year labels here have no "Y_" prefix,
# so Year needs no string clean-up.
df_melted

           Sector EDGAR Country Code      Country  Year     Value
0  Power Industry                AFG  Afghanistan  1970  0.057348
1  Power Industry                ALB      Albania  1970  0.699584
2  Power Industry                DZA      Algeria  1970  1.649856
3  Power Industry                AGO       Angola  1970  0.138005
4  Power Industry                AIA     Anguilla  1970  0.000431


Unnamed: 0,Sector,EDGAR Country Code,Country,Year,Value
0,Power Industry,AFG,Afghanistan,1970,0.057348
1,Power Industry,ALB,Albania,1970,0.699584
2,Power Industry,DZA,Algeria,1970,1.649856
3,Power Industry,AGO,Angola,1970,0.138005
4,Power Industry,AIA,Anguilla,1970,0.000431
...,...,...,...,...,...
53689,Other sectors,VNM,Vietnam,2021,189.683875
53690,Other sectors,ESH,Western Sahara,2021,0.204054
53691,Other sectors,YEM,Yemen,2021,19.752768
53692,Other sectors,ZMB,Zambia,2021,18.595530
