# Clean CO2 table

In [51]:
import pandas as pd
# Read the raw emissions table from its CSV file; path is relative to the notebook.
raw_csv_path = "co2_data/MtCO2_flat.csv"
co = pd.read_csv(raw_csv_path)

# Show the loaded frame as the cell output.
co

Unnamed: 0,Country,ISO 3166-1 alpha-3,Year,Total,Coal,Oil,Gas,Cement,Flaring,Other,Per Capita
0,Afghanistan,AFG,1750,0.000000,,,,,,,
1,Afghanistan,AFG,1751,0.000000,,,,,,,
2,Afghanistan,AFG,1752,0.000000,,,,,,,
3,Afghanistan,AFG,1753,0.000000,,,,,,,
4,Afghanistan,AFG,1754,0.000000,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
63099,Global,WLD,2017,36096.739276,14506.973805,12242.627935,7144.928128,1507.923185,391.992176,302.294047,4.749682
63100,Global,WLD,2018,36826.506600,14746.830688,12266.016285,7529.846784,1569.218392,412.115746,302.478706,4.792753
63101,Global,WLD,2019,37082.558969,14725.978025,12345.653374,7647.528220,1617.506786,439.253991,306.638573,4.775633
63102,Global,WLD,2020,35264.085734,14174.564010,11191.808551,7556.290283,1637.537532,407.583673,296.301685,4.497423


## Edit column names

In [52]:
def clean_columns(df: pd.DataFrame) -> None:
    """Normalize the column labels of ``df`` in place.

    Every string label has its spaces replaced by underscores and is
    lower-cased (e.g. ``"Per Capita"`` becomes ``"per_capita"``). Other
    characters, such as hyphens, are kept as they are. Labels that are not
    strings (e.g. the integer labels of a header-less frame) are left
    untouched instead of raising ``AttributeError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``columns`` attribute is overwritten.

    Returns
    -------
    None
        The frame is modified in place; nothing is returned.
    """
    df.columns = [
        column.replace(" ", "_").lower() if isinstance(column, str) else column
        for column in df.columns
    ]

In [53]:
# Normalizes the column labels of `co` in place; the call returns nothing.
clean_columns(co)

In [54]:
# Display the frame to confirm the column labels were normalized.
co

Unnamed: 0,country,iso_3166-1_alpha-3,year,total,coal,oil,gas,cement,flaring,other,per_capita
0,Afghanistan,AFG,1750,0.000000,,,,,,,
1,Afghanistan,AFG,1751,0.000000,,,,,,,
2,Afghanistan,AFG,1752,0.000000,,,,,,,
3,Afghanistan,AFG,1753,0.000000,,,,,,,
4,Afghanistan,AFG,1754,0.000000,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
63099,Global,WLD,2017,36096.739276,14506.973805,12242.627935,7144.928128,1507.923185,391.992176,302.294047,4.749682
63100,Global,WLD,2018,36826.506600,14746.830688,12266.016285,7529.846784,1569.218392,412.115746,302.478706,4.792753
63101,Global,WLD,2019,37082.558969,14725.978025,12345.653374,7647.528220,1617.506786,439.253991,306.638573,4.775633
63102,Global,WLD,2020,35264.085734,14174.564010,11191.808551,7556.290283,1637.537532,407.583673,296.301685,4.497423


### Keep only needed columns

In [55]:
# Columns carried forward; every other column of `co` is left out.
kept_columns = ["country", "year", "total"]
co_filtered = co[kept_columns]

# Show the narrowed frame as the cell output.
co_filtered

Unnamed: 0,country,year,total
0,Afghanistan,1750,0.000000
1,Afghanistan,1751,0.000000
2,Afghanistan,1752,0.000000
3,Afghanistan,1753,0.000000
4,Afghanistan,1754,0.000000
...,...,...,...
63099,Global,2017,36096.739276
63100,Global,2018,36826.506600
63101,Global,2019,37082.558969
63102,Global,2020,35264.085734


### Keep years from 1950 to 2024

In [56]:
# range() excludes its stop value, so 2025 makes 2024 the last year kept.
year_window = range(1950, 2025)

# Boolean mask: True for rows whose year falls inside the window.
in_window = co_filtered["year"].isin(year_window)
co_filtered_years = co_filtered[in_window]

# Show the filtered frame as the cell output.
co_filtered_years

Unnamed: 0,country,year,total
200,Afghanistan,1950,0.084272
201,Afghanistan,1951,0.091600
202,Afghanistan,1952,0.091600
203,Afghanistan,1953,0.106256
204,Afghanistan,1954,0.106256
...,...,...,...
63099,Global,2017,36096.739276
63100,Global,2018,36826.506600
63101,Global,2019,37082.558969
63102,Global,2020,35264.085734


## Check for null values and duplicates

In [57]:
# Per-column flag: True if that column contains at least one missing value.
co_filtered_years.isna().any()

country    False
year       False
total      False
dtype: bool

In [58]:
# True if any row repeats an earlier row exactly (all columns compared).
co_filtered_years.duplicated().any()

False

## Reset index

In [59]:
# Discard the row labels left over from filtering and renumber rows from 0.
co_final = co_filtered_years.reset_index(drop=True)

## Rename the `total` column to `co2`

In [63]:
# Rename the "total" column to "co2"; the other columns keep their names.
co_final = co_final.rename(columns={"total" : "co2"})

In [64]:
# Display the finished table for a last look before it is exported.
co_final

Unnamed: 0,country,year,co2
0,Afghanistan,1950,0.084272
1,Afghanistan,1951,0.091600
2,Afghanistan,1952,0.091600
3,Afghanistan,1953,0.106256
4,Afghanistan,1954,0.106256
...,...,...,...
16699,Global,2017,36096.739276
16700,Global,2018,36826.506600
16701,Global,2019,37082.558969
16702,Global,2020,35264.085734


## Export

In [65]:
# index=False: the index is just the 0..n-1 row numbering created by
# reset_index(drop=True). Writing it would add an unnamed first column that
# comes back as "Unnamed: 0" when the file is read again.
co_final.to_csv("clean_co2.csv", index=False)