## **The start of analysis with all empty values removed**

In [35]:
import pandas as pd
import numpy as np
import seaborn as sbn
import matplotlib.pyplot as plt
import sys
# Suppress warnings unless the interpreter was started with explicit -W options.
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

# Load the raw CO2 emissions table. read_csv already returns a DataFrame, so
# no extra pd.DataFrame(...) wrapping is needed.
try:
    dataCo2 = pd.read_csv('CO2.csv')
except FileNotFoundError:
    # Report the problem, then re-raise: the rest of the notebook needs dataCo2,
    # so carrying on would only fail right below with a confusing NameError.
    # Any other error (parse failure, permissions, ...) propagates unchanged
    # instead of being mislabelled as a missing file.
    print("data can´t be found")
    raise

print(dataCo2) # to see the basic structure of the data

           Country ISO 3166-1 alpha-3  UN M49  Year         Total  \
0      Afghanistan                AFG       4  1750           NaN   
1      Afghanistan                AFG       4  1751           NaN   
2      Afghanistan                AFG       4  1752           NaN   
3      Afghanistan                AFG       4  1753           NaN   
4      Afghanistan                AFG       4  1754           NaN   
...            ...                ...     ...   ...           ...   
61420       Global                WLD     756  2018  36766.912297   
61421       Global                WLD     756  2019  37039.881946   
61422       Global                WLD     756  2020  35007.822458   
61423       Global                WLD     756  2021  36815.680623   
61424       Global                WLD     756  2022  37150.949699   

               Coal           Oil          Gas       Cement     Flaring  \
0               NaN           NaN          NaN     0.000000         NaN   
1               NaN  

In [36]:
# Rename columns to more readable names, then discard "UN M49": the dataset
# author recommends dropping it because the column is not yet reliable.
dataCo2 = (
    dataCo2
    .rename(columns={
        "ISO 3166-1 alpha-3": "ISO Code",
        "Total": "Total Co2 Emission (Mt)",
        "Per Capita": "Per Capita (t)",
    })
    .drop(columns=["UN M49"])
)
print(dataCo2.columns, "\n")

Index(['Country', 'ISO Code', 'Year', 'Total Co2 Emission (Mt)', 'Coal', 'Oil',
       'Gas', 'Cement', 'Flaring', 'Other', 'Per Capita (t)'],
      dtype='object') 



In [37]:
# Count the missing entries in every column before any cleaning is done.
print(
    "number of nan values per column\n",
    dataCo2.isnull().sum(),
)

number of nan values per column
 Country                        0
ISO Code                    1092
Year                           0
Total Co2 Emission (Mt)    38253
Coal                       38470
Oil                        39655
Gas                        39683
Cement                     37884
Flaring                    39774
Other                      59612
Per Capita (t)             43882
dtype: int64


### **Removed the "Other" column since it's very incomplete overall and would affect the result too much**
- 15181 rows x 10 columns remain after `dropna()` when the column is removed
- 1813 rows x 11 columns remain when the column is kept
- Removing it therefore leads to far less data loss

In [38]:
# Drop the sparsely populated "Other" column first, then discard every row
# that still contains a missing value.
dataCo2 = dataCo2.drop(columns=["Other"]).dropna()
# Double-check: the per-column NaN counts printed here should all be zero.
print("number of nan values per column\n", dataCo2.isnull().sum())

number of nan values per column
 Country                    0
ISO Code                   0
Year                       0
Total Co2 Emission (Mt)    0
Coal                       0
Oil                        0
Gas                        0
Cement                     0
Flaring                    0
Per Capita (t)             0
dtype: int64


In [39]:
# Show the frame again now that "Other" and all rows containing NaN are gone.
print(dataCo2)

           Country ISO Code  Year  Total Co2 Emission (Mt)          Coal  \
200    Afghanistan      AFG  1950                 0.084272      0.021068   
201    Afghanistan      AFG  1951                 0.091600      0.025648   
202    Afghanistan      AFG  1952                 0.091600      0.031708   
203    Afghanistan      AFG  1953                 0.106256      0.037949   
204    Afghanistan      AFG  1954                 0.106256      0.042502   
...            ...      ...   ...                      ...           ...   
61420       Global      WLD  2018             36766.912297  14725.249126   
61421       Global      WLD  2019             37039.881946  14684.277641   
61422       Global      WLD  2020             35007.822458  14225.918061   
61423       Global      WLD  2021             36815.680623  14982.537322   
61424       Global      WLD  2022             37150.949699  15222.908123   

                Oil          Gas       Cement     Flaring  Per Capita (t)  
200        

In [40]:
# Mean of every numeric column except Year. numeric_only=True is required
# because the frame still holds the string columns "Country" and "ISO Code";
# without it, pandas 2.x raises a TypeError and older versions only skip those
# columns via a deprecated fallback.
print(dataCo2.drop(columns=['Year']).mean(numeric_only=True))

Total Co2 Emission (Mt)    212.120822
Coal                        89.200518
Oil                         78.811780
Gas                         34.166055
Cement                       6.083632
Flaring                      2.550702
Per Capita (t)               5.177676
dtype: float64


In [41]:
# Median of every numeric column except Year. numeric_only=True is required
# because the frame still holds the string columns "Country" and "ISO Code";
# without it, pandas 2.x raises a TypeError and older versions only skip those
# columns via a deprecated fallback.
print(dataCo2.drop(columns=['Year']).median(numeric_only=True))

Total Co2 Emission (Mt)    6.192160
Coal                       0.109982
Oil                        2.783645
Gas                        0.000000
Cement                     0.161670
Flaring                    0.000000
Per Capita (t)             1.826953
dtype: float64


In [42]:
# Standard deviation of every numeric column except Year. numeric_only=True is
# required because the frame still holds the string columns "Country" and
# "ISO Code"; without it, pandas 2.x raises a TypeError and older versions only
# skip those columns via a deprecated fallback.
print(dataCo2.drop(columns=['Year']).std(numeric_only=True))

Total Co2 Emission (Mt)    1682.809028
Coal                        706.212991
Oil                         623.692216
Gas                         301.935756
Cement                       60.544121
Flaring                      20.613801
Per Capita (t)               18.347518
dtype: float64
