# Import Packages:

In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# import dash
# from dash import dcc
# from dash import html
# from dash.dependencies import Input, Output, State
import plotly.express as px

In [71]:
# Source files, both relative to the notebook's working directory.
co2_csv_path = 'Data/gapminder/consumption_co2_emissions_1000_tonnes.csv'
continents_csv_path = 'Data/Countries-Continents.csv'

# Co2 table: one row per country, one column per year (wide format).
df_co2 = pd.read_csv(co2_csv_path)
# Lookup table used later to attach a continent to each country.
df_cont = pd.read_csv(continents_csv_path)

In [72]:
# Preview the raw table: one row per country, one column per year, with
# large values abbreviated as strings such as '55.2k'.
df_co2.head()

Unnamed: 0,country,1990,1991,1992,1993,1994,1995,1996,1997,1998,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,Albania,5620,4570,2840,2630,2290,2460,2870,2190,2510,...,6130,6370,6140,6350,6060,5870,6300,5660,5500,5650
1,United Arab Emirates,55.2k,61k,63.5k,68.3k,75.2k,75.2k,78.1k,78.9k,92k,...,244k,200k,195k,217k,238k,245k,255k,230k,234k,236k
2,Argentina,105k,113k,118k,116k,120k,122k,130k,136k,138k,...,177k,165k,177k,185k,187k,193k,197k,203k,200k,202k
3,Armenia,13.7k,13.4k,4120,1990,1940,2840,3090,4120,4240,...,6500,5930,5540,5730,6330,6170,6120,5630,5700,5560
4,Australia,235k,233k,236k,237k,244k,250k,253k,262k,276k,...,367k,376k,384k,391k,401k,382k,375k,373k,373k,379k


# Reshaping co2 dataframe

In [3]:
# Wide -> long: every (country, year) pair becomes its own row.
# NOTE: this rebinds df_co2, so the cell cannot be re-run without first
# re-running the load cell.
df_co2 = (
    df_co2
    .melt(id_vars='country', var_name='Year', value_name='Co2_Emission')
    .rename(columns={'country': 'Country'})
)
df_co2.head()

Unnamed: 0,Country,Year,Co2_Emission
0,Albania,1990,5620
1,United Arab Emirates,1990,55.2k
2,Argentina,1990,105k
3,Armenia,1990,13.7k
4,Australia,1990,235k


# Merging Continent dataset

In [4]:
# Left join keeps every Co2 row; countries with no match in the continent
# table end up with NaN in 'Continent'.
df = df_co2.merge(right=df_cont, how='left', on='Country')
df.head()

Unnamed: 0,Country,Year,Co2_Emission,Continent
0,Albania,1990,5620,Europe
1,United Arab Emirates,1990,55.2k,Asia
2,Argentina,1990,105k,South America
3,Armenia,1990,13.7k,Europe
4,Australia,1990,235k,Oceania


# Fixing NaNs by hand:

In [5]:
# Continent assignments for countries that are set by hand (presumably the
# names that found no match in the continent table during the left merge).
# Keeping them in one table replaces eleven copy-pasted cells and makes the
# list easy to audit and extend.
# NOTE(review): Russia spans two continents; 'Asia' is kept from the
# original cell -- confirm this is the intended grouping.
manual_continents = {
    'Burkina Faso': 'Africa',
    "Cote d'Ivoire": 'Africa',
    "Czech Republic": 'Europe',
    "Hong Kong, China": 'Asia',
    "Kyrgyz Republic": 'Asia',
    "South Korea": 'Asia',
    "Lao": 'Asia',
    "Russia": 'Asia',
    "Slovak Republic": 'Europe',
    "Taiwan": 'Asia',
    "United States": 'North America',
}

# Write through .loc so the assignment lands on df itself, not on a copy.
for country_name, continent_name in manual_continents.items():
    df.loc[df.Country == country_name, 'Continent'] = continent_name

In [16]:
# Rows still lacking a continent; an empty result means every country is covered.
df.loc[df['Continent'].isna()]

Unnamed: 0,Country,Year,Co2_Emission,Continent


# Fixing column data types:

In [17]:
# Year labels came from the melted column headers; store them as 32-bit integers.
df = df.assign(Year=df['Year'].astype('int32'))

In [18]:
def fix_col(s):
    """Convert an abbreviated numeric cell such as '55.2k' to a float.

    Parameters
    ----------
    s : str or number
        A plain number ('5620'), or a number followed by a single
        magnitude suffix: 'k' (thousands), 'M' (millions) or
        'B' (billions). Values that are already numeric, including NaN,
        are passed straight to ``float``.

    Returns
    -------
    float
        The value with the suffix expanded.

    Raises
    ------
    ValueError
        If the text is not a number once the suffix is removed.
    """
    multipliers = {'k': 1000, 'M': 1000000, 'B': 1000000000}

    # Missing cells arrive as float NaN and some cells may already be
    # numeric; `'k' in s` would raise TypeError on those.
    if not isinstance(s, str):
        return float(s)

    # Tolerate padding and the Unicode minus sign, which float() rejects.
    text = s.strip().replace('\u2212', '-')

    # Only the final character is a suffix; slicing keeps '' safe.
    suffix = text[-1:]
    if suffix in multipliers:
        return float(text[:-1]) * multipliers[suffix]
    return float(text)

In [19]:
# Expand the abbreviated strings element by element into floats.
df = df.assign(Co2_Emission=df['Co2_Emission'].apply(fix_col))

In [20]:
# Check the resulting dtypes and that every column is fully populated.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3332 entries, 0 to 3331
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Country       3332 non-null   object 
 1   Year          3332 non-null   int32  
 2   Co2_Emission  3332 non-null   float64
 3   Continent     3332 non-null   object 
dtypes: float64(1), int32(1), object(2)
memory usage: 117.1+ KB


# Writing new csv file:

In [21]:
# Persist the cleaned long-format table; the row index is not written.
cleaned_csv_path = 'Data/co2.csv'
df.to_csv(cleaned_csv_path, index=False)

In [22]:
# Read the cleaned file back; all plots below are built from df_co.
df_co = pd.read_csv(filepath_or_buffer='Data/co2.csv')
df_co.head(n=5)

Unnamed: 0,Country,Year,Co2_Emission,Continent
0,Albania,1990,5620.0,Europe
1,United Arab Emirates,1990,55200.0,Asia
2,Argentina,1990,105000.0,South America
3,Armenia,1990,13700.0,Europe
4,Australia,1990,235000.0,Oceania


In [23]:
# Inspect dtypes after the CSV round trip (Year is read back as int64, not int32).
df_co.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3332 entries, 0 to 3331
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Country       3332 non-null   object 
 1   Year          3332 non-null   int64  
 2   Co2_Emission  3332 non-null   float64
 3   Continent     3332 non-null   object 
dtypes: float64(1), int64(1), object(2)
memory usage: 104.2+ KB


# Visuals

In [31]:
# Animated bar chart: one bar per country, one animation frame per year.
# The title names the plotted quantity, and the y-axis range is pinned to
# the overall maximum so bars stay comparable (and visible) across frames
# instead of inheriting the first frame's scale.
px.bar(df_co, x='Country',
          y='Co2_Emission',
          title='Co2 emissions per country',
          animation_frame='Year',
          range_y=[0, df_co['Co2_Emission'].max()],
          template='seaborn',
          hover_name='Country')


In [34]:
# Continent -> country hierarchy. df_co holds one row per country and year,
# so each sector's size is the sum of that country's emissions over all
# years in the table; the title states this so the figure stands alone.
px.sunburst(df_co,
           color='Co2_Emission',
           values='Co2_Emission',
           path=['Continent', 'Country'],
           title='Co2 emissions by continent and country (all years summed)',
           height=600,
           hover_name='Country')

In [45]:
# Mean emission across countries for each year. The numeric column is
# selected explicitly: averaging the whole frame would also try to average
# the text columns (Country, Continent), which raises TypeError on
# pandas >= 2.0.
df_co.groupby('Year', as_index=False)['Co2_Emission'].mean()

Unnamed: 0,Year,Co2_Emission
0,1990,181215.638655
1,1991,181613.109244
2,1992,179476.042017
3,1993,181717.722689
4,1994,182697.386555
5,1995,186751.747899
6,1996,192161.722689
7,1997,193003.546218
8,1998,191493.042017
9,1999,194454.394958


In [62]:
# Line chart of the per-year mean across countries. Only the numeric column
# is aggregated, so the text columns cannot break .mean() on pandas >= 2.0.
yearly_mean = df_co.groupby('Year', as_index=False)['Co2_Emission'].mean()
px.line(yearly_mean, x="Year", y="Co2_Emission", title='Co2_Emission mean')

In [None]:
# Emission series for a single country. No averaging happens here, so the
# title names the country rather than calling the curve a mean.
px.line(df_co[df_co['Country']=='Egypt'], x="Year", y="Co2_Emission", title='Co2_Emission in Egypt')

In [69]:
# Mean emission per continent and year. Only the numeric column is
# aggregated: averaging the whole frame would include the text 'Country'
# column and raise TypeError on pandas >= 2.0.
ddd = df_co.groupby(['Continent', 'Year'], as_index=False)['Co2_Emission'].mean()

# The plot is restricted to one continent, so the title names it.
px.line(ddd[ddd['Continent']=='Africa'],
                        x="Year",
                        y="Co2_Emission",
                        title='Africa Co2_Emission mean')