In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from first_glance import report

In [2]:
report.stats_report('global_co2.csv')

Unnamed: 0,entry_count,data_type,null_count,mean,std,min,25%,50%,75%,max
country,48058,object,0,,,,,,,
year,48058,int64,0,1926.843,59.41485,1750.0,1883.0,1930.0,1976.0,2022.0
population,48058,float64,8563,60832230.0,328586700.0,222.0,346437.5,2456362.0,10080380.0,7975105000.0
gdp,48058,float64,33494,267758600000.0,2103151000000.0,49980000.0,7516679000.0,25979990000.0,113294200000.0,113630200000000.0
cement_co2,48058,float64,24294,9.1094,66.46355,0.0,0.0,0.029,0.834,1692.404
co2,48058,float64,17750,391.2722,1855.825,0.0,0.183,3.856,47.27725,37149.79
coal_co2,48058,float64,22983,156.9639,765.7201,0.0,0.0,0.703,16.988,15219.3
gas_co2,48058,float64,22975,51.2781,316.0338,0.0,0.0,0.0,1.3445,7922.112
oil_co2,48058,float64,22947,112.9207,629.4511,0.0,0.084,1.323,13.916,12377.95
share_global_co2,48058,float64,19563,5.163432,18.34465,0.0,0.002,0.037,0.518,100.0


In [3]:
df = pd.read_csv('global_co2.csv')

In [4]:
#Creating a regex pattern to filter out of the dataframe
exclude_list = ['World', 'Europe', 'countries', 'GCP', 'Asia', 'Africa', 'America', 'International']
pattern = '|'.join(exclude_list)
#Creation of a dataframe of current countries using the created regex pattern
co2_df = df.copy()
co2_df = co2_df[~co2_df['country'].str.contains(pattern, na=False, case=False, regex=True)]
co2_df['co2'].dropna(inplace=True)

In [45]:
def cumulative_co2(direction, entry_count):
    if direction not in {'top', 'bottom'}:
        raise ValueError('Invalid value for direction. Please select "top" or "bottom".')
    df = co2_df[co2_df['co2'].notnull()].groupby('country').sum()[['co2']].sort_values(by='co2', ascending=False)
    

In [47]:
cumulative_co2('top', 10)

Unnamed: 0_level_0,co2
country,Unnamed: 1_level_1
United States,426914.556
China,260619.243
Russia,119290.814
Germany,93985.871
United Kingdom,78834.706
Japan,67734.911
India,59740.694
France,39397.693
Canada,34613.228
Ukraine,30961.508


In [48]:
cumulitive_co2('bottom', 10)

Unnamed: 0_level_0,co2
country,Unnamed: 1_level_1
Anguilla,3.332
Cook Islands,2.56
Kiribati,2.149
Montserrat,1.634
Christmas Island,1.333
Wallis and Futuna,0.798
Saint Helena,0.429
Tuvalu,0.303
Niue,0.3
Antarctica,0.157


In [6]:
#Creating a co2 per capita column
co2_df.loc[:,'co2_per_capita'] = round((co2_df['co2']*1e6) / co2_df['population'],2)

In [36]:
def per_cap_by_year(direction, entries, year):
    if direction == 'top':
        return co2_df[(co2_df['year'] == year) & (co2_df['co2'].notnull())].sort_values(by='co2_per_capita', ascending=False).head(entries)
    elif direction == 'bottom':
        return co2_df[(co2_df['year'] == year) & (co2_df['co2'].notnull())].sort_values(by='co2_per_capita', ascending=False).tail(entries)

In [39]:
per_cap_by_year('top', 15, 2020)

Unnamed: 0,country,year,population,gdp,cement_co2,co2,coal_co2,gas_co2,oil_co2,share_global_co2,co2_per_capita
36073,Qatar,2020,2760390.0,,1.362,102.501,0.0,86.818,12.524,0.293,37.13
4162,Bahrain,2020,1477478.0,,0.213,37.396,0.0,33.13,3.609,0.107,25.31
6979,Brunei,2020,441736.0,,0.0,11.102,2.132,5.331,3.353,0.032,25.13
45280,United Arab Emirates,2020,9287286.0,,5.204,216.768,8.086,132.351,69.331,0.619,23.34
43789,Trinidad and Tobago,2020,1518142.0,,0.245,35.03,0.0,30.964,3.444,0.1,23.07
22878,Kuwait,2020,4360451.0,,1.187,97.712,0.0,51.688,43.448,0.279,22.41
30391,New Caledonia,2020,286412.0,,0.028,5.109,2.714,0.0,2.367,0.015,17.84
38343,Saudi Arabia,2020,35997108.0,,25.593,610.773,0.432,261.062,319.43,1.745,16.97
3438,Australia,2020,25670052.0,,2.82,396.685,156.681,83.48,135.38,1.133,15.45
39580,Sint Maarten (Dutch part),2020,43638.0,,0.0,0.652,0.0,0.0,0.652,0.002,14.94


In [37]:
per_cap_by_year('bottom', 10, 2020)

Unnamed: 0,country,year,population,gdp,cement_co2,co2,coal_co2,gas_co2,oil_co2,share_global_co2,co2_per_capita
8774,Chad,2020,16644707.0,,0.151,2.279,0.0,0.0,1.847,0.007,0.14
26133,Madagascar,2020,28225182.0,,0.058,3.964,1.374,0.0,2.532,0.011,0.14
39135,Sierra Leone,2020,8233973.0,,0.0,1.026,0.0,0.0,1.026,0.003,0.12
44930,Uganda,2020,44404608.0,,0.559,5.538,0.0,0.0,4.979,0.016,0.12
30910,Niger,2020,24333644.0,,0.032,2.817,0.359,0.073,2.283,0.008,0.12
26306,Malawi,2020,19377058.0,,0.24,1.871,0.22,0.0,1.411,0.005,0.1
36596,Rwanda,2020,13146367.0,,0.163,1.372,0.011,0.088,1.11,0.004,0.1
7498,Burundi,2020,12220229.0,,0.003,0.736,0.026,0.0,0.707,0.002,0.06
11466,Democratic Republic of Congo,2020,92853168.0,,0.498,3.71,0.027,0.0,2.748,0.011,0.04
40272,Somalia,2020,16537018.0,,,0.597,0.0,0.0,0.597,0.002,0.04


In [13]:
def country_per_cap_co2(country):
    return co2_df[(co2_df['country'] == country) & (co2_df['co2_per_capita'].notnull())]

In [17]:
country_per_cap_co2('Canada')

Unnamed: 0,country,year,population,gdp,cement_co2,co2,coal_co2,gas_co2,oil_co2,share_global_co2,co2_per_capita
7852,Canada,1790,816556.0,,,0.004,0.004,0.000,0.000,0.018,0.00
7862,Canada,1800,500000.0,,,0.004,0.004,0.000,0.000,0.011,0.01
7863,Canada,1801,512396.0,,,0.004,0.004,0.000,0.000,0.011,0.01
7864,Canada,1802,525100.0,,,0.004,0.004,0.000,0.000,0.009,0.01
7865,Canada,1803,538119.0,,,0.004,0.004,0.000,0.000,0.012,0.01
...,...,...,...,...,...,...,...,...,...,...,...
8080,Canada,2018,37035260.0,1.669402e+12,6.990,577.066,60.974,230.017,261.441,1.570,15.58
8081,Canada,2019,37522584.0,,7.195,578.588,58.626,231.565,263.154,1.562,15.42
8082,Canada,2020,37888704.0,,6.713,522.845,48.517,221.474,227.670,1.494,13.80
8083,Canada,2021,38155012.0,,7.380,537.174,45.894,228.711,235.891,1.459,14.08


In [None]:
#Creating a new df for the purpose of displaying percentage of total co2 by category
co2_percent_share = new_df[new_df['co2'].notnull()].drop(columns = ['share_global_co2', 'co2_per_capita'])

In [None]:
#functionize this!!
#Calculating the percentage of the total Co2 by category
co2_percent_share['coal_prct'] = round(co2_percent_share['coal_co2']/co2_percent_share['co2']*100, 2)
co2_percent_share['gas_prct'] = round(co2_percent_share['gas_co2']/co2_percent_share['co2']*100, 2)
co2_percent_share['oil_prct'] = round(co2_percent_share['oil_co2']/co2_percent_share['co2']*100, 2)
co2_percent_share['cement_prct'] = round(co2_percent_share['cement_co2']/co2_percent_share['co2']*100, 2)

In [None]:
#Dropping unwanted columns
co2_prct_year = co2_percent_share.drop(columns=['cement_co2', 'coal_co2', 'gas_co2', 'oil_co2', 'population', 'gdp'])

In [None]:
#Cumulative co2 percent share by category grouped by year
co2_prct_year.groupby('year').sum().drop(columns='country').reset_index()

In [None]:
co2_prct_country = co2_percent_share[['country', 'co2', 'coal_co2', 'gas_co2', 'oil_co2', 'cement_co2']].groupby('country').sum()

In [None]:
co2_prct_country['coal_prct'] = round(co2_prct_country['coal_co2']/co2_prct_country['co2']*100, 2)
co2_prct_country['gas_prct'] = round(co2_prct_country['gas_co2']/co2_prct_country['co2']*100, 2)
co2_prct_country['oil_prct'] = round(co2_prct_country['oil_co2']/co2_prct_country['co2']*100, 2)
co2_prct_country['cement_prct'] = round(co2_prct_country['cement_co2']/co2_prct_country['co2']*100, 2)

In [None]:
co2_prct_country.drop(columns=['coal_co2', 'gas_co2', 'oil_co2', 'cement_co2'], inplace = True)

In [None]:
co2_prct_country.reset_index(inplace=True)

In [None]:
co2_prct_country[co2_prct_country['country'] == 'China']

In [None]:
countries_demo = new_df[new_df['co2'].notnull()][['country', 'year', 'population', 'gdp', 'co2']]

In [None]:
def country_demo(country):
    df = countries_demo[(countries_demo['country'] == 'Canada') & (countries_demo['population'].notnull()) & (countries_d]
    df = df.reset_index().drop(columns='index')

In [None]:
can_demo = can_demo[can_demo['year'] >= 1928].reset_index().drop(columns='index')

In [None]:
can_demo['last_population'] = can_demo['population'].shift(1)
can_demo['pop_prct_chng'] = round((can_demo['population'] - can_demo['last_population'])/can_demo['population']*100, 2)

In [None]:
can_demo