In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

#CSV Output File
data_co2="annual-co-emissions-by-region.csv"
data_country = "GlobalLandTemperaturesByCountry.csv"

In [2]:
co2_df = pd.read_csv(data_co2) 
co2_df

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions
0,Afghanistan,AFG,1750,0.0
1,Afghanistan,AFG,1751,0.0
2,Afghanistan,AFG,1752,0.0
3,Afghanistan,AFG,1753,0.0
4,Afghanistan,AFG,1754,0.0
...,...,...,...,...
63175,Zimbabwe,ZWE,2015,12170460.0
63176,Zimbabwe,ZWE,2016,10814761.0
63177,Zimbabwe,ZWE,2017,10246841.0
63178,Zimbabwe,ZWE,2018,11340575.0


In [3]:
clean_df=co2_df.loc[co2_df['Year'] > 1899]

In [4]:
clean_df

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions
150,Afghanistan,AFG,1900,0.0
151,Afghanistan,AFG,1901,0.0
152,Afghanistan,AFG,1902,0.0
153,Afghanistan,AFG,1903,0.0
154,Afghanistan,AFG,1904,0.0
...,...,...,...,...
63175,Zimbabwe,ZWE,2015,12170460.0
63176,Zimbabwe,ZWE,2016,10814761.0
63177,Zimbabwe,ZWE,2017,10246841.0
63178,Zimbabwe,ZWE,2018,11340575.0


In [5]:
clean_df = clean_df.reset_index()
del clean_df['index']
clean_df.head

<bound method NDFrame.head of             Entity Code  Year  Annual CO2 emissions
0      Afghanistan  AFG  1900                   0.0
1      Afghanistan  AFG  1901                   0.0
2      Afghanistan  AFG  1902                   0.0
3      Afghanistan  AFG  1903                   0.0
4      Afghanistan  AFG  1904                   0.0
...            ...  ...   ...                   ...
28075     Zimbabwe  ZWE  2015            12170460.0
28076     Zimbabwe  ZWE  2016            10814761.0
28077     Zimbabwe  ZWE  2017            10246841.0
28078     Zimbabwe  ZWE  2018            11340575.0
28079     Zimbabwe  ZWE  2019            10374287.0

[28080 rows x 4 columns]>

In [6]:
bins = [1900, 1910, 1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010, 2020]
group_names = ["1900-1909", "1910-1919", "1920-1929", "1930-1939", "1940-1949", "1950-1959", "1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000-2009", "2010-2019"]

clean_df["Decade"] = pd.cut(clean_df["Year"], bins, labels=group_names, include_lowest=True)
clean_df

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions,Decade
0,Afghanistan,AFG,1900,0.0,1900-1909
1,Afghanistan,AFG,1901,0.0,1900-1909
2,Afghanistan,AFG,1902,0.0,1900-1909
3,Afghanistan,AFG,1903,0.0,1900-1909
4,Afghanistan,AFG,1904,0.0,1900-1909
...,...,...,...,...,...
28075,Zimbabwe,ZWE,2015,12170460.0,2010-2019
28076,Zimbabwe,ZWE,2016,10814761.0,2010-2019
28077,Zimbabwe,ZWE,2017,10246841.0,2010-2019
28078,Zimbabwe,ZWE,2018,11340575.0,2010-2019


In [7]:
decade_co2 = clean_df.groupby(['Decade', 'Entity']).agg({'Annual CO2 emissions': ['mean', 'min', 'max']})
decade_co2

Unnamed: 0_level_0,Unnamed: 1_level_0,Annual CO2 emissions,Annual CO2 emissions,Annual CO2 emissions
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,min,max
Decade,Entity,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1900-1909,Afghanistan,0.000000e+00,0.000000e+00,0.000000e+00
1900-1909,Africa,9.998390e+06,2.330304e+06,1.741133e+07
1900-1909,Albania,0.000000e+00,0.000000e+00,0.000000e+00
1900-1909,Algeria,0.000000e+00,0.000000e+00,0.000000e+00
1900-1909,Andorra,0.000000e+00,0.000000e+00,0.000000e+00
...,...,...,...,...
2010-2019,Wallis and Futuna Islands,2.487422e+04,2.198400e+04,2.853800e+04
2010-2019,World,3.535434e+10,3.420958e+10,3.644139e+10
2010-2019,Yemen,1.570664e+07,9.945288e+06,2.497675e+07
2010-2019,Zambia,4.842892e+06,2.781413e+06,6.930094e+06


In [8]:
idx = pd.IndexSlice

In [9]:
main_country=decade_co2.loc[idx[:,['Brazil', 'India', 'Russia', 'United States', 'China']], :]
main_country

Unnamed: 0_level_0,Unnamed: 1_level_0,Annual CO2 emissions,Annual CO2 emissions,Annual CO2 emissions
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,min,max
Decade,Entity,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1900-1909,Brazil,2775313.0,0.0,4198944.0
1910-1919,Brazil,3790042.0,2322976.0,6034608.0
1920-1929,Brazil,5110804.0,2890896.0,6345662.0
1930-1939,Brazil,5295289.0,4168949.0,6485943.0
1940-1949,Brazil,8262202.0,5866699.0,19659070.0
1950-1959,Brazil,33301680.0,21659350.0,46851820.0
1960-1969,Brazil,65654880.0,49144500.0,93530490.0
1970-1979,Brazil,150715400.0,102357900.0,187155100.0
1980-1989,Brazil,188383900.0,165426900.0,212113600.0
1990-1999,Brazil,268559300.0,217227700.0,324226000.0


In [10]:
main_country.to_csv('co2_data2.csv')

In [22]:
main_country_co2_pie = main_country.groupby(['Decade', 'Entity'])
main_country_co2_pie.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Annual CO2 emissions,Annual CO2 emissions,Annual CO2 emissions
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,min,max
Decade,Entity,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1900-1909,Brazil,2775313.0,0.0,4198944.0
1910-1919,Brazil,3790042.0,2322976.0,6034608.0
1920-1929,Brazil,5110804.0,2890896.0,6345662.0
1930-1939,Brazil,5295289.0,4168949.0,6485943.0
1940-1949,Brazil,8262202.0,5866699.0,19659070.0
1950-1959,Brazil,33301680.0,21659350.0,46851820.0
1960-1969,Brazil,65654880.0,49144500.0,93530490.0
1970-1979,Brazil,150715400.0,102357900.0,187155100.0
1980-1989,Brazil,188383900.0,165426900.0,212113600.0
1990-1999,Brazil,268559300.0,217227700.0,324226000.0


In [26]:
main_country_co2_pie.set_index('Entity')

AttributeError: 'DataFrameGroupBy' object has no attribute 'set_index'