In [1]:
import pandas as pd

In [2]:
# Looking at Emissions Dataset

In [3]:
# Read the flat emissions CSV from the local "Emissions Datasets" folder
# (path is relative to the notebook's working directory).
emissions = pd.read_csv(
    filepath_or_buffer='./Emissions Datasets/GCB2022v27_MtCO2_flat.csv',
)

In [4]:
# Show the column labels of the emissions table.
emissions.columns

Index(['Country', 'ISO 3166-1 alpha-3', 'Year', 'Total', 'Coal', 'Oil', 'Gas',
       'Cement', 'Flaring', 'Other', 'Per Capita'],
      dtype='object')

In [5]:
# Quick size check of the emissions table: dimensions, number of distinct
# countries, and the span of years covered.
print(f"DF shape: {emissions.shape}")
print(f"DF number of countries: {emissions['Country'].nunique()}")
print(
    f"DF number of years: {emissions['Year'].nunique()} "
    f"from {emissions['Year'].min()} to {emissions['Year'].max()}"
)

DF shape: (63104, 11)
DF number of countries: 232
DF number of years: 272 from 1750 to 2021


In [6]:
# Keep only the rows whose Year is 1950 or later.
emissions = emissions[emissions['Year'].ge(1950)]

In [7]:
# Per-column flag: True where the column contains at least one missing value.
emissions.isna().any()

Country               False
ISO 3166-1 alpha-3     True
Year                  False
Total                 False
Coal                   True
Oil                    True
Gas                    True
Cement                 True
Flaring                True
Other                  True
Per Capita             True
dtype: bool

In [8]:
# Same size check as before, repeated after the year filter to confirm the
# new dimensions and year range.
print(f"DF shape: {emissions.shape}")
print(f"DF number of countries: {emissions['Country'].nunique()}")
print(
    f"DF number of years: {emissions['Year'].nunique()} "
    f"from {emissions['Year'].min()} to {emissions['Year'].max()}"
)

DF shape: (16704, 11)
DF number of countries: 232
DF number of years: 72 from 1950 to 2021


In [9]:
# Looking at Temperatures Dataset

In [10]:
# Read the per-country land temperature CSV from the local
# "Temperatures Datasets" folder (path is relative to the working directory).
temperatures = pd.read_csv(
    filepath_or_buffer='./Temperatures Datasets/GlobalLandTemperaturesByCountry.csv',
)

In [11]:
# Show the column labels of the temperatures table.
temperatures.columns

Index(['dt', 'AverageTemperature', 'AverageTemperatureUncertainty', 'Country'], dtype='object')

In [12]:
# Preview the first five rows of the raw temperatures table.
temperatures.head(n=5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
0,1743-11-01,4.384,2.294,Åland
1,1743-12-01,,,Åland
2,1744-01-01,,,Åland
3,1744-02-01,,,Åland
4,1744-03-01,,,Åland


### Splitting Date-Time

In [13]:
# Parse the 'dt' strings (YYYY-MM-DD) into datetime values, replacing the
# column in place on the existing frame.
temperatures['dt'] = pd.to_datetime(
    temperatures['dt'],
    format='%Y-%m-%d',
)

In [16]:
# Confirm the column dtypes after the datetime conversion of 'dt'.
temperatures.dtypes

dt                               datetime64[ns]
AverageTemperature                      float64
AverageTemperatureUncertainty           float64
Country                                  object
dtype: object

In [17]:
# Add a 'Year' column holding the calendar year of each 'dt' timestamp.
# Requires 'dt' to already be datetime-typed (the .dt accessor fails otherwise).
temperatures['Year'] = temperatures['dt'].dt.year

In [18]:
# Size check of the temperatures table: dimensions, number of distinct
# countries, and the span of years covered.
print(f"DF shape: {temperatures.shape}")
print(f"DF number of countries: {temperatures['Country'].nunique()}")
print(
    f"DF number of years: {temperatures['Year'].nunique()} "
    f"from {temperatures['Year'].min()} to {temperatures['Year'].max()}"
)

DF shape: (577462, 5)
DF number of countries: 243
DF number of years: 271 from 1743 to 2013


In [19]:
# Preview the first five rows again, now including the derived 'Year' column.
temperatures.head(n=5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country,Year
0,1743-11-01,4.384,2.294,Åland,1743
1,1743-12-01,,,Åland,1743
2,1744-01-01,,,Åland,1744
3,1744-02-01,,,Åland,1744
4,1744-03-01,,,Åland,1744


In [23]:
# Collapse the readings to one row per (Year, Country) by averaging.
# The two measurement columns are selected explicitly so that the datetime
# 'dt' column is never part of the aggregation: relying on pandas to drop it
# silently depends on the installed version's `numeric_only` default, and
# newer releases would otherwise average 'dt' too and add it to the result.
tempsbyYear = (
    temperatures
    .groupby(['Year', 'Country'])[['AverageTemperature', 'AverageTemperatureUncertainty']]
    .mean()
    .reset_index()
)

In [24]:
# Keep only the yearly averages for 1950 onwards.
tempsbyYear = tempsbyYear[tempsbyYear['Year'].ge(1950)]

In [27]:
# Per-column flag: True where the yearly table still contains missing values.
# (A bare `tempsbyYear.head()` used to precede this line; only the last
# expression of a cell is displayed, so its result was silently discarded.)
tempsbyYear.isnull().any()

Year                             False
Country                          False
AverageTemperature                True
AverageTemperatureUncertainty     True
dtype: bool

In [30]:
# List the rows whose yearly average temperature is missing.
tempsbyYear[tempsbyYear['AverageTemperature'].isna()]

Unnamed: 0,Year,Country,AverageTemperature,AverageTemperatureUncertainty
32699,1950,Antarctica,,2.215200
32942,1951,Antarctica,,2.153333
33185,1952,Antarctica,,2.105000
33428,1953,Antarctica,,
33671,1954,Antarctica,,2.218167
...,...,...,...,...
47036,2009,Antarctica,,0.606500
47279,2010,Antarctica,,0.590083
47522,2011,Antarctica,,0.625833
47765,2012,Antarctica,,0.709417


In [34]:
# Drop every row labelled 'Antarctica' from the yearly table.
tempsbyYear = tempsbyYear[tempsbyYear['Country'] != 'Antarctica']

In [35]:
# Re-check for missing values after removing the Antarctica rows.
tempsbyYear.isna().any()

Year                             False
Country                          False
AverageTemperature               False
AverageTemperatureUncertainty    False
dtype: bool

In [44]:
# Country labels from the yearly temperature table (one entry per row, so
# names repeat) that have no exact match in emissions['Country'].
missing_countries = tempsbyYear['Country'].loc[
    lambda names: ~names.isin(emissions['Country'])
]


In [48]:
# Distinct temperature-table country labels that have no exact match in the
# emissions table; these names need to be standardized before the two tables
# can be matched on country.
missing_countries.unique()
# Note: missing_countries.nunique() gives just the count of such labels.

array(['Africa', 'American Samoa', 'Antigua And Barbuda', 'Asia',
       'Baker Island', 'Bonaire, Saint Eustatius And Saba',
       'Bosnia And Herzegovina', 'Burma', 'Cayman Islands',
       'Congo (Democratic Republic Of The)', "Côte D'Ivoire",
       'Denmark (Europe)', 'Europe', 'Falkland Islands (Islas Malvinas)',
       'Faroe Islands', 'Federated States Of Micronesia',
       'France (Europe)', 'French Southern And Antarctic Lands',
       'Gaza Strip', 'Guam', 'Guernsey', 'Guinea Bissau',
       'Heard Island And Mcdonald Islands', 'Isle Of Man', 'Jersey',
       'Kingman Reef', 'Macau', 'Macedonia', 'Monaco',
       'Netherlands (Europe)', 'North America',
       'Northern Mariana Islands', 'Oceania', 'Palestina',
       'Palmyra Atoll', 'Reunion', 'Saint Barthélemy',
       'Saint Kitts And Nevis', 'Saint Martin',
       'Saint Pierre And Miquelon', 'Saint Vincent And The Grenadines',
       'San Marino', 'Sao Tome And Principe', 'Sint Maarten',
       'South America', 'Sout

In [42]:
# Number of distinct country labels in the emissions table.
emissions['Country'].nunique()

232

In [43]:
# Number of distinct country labels in the yearly temperature table.
tempsbyYear['Country'].nunique()

242