In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import scipy.stats as st
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import datetime

In [2]:
# Read Global Temperatures by Country file and store into Pandas data frame
land_temp_by_country_csv = "../../ProjectOne_datasets/GlobalLandTemperaturesByCountry.csv"
land_temp_by_country_df = pd.read_csv(land_temp_by_country_csv)
land_temp_by_country_df.tail()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,Country
577457,2013-05-01,19.059,1.022,Zimbabwe
577458,2013-06-01,17.613,0.473,Zimbabwe
577459,2013-07-01,17.0,0.453,Zimbabwe
577460,2013-08-01,19.759,0.717,Zimbabwe
577461,2013-09-01,,,Zimbabwe


In [3]:
# Read Population Growth by Country file and store into Pandas data frame
population_growth_country_csv = "../../ProjectOne_datasets/population-by-country.csv"
population_growth_country_df = pd.read_csv(population_growth_country_csv)
population_growth_country_df.tail()

#Rename Column for World Population data
population_growth_country_df = population_growth_country_df.rename(columns={"Population by Country (Clio Infra (2016))":"Population",
                                                                           "Entity":"Country"})
population_growth_country_df.head()

Unnamed: 0,Country,Code,Year,Population
0,Afghanistan,AFG,1500,2000000.0
1,Afghanistan,AFG,1600,2500000.0
2,Afghanistan,AFG,1700,2500000.0
3,Afghanistan,AFG,1800,3280000.0
4,Afghanistan,AFG,1820,3280000.0


In [4]:
#Load CO2 Emissions Growth by Country file and store into Pandas data frame
co2_country_csv = "../../ProjectOne_datasets/annual-co-emissions-by-region.csv"
co2_country_df = pd.read_csv(co2_country_csv)
co2_country_df.tail()

#Rename Column 
co2_country_df = co2_country_df.rename(columns={"Entity":"Country"})
co2_country_df.head()

Unnamed: 0,Country,Code,Year,Annual CO2 emissions
0,Afghanistan,AFG,1750,0.0
1,Afghanistan,AFG,1751,0.0
2,Afghanistan,AFG,1752,0.0
3,Afghanistan,AFG,1753,0.0
4,Afghanistan,AFG,1754,0.0


In [5]:
# Cleanup Land Temperature Datasets

#Drop AverageTemperatureUncertainty
land_temp_by_country_df = land_temp_by_country_df.drop(["AverageTemperatureUncertainty"], axis=1)

#Remove rows with null temperatures
land_temp_by_country_df = land_temp_by_country_df[land_temp_by_country_df["AverageTemperature"].notna()]

print(f"Country Data Size: {len(land_temp_by_country_df)}")

Country Data Size: 544811


In [21]:
# Function to get Surface Temperatures for given country
global countries_temperatures_df 
columns = ["Decade", "Land Temperature", "Country"]
countries_temperatures_df = pd.DataFrame(columns = columns)

def get_surface_temps_by_country(country):
    global countries_temperatures_df 
    
    temp_df = land_temp_by_country_df.loc[land_temp_by_country_df["Country"]==country] #cent_land_temp_by_country_df
            
    #Since the data is in dates, and we want to analyze decades, change date to decade and calculate mean for given decade
    temp_df = temp_df.groupby((pd.DatetimeIndex(temp_df["dt"]).year//10)*10).mean()
    
    temp_df["Country"] = country

    #Reset index, and rename columns appropriately
    temp_df.reset_index(inplace=True)
    temp_df = temp_df.rename(columns = {'dt':'Decade'})
    temp_df = temp_df.rename(columns = {'AverageTemperature':'Land Temperature'})
    
     #Exclude 2010-2019(and onwards) as data present is incomplete
    temp_df = temp_df.loc[temp_df["Decade"] != 2010]
    countries_temperatures_df = pd.concat([countries_temperatures_df, temp_df], axis=0)

In [7]:
# Function to get Population for given country
global countries_population_df 
columns = ["Decade", "Population", "Country"]
countries_population_df = pd.DataFrame(columns = columns)

def get_population_by_country(country):
    global countries_population_df 
    
    temp_df = population_growth_country_df.loc[population_growth_country_df["Country"] == country]
    temp_df = temp_df.rename(columns = {'Year':'Decade'})
    temp_df = temp_df.drop(["Code"], axis=1)
    countries_population_df = pd.concat([countries_population_df, temp_df], axis=0)

In [8]:
# Function to get CO2 Emissions for given country
global countries_CO2_df 
columns = ["Decade", "Annual CO2 emissions", "Country"]
countries_CO2_df = pd.DataFrame(columns = columns)

def get_CO2_by_country(country):
    global countries_CO2_df 
    
    temp_df = co2_country_df.loc[co2_country_df["Country"] == country]

    #Since the data is in years, and we want to analyze decades, change year to decade and calculate mean for given decade
    temp_df = temp_df.groupby((temp_df["Year"]//10)*10).mean()
    temp_df["Country"] = country
    
    temp_df = temp_df.rename(columns = {'Year':'Decade'})
    
    #Exclude 2010-2019(and onwards) as data present is incomplete
    temp_df = temp_df.loc[temp_df["Decade"] != 2010]
    countries_CO2_df = pd.concat([countries_CO2_df, temp_df], axis=0)

In [9]:
#list of countries to operate on
list_of_countries = ["United States", "China", "Brazil", "Denmark", "India","United Kingdom"]

for country in list_of_countries:
    get_surface_temps_by_country(country)
    get_population_by_country(country)
    get_CO2_by_country(country)

#clean up CO2 dataframe
countries_CO2_df.reset_index(inplace=True)
countries_CO2_df = countries_CO2_df.drop(["Decade"], axis=1)
countries_CO2_df = countries_CO2_df.rename(columns = {'index':'Decade'})
print(f"Temp DF Size: {len(countries_temperatures_df)}")
print(f"Pop DF Size: {len(countries_population_df)}")
print(f"CO2 DF Size: {len(countries_CO2_df)}")

Temp DF Size: 129
Pop DF Size: 143
CO2 DF Size: 162


In [10]:
#Final cleansed and merged datasets
final_population_land_temps_df = pd.merge(countries_temperatures_df, countries_population_df, on=["Decade", "Country"], how="outer")
final_df = pd.merge(final_population_land_temps_df, countries_CO2_df, on=["Decade", "Country"], how="outer")
final_df = final_df.rename(columns = {"Annual CO2 emissions":"Annual CO2 Emissions"})
print(f"Merged Data Set final_population_land_temps_df: {len(final_df)}") 
print(f"Merged Data Set final_df: {len(final_df)}") 

Merged Data Set final_population_land_temps_df: 183
Merged Data Set final_df: 183


In [11]:
#Reorganize the column names for ease of redability
column_names = ["Decade", "Country", "Land Temperature", "Population", "Annual CO2 Emissions"]
final_df = final_df.reindex(columns=column_names)
final_df

Unnamed: 0,Decade,Country,Land Temperature,Population,Annual CO2 Emissions
0,1760.0,United States,8.821917,,0.000000e+00
1,1770.0,United States,7.185043,,0.000000e+00
2,1780.0,United States,7.114364,,0.000000e+00
3,1790.0,United States,9.519846,,0.000000e+00
4,1810.0,United States,6.640041,7240000.0,5.954000e+05
...,...,...,...,...,...
178,1760.0,India,,,0.000000e+00
179,1770.0,India,,,0.000000e+00
180,1780.0,India,,,0.000000e+00
181,2010.0,India,,,2.191411e+09


In [12]:
# Read Lat Long by Country file and store into Pandas data frame
country_geocode_path = "../../ProjectOne_datasets/countries_geocode.csv"
country_geocode_df = pd.read_csv(country_geocode_path)
country_geocode_df = country_geocode_df.rename(columns = {"name":"Country"})
country_geocode_df = country_geocode_df.drop(["country"], axis=1)
country_geocode_df.tail()

exportable_df = pd.merge(final_df, country_geocode_df, on=["Country"], how="inner")
exportable_df.tail()

Unnamed: 0,Decade,Country,Land Temperature,Population,Annual CO2 Emissions,latitude,longitude
178,1500.0,United Kingdom,,3942000.0,,55.378051,-3.435973
179,1600.0,United Kingdom,,6170000.0,,55.378051,-3.435973
180,1650.0,United Kingdom,,5800000.0,,55.378051,-3.435973
181,1700.0,United Kingdom,,8565000.0,,55.378051,-3.435973
182,2010.0,United Kingdom,,,434628736.2,55.378051,-3.435973


In [16]:
# Push the remade DataFrame to a new CSV file by century

cent_1800_exportable_df = exportable_df.loc[(exportable_df["Decade"] >= 1850) &
                                             (exportable_df["Decade"] < 1900)]

cent_1900_exportable_df = exportable_df.loc[(exportable_df["Decade"] >= 1900) &
                                            (exportable_df["Decade"] < 2010)]

exportable_df.to_csv("../Output/Population_SurfaceTemps_CO2_Worldwide.csv",
                  encoding="utf-8", index=False, header=True)
cent_1800_exportable_df.to_csv("../Output/1800_Population_SurfaceTemps_CO2_By_Country.csv",
                  encoding="utf-8", index=False, header=True)
cent_1900_exportable_df.to_csv("../Output/1900_Population_SurfaceTemps_CO2_By_Country.csv",
                  encoding="utf-8", index=False, header=True)