In [14]:
import pandas as pd
import numpy as np
import warnings
from datetime import datetime
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [15]:
# Load the per-country, per-year temperature table from the working directory.
# NOTE(review): the "Unnamed: 0" column in the output looks like a saved
# index from an earlier to_csv call -- confirm before relying on it.
df = pd.read_csv("CountryTemperatures_byYear.csv")
# Preview the first five rows.
df.head()

Unnamed: 0.1,Unnamed: 0,year,Country,continent,country_code,AverageTemperature,AverageTemperatureUncertainty,Latitude,Longitude,GHG,temp_diff,Code,gdp
0,0,1900,AFGHANISTAN,Asia,AF,13.749333,0.748833,33.0,65.0,0.0,0.0,,
1,1,1900,ALBANIA,Europe,AL,13.068583,0.723417,41.0,20.0,0.0,0.0,,
2,2,1900,ALGERIA,Africa,DZ,22.864167,0.826667,28.0,3.0,0.0,0.0,,
3,3,1900,AMERICAN SAMOA,Oceania,AS,26.2735,0.891,-14.33,-170.0,,0.0,,
4,4,1900,ANDORRA,Europe,AD,11.348333,0.542167,42.5,1.5,0.0,0.0,,


In [16]:
# Column dtypes and non-null counts, to see which fields have missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24342 entries, 0 to 24341
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Unnamed: 0                     24342 non-null  int64  
 1   year                           24342 non-null  int64  
 2   Country                        24342 non-null  object 
 3   continent                      24342 non-null  object 
 4   country_code                   24342 non-null  object 
 5   AverageTemperature             24341 non-null  float64
 6   AverageTemperatureUncertainty  24341 non-null  float64
 7   Latitude                       24342 non-null  float64
 8   Longitude                      24342 non-null  float64
 9   GHG                            22458 non-null  float64
 10  temp_diff                      24341 non-null  float64
 11  Code                           9396 non-null   object 
 12  gdp                            7323 non-null  

In [17]:
# Keep only rows where every column is populated, then discard rows whose
# GHG value is exactly zero.
df = df.dropna()
# The explicit .copy() detaches the filtered rows from the frame they were
# selected from, so the in-place column assignment on "gdp" in the next cell
# does not raise SettingWithCopyWarning.
df = df[df['GHG'] != 0].copy()
df

Unnamed: 0.1,Unnamed: 0,year,Country,continent,country_code,AverageTemperature,AverageTemperatureUncertainty,Latitude,Longitude,GHG,temp_diff,Code,gdp
12786,12786,1960,AFGHANISTAN,Asia,AF,13.985417,0.441833,33.00,65.00,2.253063e+06,-0.167167,AFG,5.377778e+08
12788,12788,1960,ALGERIA,Africa,DZ,23.504083,0.452083,28.00,3.00,5.944026e+07,0.170417,DZA,2.723593e+09
12797,12797,1960,AUSTRALIA,Oceania,AU,21.210500,0.222000,-27.00,133.00,2.054328e+09,-0.887667,AUS,1.860679e+10
12798,12798,1960,AUSTRIA,Europe,AT,6.527083,0.308917,47.33,13.33,1.930452e+09,-0.321667,AUT,6.592694e+09
12802,12802,1960,BANGLADESH,Asia,BD,25.484250,0.272333,24.00,90.00,4.051911e+06,0.495333,BGD,4.274894e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...
24334,24334,2013,URUGUAY,South America,UY,16.754375,0.348250,-33.00,-56.00,3.261600e+08,-1.754625,URY,5.753123e+10
24335,24335,2013,UZBEKISTAN,Asia,UZ,16.188250,0.350500,41.00,64.00,5.622211e+09,3.044083,UZB,7.318004e+10
24337,24337,2013,VIETNAM,Asia,VN,25.232125,0.383125,16.00,106.00,2.275257e+09,0.527792,VNM,1.712220e+11
24340,24340,2013,ZAMBIA,Africa,ZM,21.196000,0.825125,-15.00,30.00,1.693902e+08,-0.501750,ZMB,2.803724e+10


In [18]:
# Express GDP in billions (divide by 1e9).
# Gotcha: this overwrites the column, so re-running the cell rescales again.
df["gdp"] = df["gdp"].div(1_000_000_000)

In [19]:
def get_temperature_diff_between(df_by_year,start_date,end_date):
    """Aggregate yearly country rows into one row per country for a year range.

    Parameters
    ----------
    df_by_year : pandas.DataFrame
        Yearly records. Must contain the columns ``year``, ``Country``,
        ``continent``, ``gdp``, ``AverageTemperature``, ``GHG`` and
        ``temp_diff``.
    start_date, end_date
        First and last year to include (both bounds inclusive). They are
        compared directly against the ``year`` column.

    Returns
    -------
    pandas.DataFrame
        One row per (``Country``, ``continent``) pair with the mean of
        ``gdp`` and ``AverageTemperature`` and the sum of ``GHG`` and
        ``temp_diff`` over the selected years. ``df_by_year`` is not modified.
    """
    in_range = (df_by_year["year"] >= start_date) & (df_by_year["year"] <= end_date)

    # String aggregator names select the same groupby mean/sum as before but
    # avoid the FutureWarning newer pandas emits for np.mean / np.sum callables.
    aggregations = {
        "gdp": "mean",
        "AverageTemperature": "mean",
        "GHG": "sum",
        "temp_diff": "sum",
    }

    summary = (
        df_by_year.loc[in_range]
        .groupby(["Country", "continent"])
        .agg(aggregations)
        .reset_index()
    )
    return summary

In [20]:
# Collapse the yearly rows into one row per country for 1960-2001 (inclusive).
# Gotcha: this rebinds `df` to the aggregate, which no longer has a "year"
# column, so the cell cannot be re-run without reloading the data first.
df = get_temperature_diff_between(df_by_year=df, start_date=1960, end_date=2001)
df.head()

Unnamed: 0,Country,continent,gdp,AverageTemperature,GHG,temp_diff
0,AFGHANISTAN,Asia,1.868414,14.078186,331116300.0,0.7075
1,ALBANIA,Europe,2.227492,13.134764,3119427000.0,1.1795
2,ALGERIA,Africa,31.236299,23.382044,32370420000.0,1.39075
3,ANDORRA,Europe,1.198469,12.118604,33470640.0,-0.369
4,ANGOLA,Africa,7.198019,22.355035,2340391000.0,0.533583


In [21]:
# Round the displayed metrics to two decimals.
for column in ("gdp", "temp_diff"):
    df[column] = df[column].round(2)

# Twice the rounded GDP; used as the marker-size column in the scatter plot.
df["gdp_test"] = df["gdp"] * 2

In [22]:
# Inspect the per-country aggregate after rounding and adding gdp_test.
df

Unnamed: 0,Country,continent,gdp,AverageTemperature,GHG,temp_diff,gdp_test
0,AFGHANISTAN,Asia,1.87,14.078186,3.311163e+08,0.71,3.74
1,ALBANIA,Europe,2.23,13.134764,3.119427e+09,1.18,4.46
2,ALGERIA,Africa,31.24,23.382044,3.237042e+10,1.39,62.48
3,ANDORRA,Europe,1.20,12.118604,3.347064e+07,-0.37,2.40
4,ANGOLA,Africa,7.20,22.355035,2.340391e+09,0.53,14.40
...,...,...,...,...,...,...,...
159,URUGUAY,South America,8.41,17.614522,6.101954e+09,1.44,16.82
160,UZBEKISTAN,Asia,13.77,13.276153,4.301910e+10,1.03,27.54
161,VIETNAM,Asia,20.95,24.114868,1.104759e+10,0.43,41.90
162,ZAMBIA,Africa,2.63,21.563063,3.465993e+09,0.10,5.26


In [23]:
# Bubble chart: one marker per country, x = summed temp_diff, y = summed GHG,
# marker size from gdp_test, colour by continent.
axis_labels = {
    "temp_diff": "Increase in Average Temperature between 1960 and 2001 years",
    "GHG": "Greenhouse Gases Emissions",
}

# One entry per hover line; customdata indices follow the custom_data list below.
hover_lines = [
    "Country: %{customdata[0]}",
    "GDP: %{customdata[1]} billion $",
    "Increase in Average Temperature: %{x} °C",
    "Greenhouse Gases Emissions: %{y} t",
]

fig_1 = px.scatter(
    df,
    x="temp_diff",
    y="GHG",
    size="gdp_test",
    color="continent",
    hover_name="Country",
    custom_data=['Country', 'gdp'],
    labels=axis_labels,
    log_x=False,
    size_max=100,
)

fig_1.update_traces(hovertemplate="<br>".join(hover_lines))

# Render via the "browser" renderer rather than inline in the notebook.
fig_1.show(renderer="browser")

In [34]:
# Write the per-country summary to Correlation.csv in the working directory.
# NOTE(review): with the default index=True the row index is written as an
# unnamed first column; pass index=False if consumers don't need it -- confirm.
df.to_csv("Correlation.csv")