In [12]:
import pandas as pd
import numpy as np
import warnings
from datetime import datetime
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [13]:
# Input table for the whole notebook; the path is resolved against the
# current working directory.
CORRELATION_CSV = "Correlation.csv"
df = pd.read_csv(CORRELATION_CSV)

In [14]:
# Print column names, non-null counts and dtypes as a quick check of the load.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7158 entries, 0 to 7157
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Unnamed: 0                     7158 non-null   int64  
 1   year                           7158 non-null   int64  
 2   Country                        7158 non-null   object 
 3   continent                      7158 non-null   object 
 4   AverageTemperature             7158 non-null   float64
 5   AverageTemperatureUncertainty  7158 non-null   float64
 6   Latitude                       7158 non-null   float64
 7   Longitude                      7158 non-null   float64
 8   GHG                            7158 non-null   float64
 9   temp_diff                      7158 non-null   float64
 10  Code                           7158 non-null   object 
 11  gdp                            7158 non-null   float64
 12  gdp_test                       7158 non-null   float64

In [15]:
def get_temperature_diff_between(df_by_year: pd.DataFrame, start_date, end_date) -> pd.DataFrame:
    """Aggregate each country's yearly rows over an inclusive range of years.

    Parameters
    ----------
    df_by_year : pandas.DataFrame
        Frame that has at least the columns ``year``, ``Country``,
        ``continent``, ``gdp``, ``AverageTemperature``, ``GHG`` and
        ``temp_diff``. It is not modified.
    start_date, end_date
        Lower and upper bound for the ``year`` column. Despite the names they
        are compared directly against ``year``; both bounds are inclusive.

    Returns
    -------
    pandas.DataFrame
        One row per ``(Country, continent)`` pair found in the selected rows,
        with the columns ``Country``, ``continent``, ``gdp`` (mean),
        ``AverageTemperature`` (mean), ``GHG`` (sum) and ``temp_diff`` (sum).
    """
    in_range = df_by_year["year"].between(start_date, end_date)

    # Aggregations are given by name rather than as np.mean / np.sum:
    # recent pandas emits a FutureWarning when NumPy callables are passed to
    # groupby().agg(), while the string aliases select the same reductions.
    aggregations = {
        "gdp": "mean",
        "AverageTemperature": "mean",
        "GHG": "sum",
        "temp_diff": "sum",
    }

    return (
        df_by_year.loc[in_range]
        .groupby(["Country", "continent"])
        .agg(aggregations)
        .reset_index()
    )

In [16]:
# Collapse the yearly rows into one row per country for 1960-2013 (inclusive).
# NOTE(review): this rebinds `df` to the aggregated frame, which has no `year`
# column, so re-running this cell without reloading the CSV raises a KeyError.
df = get_temperature_diff_between(df_by_year=df, start_date=1960, end_date=2013)
df.head()

Unnamed: 0,Country,continent,gdp,AverageTemperature,GHG,temp_diff
0,AFGHANISTAN,Asia,5119762000.0,14.543016,1438586000.0,1.463042
1,ALBANIA,Europe,5323462000.0,13.342171,5849004000.0,2.482958
2,ALGERIA,Africa,54913520000.0,23.586074,66745350000.0,1.787833
3,ANDORRA,Europe,1364792000.0,11.847656,140661000.0,1.423042
4,ANGOLA,Africa,31200930000.0,22.406356,6293289000.0,0.538542


In [17]:
# NOTE(review): this cell updates `df` in place and is not idempotent;
# running it a second time divides `gdp` by one billion again.

# Mean GDP expressed in billions, two decimals (the hover text shows "billion $").
df["gdp"] = (df["gdp"] / 1_000_000_000).round(2)
# Marker-size column for the scatter plots: twice the rescaled GDP.
df["gdp_test"] = 2 * df["gdp"]
# Summed temperature change, two decimals.
df["temp_diff"] = df["temp_diff"].round(2)

In [18]:
# Bubble chart over all aggregated years: one marker per row of `df`,
# placed by temperature change (x) and emissions (y), coloured by continent
# and sized by the `gdp_test` column.
axis_labels = {
    "temp_diff": "Increase in average temperature",
    "GHG": "Greenhouse Gases Emissions",
}

fig = px.scatter(
    df,
    x="temp_diff",
    y="GHG",
    size="gdp_test",
    size_max=80,
    color="continent",
    hover_name="Country",
    labels=axis_labels,
    template="simple_white",
)
# Opens the figure in the system web browser instead of rendering inline.
fig.show(renderer="browser")

In [19]:
# Bubble chart of temperature change vs. emissions with a custom hover box.
# Markers are coloured by continent and sized by the `gdp_test` column;
# `custom_data` exposes Country and gdp to the hover template below.
fig_1 = px.scatter(
    df,
    x="temp_diff",
    y="GHG",
    size="gdp_test",
    size_max=100,
    color="continent",
    hover_name="Country",
    log_x=False,
    labels={
        # The year range in this title must match the range passed to
        # get_temperature_diff_between when `df` was aggregated (1960-2013).
        "temp_diff": "Increase in average temperature between 1960 and 2013",
        "GHG": "Greenhouse Gases Emissions",
    },
    custom_data=["Country", "gdp"],
)

# One hover line per entry, joined with HTML line breaks; customdata indices
# follow the order of `custom_data` above (0 = Country, 1 = gdp).
fig_1.update_traces(
    hovertemplate="<br>".join([
        "Country: %{customdata[0]}",
        "GDP: %{customdata[1]} billion $",
        "Increase in average temperature: %{x} °C",
        "Greenhouse Gases Emissions: %{y}",
    ])
)

# Opens the figure in the system web browser instead of rendering inline.
fig_1.show(renderer="browser")