#### Author - Prateek Singh

## Importing Libraries

In [1]:
# Imports
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from IPython.display import display, HTML

# Plotly
import plotly.io as pio
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Importing data

In [2]:
# Load the global temperature records from the local CSV file.
data = pd.read_csv(filepath_or_buffer="data/GlobalTemperatures.csv")

# Report the raw dimensions and the column names before any cleaning is applied.
print("Size Before dropping NA values:", data.shape)
print(data.columns)

Size Before dropping NA values: (3192, 9)
Index(['dt', 'LandAverageTemperature', 'LandAverageTemperatureUncertainty',
       'LandMaxTemperature', 'LandMaxTemperatureUncertainty',
       'LandMinTemperature', 'LandMinTemperatureUncertainty',
       'LandAndOceanAverageTemperature',
       'LandAndOceanAverageTemperatureUncertainty'],
      dtype='object')


## Finding and Dropping NA Values

In [3]:
# Count the missing values in each column before cleaning.
missing_per_column = data.isna().sum()
print(missing_per_column)

# Remove, in place, every row that contains a missing value.
data.dropna(axis="index", inplace=True)

# Keep a snapshot of the NA-free frame under the name `copy`.
copy = data.copy()

print("Size after dropping NA values:", data.shape)
data

dt                                              0
LandAverageTemperature                         12
LandAverageTemperatureUncertainty              12
LandMaxTemperature                           1200
LandMaxTemperatureUncertainty                1200
LandMinTemperature                           1200
LandMinTemperatureUncertainty                1200
LandAndOceanAverageTemperature               1200
LandAndOceanAverageTemperatureUncertainty    1200
dtype: int64
Size after dropping NA values: (1992, 9)


Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
1200,1850-01-01,0.749,1.105,8.242,1.738,-3.206,2.822,12.833,0.367
1201,1850-02-01,3.071,1.275,9.970,3.007,-2.291,1.623,13.588,0.414
1202,1850-03-01,4.954,0.955,10.347,2.401,-1.905,1.410,14.043,0.341
1203,1850-04-01,7.217,0.665,12.934,1.004,1.018,1.329,14.667,0.267
1204,1850-05-01,10.004,0.617,15.655,2.406,3.811,1.347,15.507,0.249
...,...,...,...,...,...,...,...,...,...
3187,2015-08-01,14.755,0.072,20.699,0.110,9.005,0.170,17.589,0.057
3188,2015-09-01,12.999,0.079,18.845,0.088,7.199,0.229,17.049,0.058
3189,2015-10-01,10.801,0.102,16.450,0.059,5.232,0.115,16.290,0.062
3190,2015-11-01,7.433,0.119,12.892,0.093,2.157,0.106,15.252,0.063


## Reformatting date, Renaming columns, Grouping data by year

In [4]:
# Map every raw CSV column name to the display name used by the rest of the
# notebook. Renaming by name (rather than assigning a positional list to
# `data.columns`) keeps each label attached to the right column even if the
# column order ever changes.
# NOTE: the "Uncertainity" spelling is kept as-is on purpose, because the
# plotting cells look these columns up by exactly these names.
column_labels = {
    'LandAverageTemperature': 'Land Average Temperature',
    'LandAverageTemperatureUncertainty': 'Land Average Temperature Uncertainity',
    'LandMaxTemperature': 'Land Max Temperature',
    'LandMaxTemperatureUncertainty': 'Land Max Temperature Uncertainity',
    'LandMinTemperature': 'Land Min Temperature',
    'LandMinTemperatureUncertainty': 'Land Min Temperature Uncertainty',
    'LandAndOceanAverageTemperature': 'Land And Ocean Average Temperature',
    'LandAndOceanAverageTemperatureUncertainty': 'Land And Ocean Average Temperature Uncertainity',
}

# Rebuild `data` from the NA-free snapshot (`copy`, taken right after dropna)
# instead of mutating `data` in place. The previous in-place version dropped
# 'dt' and renamed the columns, so running the cell a second time failed on
# `data.dt`; deriving from the snapshot makes the cell safe to re-run.
data = (
    copy
    .assign(
        # Parse the raw 'dt' strings into timestamps, then split out the parts.
        Date=lambda frame: pd.to_datetime(frame['dt']),
        Day=lambda frame: frame['Date'].dt.day,
        Month=lambda frame: frame['Date'].dt.month,
        Year=lambda frame: frame['Date'].dt.year,
    )
    .drop(columns=['dt'])
    .rename(columns=column_labels)
)

# Average every temperature / uncertainty column per calendar year.
# groupby already leaves 'Year' as the index, so no reset/set_index is needed.
earth_data = data.groupby(by='Year')[list(column_labels.values())].mean()
earth_data.head()

Unnamed: 0_level_0,Land Average Temperature,Land Average Temperature Uncertainity,Land Max Temperature,Land Max Temperature Uncertainity,Land Min Temperature,Land Min Temperature Uncertainty,Land And Ocean Average Temperature,Land And Ocean Average Temperature Uncertainity
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1850,7.900667,0.876417,13.476667,2.394833,1.964333,1.571167,14.867167,0.308167
1851,8.178583,0.881917,13.081,2.39725,2.203917,1.632417,14.991833,0.312083
1852,8.100167,0.91825,13.397333,2.61925,2.337,1.382917,15.0065,0.316417
1853,8.041833,0.835,13.886583,2.095083,1.8925,1.355583,14.955167,0.283833
1854,8.2105,0.825667,13.977417,1.783333,1.762167,1.357,14.991,0.276417


## Describing data

In [5]:
# Summary statistics of the yearly means, one row per temperature series.
temperature_columns = [
    'Land Average Temperature',
    'Land Max Temperature',
    'Land Min Temperature',
    'Land And Ocean Average Temperature',
]
earth_data[temperature_columns].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Land Average Temperature,166.0,8.571583,0.473687,7.558583,8.195708,8.54075,8.79125,9.831
Land Max Temperature,166.0,14.350601,0.447741,13.081,14.055917,14.307708,14.539167,15.572667
Land Min Temperature,166.0,2.743595,0.614124,1.525083,2.262562,2.734917,3.126833,4.148833
Land And Ocean Average Temperature,166.0,15.212566,0.298629,14.740083,14.991208,15.144208,15.379104,16.058583


In [6]:
# Shared Plotly styling used by the figures in this notebook:
# font settings for the figure title, axis titles, axis ticks and legend ...
title_font = dict(size=20, family="Arial", color="black")
axis_title_font = dict(size=14, family="Arial", color="black")
axis_tick_font = dict(size=12, family="Arial", color="black")
legend_font = dict(size=12, family="Arial", color="black")

# ... plus the plot/paper background colour and the grid-line colour.
background_color = '#ffffff'
grid_color = "#dcdcdc"

In [9]:
# Yearly land average temperature with its uncertainty band
# (mean + uncertainty on top, mean - uncertainty at the bottom).
fig = go.Figure()

# Central line: the yearly mean itself.
fig.add_trace(go.Scatter(
    x=earth_data.index, 
    y=earth_data['Land Average Temperature'], 
    mode='lines',
    name='Land Avg Temp',
    line=dict(color='black', width=2),
    legendgroup='Land Avg Temp',
))

# Upper edge of the band. It deliberately has NO fill: 'tonexty' fills towards
# the trace added just before, so a fill here would shade mean→upper and the
# lower-bound fill below would then shade the same region a second time,
# giving a two-tone band that makes the uncertainty look asymmetric.
fig.add_trace(go.Scatter(
    x=earth_data.index, 
    y=earth_data['Land Average Temperature'] + earth_data['Land Average Temperature Uncertainity'],
    mode='lines',
    name='Land Avg Temp Error Upper',
))

# Lower edge of the band; 'tonexty' fills up to the upper edge added just
# above, producing one uniform band between the two bounds.
fig.add_trace(go.Scatter(
    x=earth_data.index, 
    y=earth_data['Land Average Temperature'] - earth_data['Land Average Temperature Uncertainity'],
    mode='lines',
    name='Land Avg Temp Error Lower',
    fill='tonexty',
))

# Titles, axis styling and legend placement (legend sits in the lower right).
fig.update_layout(
    title='Land Average Temperature and Uncertainty Over the Years',
    title_x=0.5,
    title_font=title_font,
    xaxis=dict(
        title='Year',
        title_font=axis_title_font,
        tickfont=axis_tick_font,
        showgrid=True,
        gridcolor=grid_color
    ),
    yaxis=dict(
        title='Temperature (°C)',
        title_font=axis_title_font,
        tickfont=axis_tick_font,
        showgrid=True,
        gridcolor=grid_color
    ),
    plot_bgcolor=background_color,
    hovermode='closest',
    paper_bgcolor=background_color,
    showlegend=True,
    legend=dict(
        x=0.82, 
        y=0.10, 
        bgcolor='rgba(255, 255, 255, 0.7)',
        bordercolor='black',
        borderwidth=1
    ),
)

# Static PNG export, shown inline in the notebook.
pio.write_image(fig, "images/land_temperature_uncertainty.png", scale=2)  # Requires kaleido
display(HTML('<img src="images/land_temperature_uncertainty.png" width="900">'))

# Interactive HTML export, linked from the notebook.
fig.write_html("charts/land_temperature_uncertainty.html")

display(HTML('<a href="charts/land_temperature_uncertainty.html" target="_blank">View Interactive Plot</a>'))

In [10]:
# 2x2 grid of yearly temperature trends, each with a vertical guide line at
# the turning-point year. The four panels were previously built from four
# copy-pasted blocks; they are now described once in a table and drawn in a
# loop, producing the same traces in the same order.

# Year marked on every panel; a later cell splits the data into
# "before" and "after" this same year.
turning_point_year = 1975

# One entry per subplot:
# (row, col, earth_data column, legend label, line colour, hard-coded y-extent of the guide line)
trend_panels = [
    (1, 1, 'Land Average Temperature', 'Land Avg Temp', 'rgb(128, 0, 0)', [7.5, 10]),
    (1, 2, 'Land Min Temperature', 'Land Min Temp', 'rgb(210,105,30)', [1.5, 4.5]),
    (2, 1, 'Land Max Temperature', 'Land Max Temp', 'rgb(135,206,235)', [13, 15.5]),
    (2, 2, 'Land And Ocean Average Temperature', 'Land and Ocean Avg Temp', 'rgb(107,142,35)', [14.5, 16]),
]

fig = make_subplots(rows=2, cols=2)

for panel_row, panel_col, series_column, series_label, series_color, guide_span in trend_panels:
    # Yearly series for this panel.
    fig.add_trace(
        go.Scatter(
            x=earth_data.index,
            y=earth_data[series_column],
            name=series_label,
            mode='lines', marker_color=series_color
        ),
        row=panel_row, col=panel_col
    )
    # Gray vertical guide at the turning-point year, hidden from the legend.
    fig.add_trace(
        go.Scatter(
            x=[turning_point_year, turning_point_year],
            y=guide_span,
            mode="lines",
            line=go.scatter.Line(color="gray"),
            showlegend=False
        ),
        row=panel_row, col=panel_col
    )
    # Same axis titles, fonts and grid colour on every panel.
    fig.update_yaxes(
        title_text="Temp (°C)", title_font=axis_title_font, tickfont=axis_tick_font,
        gridcolor=grid_color, row=panel_row, col=panel_col
    )
    fig.update_xaxes(
        title_text="Year", title_font=axis_title_font, tickfont=axis_tick_font,
        gridcolor=grid_color, row=panel_row, col=panel_col
    )

fig.update_layout(
    title="When Global Warming Started?",  
    title_font=title_font,
    title_x=0.5,
    hovermode='closest',
    plot_bgcolor=background_color,
    paper_bgcolor=background_color,
    legend_font=legend_font
)

# Static PNG export, shown inline in the notebook.
pio.write_image(fig, "images/global_warming_start.png", scale=2) 
display(HTML('<img src="images/global_warming_start.png" width="900">'))

# Interactive HTML export, linked from the notebook.
fig.write_html("charts/global_warming_start.html")
display(HTML('<a href="charts/global_warming_start.html" target="_blank">View Interactive Plot</a>'))

In [11]:
# Change in average temperature before/after 1975.
# Each year is labelled by its side of the turning point; note that 1975
# itself falls into the 'Before 1975' group because the comparison is `<=`.
earth_data['turnpoint'] = np.where(earth_data.index <= 1975, 'Before 1975', 'After 1975')

# The four box plots were previously four copy-pasted blocks; they are now
# described once in a table and drawn in a loop, in the same order.
# One entry per subplot: (row, col, earth_data column, legend label, marker colour)
box_panels = [
    (1, 1, 'Land Average Temperature', 'Land Avg Temp', 'rgb(128, 0, 0)'),
    (1, 2, 'Land Min Temperature', 'Land Min Temp', 'rgb(210,105,30)'),
    (2, 1, 'Land Max Temperature', 'Land Max Temp', 'rgb(135,206,235)'),
    (2, 2, 'Land And Ocean Average Temperature', 'Land & Ocean Avg Temp', 'rgb(107,142,35)'),
]

fig = make_subplots(rows=2, cols=2)

for panel_row, panel_col, series_column, series_label, series_color in box_panels:
    # Temperatures on x, before/after group on y; every yearly point is drawn
    # next to its box and the mean is marked (boxmean=True).
    fig.add_trace(
        go.Box(
            x=earth_data[series_column],
            y=earth_data['turnpoint'],
            boxpoints='all', jitter=0.3, pointpos=-1.6,
            marker_color=series_color, boxmean=True,
            name=series_label
        ),
        row=panel_row, col=panel_col
    )
    # Same x-axis title, fonts and grid colour on every panel.
    fig.update_xaxes(
        title_text="Temp (°C)", title_font=axis_title_font, tickfont=axis_tick_font,
        gridcolor=grid_color, row=panel_row, col=panel_col
    )

fig.update_layout(
    title="Average Temperatures Before and After 1975",
    title_x=0.5,
    title_font=title_font,
    hovermode='closest',
    plot_bgcolor=background_color,
    paper_bgcolor=background_color
)
# Draw every box horizontally.
fig.update_traces(orientation='h')

# Static PNG export, shown inline in the notebook.
pio.write_image(fig, "images/average_land_temperature_1975.png", scale=2) 
display(HTML('<img src="images/average_land_temperature_1975.png" width="900">'))

# Interactive HTML export, linked from the notebook.
fig.write_html("charts/average_land_temperature_1975.html")
display(HTML('<a href="charts/average_land_temperature_1975.html" target="_blank">View Interactive Plot</a>'))