# Dataset: https://www.kaggle.com/code/andradaolteanu/plotly-advanced-global-warming-analysis

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px

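# Render matplotlib figures inline, directly below the cell that produces them
# (modern Jupyter does this by default, so the magic is harmless but optional).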
%matplotlib inline 

In [2]:
# Load only the date column and the four temperature series used in this notebook.
Temp = pd.read_csv(
    'GlobalTemperatures.csv',
    usecols=[
        'dt',
        'LandAverageTemperature',
        'LandMaxTemperature',
        'LandMinTemperature',
        'LandAndOceanAverageTemperature',
    ],
)
# Preview the first five rows.
Temp.head(n=5)

Unnamed: 0,dt,LandAverageTemperature,LandMaxTemperature,LandMinTemperature,LandAndOceanAverageTemperature
0,1750-01-01,3.034,,,
1,1750-02-01,3.083,,,
2,1750-03-01,5.626,,,
3,1750-04-01,8.49,,,
4,1750-05-01,11.573,,,


In [3]:
# Print each column's dtype and non-null count to spot type problems and missing values.
Temp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3192 entries, 0 to 3191
Data columns (total 5 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   dt                              3192 non-null   object 
 1   LandAverageTemperature          3180 non-null   float64
 2   LandMaxTemperature              1992 non-null   float64
 3   LandMinTemperature              1992 non-null   float64
 4   LandAndOceanAverageTemperature  1992 non-null   float64
dtypes: float64(4), object(1)
memory usage: 124.8+ KB


---
### The date column's datatype was read as `object`, so we convert it to `datetime` in order to analyze it properly later on. Additionally, I have removed the rows with missing values.

In [4]:
# Convert the 'dt' strings (YYYY-MM-DD) to datetime values so the .dt accessor can be used later.
Temp['dt'] = pd.to_datetime(Temp['dt'], format='%Y-%m-%d')
# Drop, in place, every row that has a missing value in any column.
Temp.dropna(axis='index', how='any', inplace=True)

In [5]:
# Summary statistics of the cleaned data, rounded to two decimals for readability.
Temp.describe().round(decimals=2)

Unnamed: 0,LandAverageTemperature,LandMaxTemperature,LandMinTemperature,LandAndOceanAverageTemperature
count,1992.0,1992.0,1992.0,1992.0
mean,8.57,14.35,2.74,15.21
std,4.26,4.31,4.16,1.27
min,0.4,5.9,-5.41,12.48
25%,4.43,10.21,-1.33,14.05
50%,8.85,14.76,2.95,15.25
75%,12.86,18.45,6.78,16.4
max,15.48,21.32,9.72,17.61


In [6]:
# Narrow the summary to the minimum, mean and maximum rows only.
Temp.describe().loc[['min', 'mean', 'max'], :].round(decimals=2)

Unnamed: 0,LandAverageTemperature,LandMaxTemperature,LandMinTemperature,LandAndOceanAverageTemperature
min,0.4,5.9,-5.41,12.48
mean,8.57,14.35,2.74,15.21
max,15.48,21.32,9.72,17.61


---
### I have added a 'Year' column extracted from the date column 'dt' and averaged every temperature series per year

In [7]:
# Work on a copy of Temp that carries an extra 'Year' column taken from the date.
ByYear = Temp.assign(Year=Temp['dt'].dt.year)

# Average every temperature series within each year; 'Year' is restored as a
# regular column so it can be used directly as the x-axis in the plots.
GroupByYear = (
    ByYear
    .groupby('Year')[[
        'LandAverageTemperature',
        'LandMaxTemperature',
        'LandMinTemperature',
        'LandAndOceanAverageTemperature',
    ]]
    .mean()
    .reset_index()
)
GroupByYear

Unnamed: 0,Year,LandAverageTemperature,LandMaxTemperature,LandMinTemperature,LandAndOceanAverageTemperature
0,1850,7.900667,13.476667,1.964333,14.867167
1,1851,8.178583,13.081000,2.203917,14.991833
2,1852,8.100167,13.397333,2.337000,15.006500
3,1853,8.041833,13.886583,1.892500,14.955167
4,1854,8.210500,13.977417,1.762167,14.991000
...,...,...,...,...,...
161,2011,9.516000,15.284833,3.827667,15.769500
162,2012,9.507333,15.332833,3.756167,15.802333
163,2013,9.606500,15.373833,3.911333,15.854417
164,2014,9.570667,15.313583,3.877750,15.913000


In [8]:
# Summary statistics of the yearly averages (the 'Year' column is summarised as well).
GroupByYear.describe().round(decimals=2)

Unnamed: 0,Year,LandAverageTemperature,LandMaxTemperature,LandMinTemperature,LandAndOceanAverageTemperature
count,166.0,166.0,166.0,166.0,166.0
mean,1932.5,8.57,14.35,2.74,15.21
std,48.06,0.47,0.45,0.61,0.3
min,1850.0,7.56,13.08,1.53,14.74
25%,1891.25,8.2,14.06,2.26,14.99
50%,1932.5,8.54,14.31,2.73,15.14
75%,1973.75,8.79,14.54,3.13,15.38
max,2015.0,9.83,15.57,4.15,16.06


---
### Below are visualizations of the data, built with the visualization library 'Plotly'

In [13]:
# Line chart of the yearly mean land temperature.
# `labels` has to be a dict mapping a column name to its display text; keyed by
# the y column, the text is used as the y-axis title and in the hover box.
fig = px.line(
    GroupByYear,
    x='Year',
    y='LandAverageTemperature',
    title='Land Average Temperature Over The Years',
    labels={'LandAverageTemperature': 'Temperature Measured in Celsius'},
)
fig.update_traces(line=dict(color='royalblue', width=2))
# White plot background with thin grey gridlines on both axes.
fig.update_layout(plot_bgcolor='white',
                  xaxis=dict(showgrid=True, gridwidth=0.5, gridcolor='grey'),
                  yaxis=dict(showgrid=True, gridwidth=0.5, gridcolor='grey'))


fig.show()

In [10]:
# Yearly mean of the land maximum and minimum temperatures on one chart.
fig2 = go.Figure()

fig2.add_trace(go.Scatter(x=GroupByYear['Year'], y=GroupByYear['LandMaxTemperature'],
                          mode='lines',
                          line=dict(color='royalblue', width=2), name='Max Temperature'))

# The legend name must match the plotted column: this trace is the minimum series.
fig2.add_trace(go.Scatter(x=GroupByYear['Year'], y=GroupByYear['LandMinTemperature'],
                          mode='lines',
                          line=dict(color='green', width=2), name='Min Temperature'))

# Axis titles, grey gridlines on a white background, and a horizontal legend.
fig2.update_layout(title='Land Temperature Over The Years',
                   xaxis=dict(title='Year', showgrid=True, gridwidth=0.5, gridcolor='grey'),
                   yaxis=dict(title='Temperature in Celsius', showgrid=True, gridwidth=0.5, gridcolor='grey'),
                   plot_bgcolor='white',
                   legend=dict(title='Temperature Type', orientation='h', y=1.1, x=0.5))

fig2.show() 

In [12]:
# Line chart of the yearly mean combined land-and-ocean temperature.
fig3 = px.line(GroupByYear, x='Year', y='LandAndOceanAverageTemperature', title='Land And Ocean Average Temperature Over the Years')
# Style fig3 itself: every update call below targets the figure created in this
# cell, so the figure that is shown is the one that gets the styling.
fig3.update_traces(line=dict(color='royalblue', width=2))
fig3.update_layout(xaxis=dict(title='year', showgrid=True, gridwidth=0.5, gridcolor='grey'),
                   yaxis=dict(title='Temperature In Celsius', showgrid=True, gridwidth=0.5, gridcolor='grey'),
                   plot_bgcolor='white', 
                   legend=dict(title='Temperature Type', orientation='h', y=1.1, x=0.5))

fig3.show()