In [4]:
# In this notebook, we use climate change data for time series analysis.

In [5]:
# Importing libraries
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller

%matplotlib inline
warnings.filterwarnings('ignore')

In [6]:
# Load GlobalLandTemperaturesByState.csv into a DataFrame and preview the first rows
df = pd.read_csv(
    filepath_or_buffer='../input/climate-change-earth-surface-temperature-data/GlobalLandTemperaturesByState.csv'
)
df.head()

In [7]:
# Explore the dataset: show the dtype pandas inferred for each column
df.dtypes

In [8]:
# (rows, columns) of the frame as loaded, before any cleaning
df.shape

In [9]:
# Count the missing values in each column
df.isna().sum()

In [10]:
# Drop every row that contains at least one missing value, then re-check the size
df = df.dropna(axis='index', how='any')
df.shape

In [11]:
# Give the date and uncertainty columns more descriptive names
df = df.rename(
    columns={
        'dt': 'Date',
        'AverageTemperatureUncertainty': 'confidence_interval_temp',
    }
)
df.head()

In [12]:
# Parse the 'Date' column as datetimes and promote it to the frame's index
df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')
df.index

In [13]:
# Summary statistics for the cleaned, date-indexed frame
df.describe()

In [14]:
# Add a 'year' column derived from the datetime index (the index itself stays 'Date')
df = df.assign(year=df.index.year)
df.head()

In [15]:
# Keep only the recent records: calendar years 1980 through 2021 inclusive.
# A boolean mask on the index year is used instead of df.loc['1980':'2021'] because
# the index is still in file order (it is never sorted in this notebook), and label
# slicing an unsorted DatetimeIndex with a bound that is absent from the data is
# deprecated in pandas 1.5 and raises KeyError in pandas >= 2.0.
# The mask selects the same rows and keeps their original order.
latest_df = df[(df.index.year >= 1980) & (df.index.year <= 2021)]
latest_df.head()

In [16]:
# Summary statistics for the 1980+ subset that the rest of the analysis uses.
# (Previously this repeated df.describe() on the full frame, already shown above.)
latest_df.describe()

In [17]:
# Mean temperature per country in latest_df, ordered from coldest to warmest
latest_df.groupby('Country')[['AverageTemperature']].mean().sort_values(by='AverageTemperature')

In [18]:
# Line plot of average temperature against year (seaborn aggregates rows sharing a year)
plt.figure(figsize=(9, 4))
sns.lineplot(
    data=latest_df,
    x="year",
    y="AverageTemperature",
)
plt.show()

In [19]:
# Collapse the records to one value per year: annual ('A') mean temperature
resample_df = latest_df['AverageTemperature'].resample('A').mean().to_frame()
resample_df.head()

In [20]:
# Plot the annual mean temperature.
# The year range in the title is read from the resampled index rather than
# hard-coded, so the title always matches the data that is actually plotted.
resample_df.plot(
    title='Temperature changes from %d - %d'
    % (resample_df.index.year.min(), resample_df.index.year.max()),
    figsize=(8, 5),
)
plt.ylabel('Temperature', fontsize=12)
plt.xlabel('year', fontsize=12)
plt.show()

In [21]:
# Augmented Dickey-Fuller test on the annual mean series.
# `adfuller` is imported in the notebook's import cell at the top.
print('Dickey Fuller Test Results')
test_df = adfuller(resample_df.iloc[:, 0].values, autolag='AIC')
# The first four entries of the result are the summary numbers labelled below.
df_output = pd.Series(
    test_df[0:4],
    index=['Test Statistic', 'p-value', 'Lags Used', 'Number of Observation Used'],
)
# The fifth entry is a dict mapping each significance level to its critical value.
for key, value in test_df[4].items():
    df_output['Critical value (%s)' % key] = value
print(df_output)

In [22]:
# The test statistic is greater than the critical values, so we can say that the time series is not stationary.
# Time series decomposition is a technique that splits a time series into several components, each representing an underlying pattern category: trend, seasonality, and noise.

In [38]:
# Decompose the annual series into trend, seasonal and residual components.
# `period` replaces the `freq` keyword, which statsmodels deprecated and later removed.
decomp = seasonal_decompose(resample_df, period=10)
trend = decomp.trend
seasonal = decomp.seasonal
residual = decomp.resid

# Draw all four panels on ONE figure. Calling plt.figure() after each plt.subplot()
# starts a new figure every time, which scatters the panels across separate figures
# and leaves tight_layout() acting on an empty one.
fig, axes = plt.subplots(4, 1, figsize=(8, 8))
panels = [
    ('Original', resample_df),
    ('Trend', trend),
    ('Seasonal', seasonal),
    ('Residual', residual),
]
for ax, (label, series) in zip(axes, panels):
    ax.plot(series)
    ax.set_title(label)

fig.tight_layout()
plt.show()

In [44]:
# Smoothing and variability of the annual series:
#   left panel  - original series, centred 3-year rolling mean, exponentially weighted mean (span=5)
#   right panel - centred 3-year rolling standard deviation
rol_mean = resample_df.rolling(window=3, center=True).mean()
ewm = resample_df.ewm(span=5).mean()
rol_std = resample_df.rolling(window=3, center=True).std()

# Read the year range from the data so the titles cannot disagree with what is plotted.
year_span = '%d-%d' % (resample_df.index.year.min(), resample_df.index.year.max())

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

ax1.plot(resample_df, label='Original')
ax1.plot(rol_mean, label='Rolling Mean')
ax1.plot(ewm, label='Exponentially Weighted Mean')
ax1.set_title('Temperature changes from %s' % year_span, fontsize=14)
ax1.set_ylabel('Temperature', fontsize=12)
ax1.set_xlabel('Year', fontsize=12)
ax1.legend()

# This panel shows the rolling standard deviation, so title and axis say so
# (it was previously labelled as "Temperature changes" / "Temperature").
ax2.plot(rol_std, label='Rolling STD')
ax2.set_title('Rolling standard deviation of temperature, %s' % year_span, fontsize=14)
ax2.set_ylabel('Standard deviation', fontsize=12)
ax2.set_xlabel('Year', fontsize=12)
ax2.legend()

plt.tight_layout()
plt.show()