# Climate Change Prediction Using Time Series Analysis

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from matplotlib import pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

%matplotlib inline
import warnings
# NOTE(review): this silences every warning category for the rest of the
# session, so pandas SettingWithCopy and library deprecation warnings will not
# be shown. Consider filtering a specific category or module instead.
warnings.filterwarnings('ignore')

In [18]:
# Load the temperature-by-state CSV from the notebook's local data folder.
df = pd.read_csv(filepath_or_buffer="./data/globaltempByState.csv")

In [19]:
# Preview the first ten rows to see the column layout and value formats.
df.head(10)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil
5,1855-10-01,25.442,1.179,Acre,Brazil
6,1855-11-01,25.4,1.064,Acre,Brazil
7,1855-12-01,24.1,1.718,Acre,Brazil
8,1856-01-01,25.814,1.159,Acre,Brazil
9,1856-02-01,24.658,1.147,Acre,Brazil


## Check the Data Type of Each Column

In [21]:
# Column dtypes as loaded; note that `dt` is still object dtype here, not datetime.
df.dtypes

dt                                object
AverageTemperature               float64
AverageTemperatureUncertainty    float64
State                             object
Country                           object
dtype: object

## Check the Shape of The Data

In [27]:
# (rows, columns) of the frame before any cleaning.
df.shape

(645675, 5)

## Check for All Null Values

In [29]:
# Number of missing entries in each column.
df.isna().sum()

dt                                   0
AverageTemperature               25648
AverageTemperatureUncertainty    25648
State                                0
Country                              0
dtype: int64

## Drop the Null Values

In [30]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis="index", how="any")

In [31]:
# Display the cleaned frame; pandas elides the middle rows in the rendered output.
df

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil
...,...,...,...,...,...
645669,2013-04-01,15.710,0.461,Zhejiang,China
645670,2013-05-01,21.634,0.578,Zhejiang,China
645671,2013-06-01,24.679,0.596,Zhejiang,China
645672,2013-07-01,29.272,1.340,Zhejiang,China


## Recheck the shape of the Newly Wrangled Data

In [33]:
# Row count after cleaning: the drop from the earlier shape should equal the
# number of rows that had missing temperature values.
df.shape

(620027, 5)

## Rename the title of the Columns 

In [36]:
# Give the three verbose source columns shorter labels; State and Country are
# left as they are.
column_renames = {
    'dt': 'Date',
    'AverageTemperature': 'Avrg_Temp',
    'AverageTemperatureUncertainty': 'Confidence_interval_temp',
}
# Reassign instead of using inplace=True so the renamed frame is a new object
# and the frame shown by earlier cells is not mutated behind the reader's back.
df = df.rename(columns=column_renames)
df.head(10)

Unnamed: 0,Date,Avrg_Temp,Confidence_interval_temp,State,Country
0,1855-05-01,25.544,1.171,Acre,Brazil
1,1855-06-01,24.228,1.103,Acre,Brazil
2,1855-07-01,24.371,1.044,Acre,Brazil
3,1855-08-01,25.427,1.073,Acre,Brazil
4,1855-09-01,25.675,1.014,Acre,Brazil
5,1855-10-01,25.442,1.179,Acre,Brazil
6,1855-11-01,25.4,1.064,Acre,Brazil
7,1855-12-01,24.1,1.718,Acre,Brazil
8,1856-01-01,25.814,1.159,Acre,Brazil
9,1856-02-01,24.658,1.147,Acre,Brazil


### Use the Date Column as The Index of Our Dataset

- First, convert the date column to a datetime type; then set it as the index.

In [38]:
# Parse the date strings and promote them to the index in one step.
# The guard makes the cell safe to re-run: once 'Date' has become the index it
# is no longer a column, and repeating the conversion would raise KeyError.
# Building a new frame with assign() also avoids assigning a column directly on
# the frame returned by dropna().
if 'Date' in df.columns:
    df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')
df.index

DatetimeIndex(['1855-05-01', '1855-06-01', '1855-07-01', '1855-08-01',
               '1855-09-01', '1855-10-01', '1855-11-01', '1855-12-01',
               '1856-01-01', '1856-02-01',
               ...
               '2012-11-01', '2012-12-01', '2013-01-01', '2013-02-01',
               '2013-03-01', '2013-04-01', '2013-05-01', '2013-06-01',
               '2013-07-01', '2013-08-01'],
              dtype='datetime64[ns]', name='Date', length=620027, freq=None)

### Describe our Dataset
- This summarizes each numeric column: count, mean, standard deviation, minimum, quartiles, and maximum.

In [39]:
# Summary statistics for the numeric columns of the cleaned, date-indexed frame.
df.describe()

Unnamed: 0,Avrg_Temp,Confidence_interval_temp
count,620027.0,620027.0
mean,8.993111,1.287647
std,13.77215,1.360392
min,-45.389,0.036
25%,-0.693,0.316
50%,11.199,0.656
75%,19.899,1.85
max,36.339,12.646
