In [2]:
#CLIMATE CHANGE TIME SERIES ANALYSIS

#Importing Libraries
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
import seaborn as sns
%matplotlib inline
import warnings
# NOTE(review): this silences every warning, including pandas deprecation
# notices that may matter for later cells; consider narrowing the filter.
warnings.filterwarnings('ignore')

#importing data
# The CSV location defaults to the original local path but can be overridden
# with the CLIMATE_DATA_CSV environment variable, so the notebook can run on a
# machine with a different directory layout without editing this cell.
import os
DATA_CSV = os.environ.get(
    "CLIMATE_DATA_CSV",
    r"C:\Users\HP\Documents\CV\GlobalLandTemperaturesByCity.csv",
)
df = pd.read_csv(DATA_CSV)

In [4]:
# Preview the first five rows of the freshly loaded frame.
df.head(5)

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [5]:
#Exploring data types
# Shows the dtype pandas inferred for every column of df.
df.dtypes

dt                                object
AverageTemperature               float64
AverageTemperatureUncertainty    float64
City                              object
Country                           object
Latitude                          object
Longitude                         object
dtype: object

In [6]:
# (rows, columns) of df as loaded.
df.shape

(1048575, 7)

In [7]:
# Count the missing values in each column.
missing_mask = df.isnull()
missing_mask.sum(axis=0)

dt                                   0
AverageTemperature               47547
AverageTemperatureUncertainty    47547
City                                 0
Country                              0
Latitude                             0
Longitude                            0
dtype: int64

In [8]:
# Keep only the rows in which every column has a value.
# (how='any' and axis=0 are dropna's defaults, so they are left implicit.)
df = df.dropna()

In [9]:
# (rows, columns) of df after the rows with missing values were dropped.
df.shape

(1001028, 7)

In [11]:
# Rename the date and temperature-uncertainty columns, then preview the result.
column_renames = {
    'dt': 'Date',
    'AverageTemperatureUncertainty': 'confidence_interval_temp',
}
df = df.rename(columns=column_renames)
df.head()

Unnamed: 0,Date,AverageTemperature,confidence_interval_temp,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
5,1744-04-01,5.788,3.624,Århus,Denmark,57.05N,10.33E
6,1744-05-01,10.644,1.283,Århus,Denmark,57.05N,10.33E
7,1744-06-01,14.051,1.347,Århus,Denmark,57.05N,10.33E
8,1744-07-01,16.082,1.396,Århus,Denmark,57.05N,10.33E


In [12]:
#Data Preparation.
# Parse the 'Date' strings into datetimes and make them the index of df.
# The conversion is skipped when 'Date' is already the index, so re-running
# this cell no longer fails with KeyError: 'Date' (the original in-place
# set_index consumed the column on the first run).
# NOTE(review): further down, the 1980 rows are displayed as 1980-01-01,
# 1980-01-02, 1980-01-03, ... although the early rows step month by month.
# That suggests some date strings may be day-first and are being read
# month-first. Confirm against the raw CSV and, if so, pass an explicit
# format / dayfirst to pd.to_datetime.
if df.index.name != 'Date':
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index('Date')
df.index

DatetimeIndex(['1743-11-01', '1744-04-01', '1744-05-01', '1744-06-01',
               '1744-07-01', '1744-09-01', '1744-10-01', '1744-11-01',
               '1744-12-01', '1745-01-01',
               ...
               '1783-11-01', '1783-12-01', '1784-01-01', '1784-02-01',
               '1784-03-01', '1784-04-01', '1784-05-01', '1784-06-01',
               '1784-07-01', '1784-08-01'],
              dtype='datetime64[ns]', name='Date', length=1001028, freq=None)

In [13]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()

Unnamed: 0,AverageTemperature,confidence_interval_temp
count,1001028.0,1001028.0
mean,17.92899,1.033831
std,10.35884,1.103003
min,-31.874,0.036
25%,12.017,0.348
50%,20.486,0.607
75%,25.926,1.38
max,39.156,15.03


In [14]:
# Add the calendar year of each row's date as a 'year' column
# (the DatetimeIndex itself is left unchanged), then preview.
df = df.assign(year=df.index.year)
df.head()

Unnamed: 0_level_0,AverageTemperature,confidence_interval_temp,City,Country,Latitude,Longitude,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E,1743
1744-04-01,5.788,3.624,Århus,Denmark,57.05N,10.33E,1744
1744-05-01,10.644,1.283,Århus,Denmark,57.05N,10.33E,1744
1744-06-01,14.051,1.347,Århus,Denmark,57.05N,10.33E,1744
1744-07-01,16.082,1.396,Århus,Denmark,57.05N,10.33E,1744


In [15]:
#Filtering to use latest data
# Keep the rows dated 1980 through 2021 (inclusive), selected with a boolean
# mask on the index year. The previous label slice df.loc['1980':'2021'] used
# partial-string slicing on an unsorted DatetimeIndex with an end key ('2021')
# that does not occur in the data; pandas deprecated that and raises KeyError
# for it from version 2.0. The mask picks the same rows on any pandas version.
index_year = df.index.year
# .copy() gives an independent frame, so later edits never write back into df.
latest_df = df.loc[(index_year >= 1980) & (index_year <= 2021)].copy()

In [16]:
# Preview the first five rows of the 1980-onwards subset.
latest_df.head(5)

Unnamed: 0_level_0,AverageTemperature,confidence_interval_temp,City,Country,Latitude,Longitude,year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1980-01-01,-1.85,0.363,Århus,Denmark,57.05N,10.33E,1980
1980-01-02,-2.171,0.377,Århus,Denmark,57.05N,10.33E,1980
1980-01-03,0.625,0.178,Århus,Denmark,57.05N,10.33E,1980
1980-01-04,6.151,0.203,Århus,Denmark,57.05N,10.33E,1980
1980-01-05,11.434,0.226,Århus,Denmark,57.05N,10.33E,1980


In [17]:
# Summary statistics of the numeric columns in the filtered subset.
latest_df.describe()

Unnamed: 0,AverageTemperature,confidence_interval_temp,year
count,170519.0,170519.0,170519.0
mean,19.364059,0.365391,1996.339663
std,9.96612,0.196611,9.721861
min,-28.973,0.036,1980.0
25%,14.092,0.241,1988.0
50%,22.245,0.32,1996.0
75%,26.74,0.435,2005.0
max,39.156,5.148,2013.0


In [18]:
# Mean of AverageTemperature per country over the filtered rows,
# sorted from the lowest mean to the highest.
country_means = latest_df.groupby('Country')[['AverageTemperature']].mean()
country_means.sort_values(by='AverageTemperature')

Unnamed: 0_level_0,AverageTemperature
Country,Unnamed: 1_level_1
Norway,2.581287
Russia,4.224789
Kazakhstan,6.503722
Canada,7.376665
Switzerland,7.703418
...,...
Benin,27.665557
Guinea Bissau,27.729134
Thailand,27.765322
Somalia,27.964663


In [None]:
# Line plot of AverageTemperature against year for the filtered data,
# drawn on an explicitly created 9x4 inch figure.
fig, ax = plt.subplots(figsize=(9, 4))
sns.lineplot(data=latest_df, x="year", y="AverageTemperature", ax=ax)
plt.show()