In [2]:
# for presentation purposes
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from jupyterthemes import jtplot
jtplot.style(grid=False)

# visualize 
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns


# working with dates
from datetime import datetime

# to evaluated performance using rmse
from sklearn.metrics import mean_squared_error
from math import sqrt 

# for tsa 
import statsmodels.api as sm

# holt's linear trend model. 
from statsmodels.tsa.api import Holt

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
major_city = pd.read_csv('GlobalLandTemperaturesByMajorCity.csv')
major_city.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1849-01-01,26.704,1.435,Abidjan,Côte D'Ivoire,5.63N,3.23W
1,1849-02-01,27.434,1.362,Abidjan,Côte D'Ivoire,5.63N,3.23W
2,1849-03-01,28.101,1.612,Abidjan,Côte D'Ivoire,5.63N,3.23W
3,1849-04-01,26.14,1.387,Abidjan,Côte D'Ivoire,5.63N,3.23W
4,1849-05-01,25.427,1.2,Abidjan,Côte D'Ivoire,5.63N,3.23W


In [5]:
major_city[major_city.Country == 'Philippines']

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
141568,1845-01-01,24.995,1.871,Manila,Philippines,15.27N,120.83E
141569,1845-02-01,24.63,2.228,Manila,Philippines,15.27N,120.83E
141570,1845-03-01,26.512,1.369,Manila,Philippines,15.27N,120.83E
141571,1845-04-01,,,Manila,Philippines,15.27N,120.83E
141572,1845-05-01,27.564,1.498,Manila,Philippines,15.27N,120.83E
141573,1845-06-01,26.819,1.38,Manila,Philippines,15.27N,120.83E
141574,1845-07-01,26.414,1.306,Manila,Philippines,15.27N,120.83E
141575,1845-08-01,25.856,1.157,Manila,Philippines,15.27N,120.83E
141576,1845-09-01,25.289,1.172,Manila,Philippines,15.27N,120.83E
141577,1845-10-01,24.628,1.225,Manila,Philippines,15.27N,120.83E


In [6]:
city = pd.read_csv('GlobalLandTemperaturesByCity.csv')
city.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
0,1743-11-01,6.068,1.737,Århus,Denmark,57.05N,10.33E
1,1743-12-01,,,Århus,Denmark,57.05N,10.33E
2,1744-01-01,,,Århus,Denmark,57.05N,10.33E
3,1744-02-01,,,Århus,Denmark,57.05N,10.33E
4,1744-03-01,,,Århus,Denmark,57.05N,10.33E


In [10]:
philippines = city[city.Country == 'Philippines']
philippines.City.value_counts().sort_values()

Dadiangas          1605
Tagum              1737
Digos              1737
Davao              1737
Pagadian           1773
Butuan             1773
Cotabato           1773
Koronadal          1773
Marawi             1773
Cagayan De Oro     1929
Roxas              1929
Tacloban           1929
Kabankalan         1929
Mandaue            1929
Iloilo             1929
Lapu Lapu          1929
Cadiz              1929
Iligan             1929
Talisay            1929
Bacolod            1929
Zamboanga          1929
Toledo             1929
Dumaguete          1929
Cebu               1929
Legaspi            2001
Santa Rosa         2001
Naga               2001
Lucena             2001
Batangas           2001
Santa Cruz         2001
San Pablo          2001
Dasmariñas         2001
San Pedro          2001
Binangonan         2001
Calamba            2001
Lipa               2001
Antipolo           2025
Baguio             2025
San Mateo          2025
Tuguegarao         2025
Cabanatuan         2025
Malolos         

In [11]:
manila = major_city[major_city.Country == 'Philippines']
manila.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2025 entries, 141568 to 143592
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   dt                             2025 non-null   object 
 1   AverageTemperature             2021 non-null   float64
 2   AverageTemperatureUncertainty  2021 non-null   float64
 3   City                           2025 non-null   object 
 4   Country                        2025 non-null   object 
 5   Latitude                       2025 non-null   object 
 6   Longitude                      2025 non-null   object 
dtypes: float64(2), object(5)
memory usage: 126.6+ KB


In [14]:
manila = major_city[major_city.City == 'Manila']
manila.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2025 entries, 141568 to 143592
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   dt                             2025 non-null   object 
 1   AverageTemperature             2021 non-null   float64
 2   AverageTemperatureUncertainty  2021 non-null   float64
 3   City                           2025 non-null   object 
 4   Country                        2025 non-null   object 
 5   Latitude                       2025 non-null   object 
 6   Longitude                      2025 non-null   object 
dtypes: float64(2), object(5)
memory usage: 126.6+ KB


In [15]:
def get_manila():
    major_city = pd.read_csv('GlobalLandTemperaturesByMajorCity.csv')
    manila = major_city[major_city.City == 'Manila']
    return manila

In [16]:
def clean_manila(df):
    df = df.fillna(df.mean())
    return df

In [18]:
from wrangle import wrangle_manila

In [19]:
df = wrangle_manila()
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2025 entries, 141568 to 143592
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   dt                             2025 non-null   object 
 1   AverageTemperature             2025 non-null   float64
 2   AverageTemperatureUncertainty  2025 non-null   float64
 3   City                           2025 non-null   object 
 4   Country                        2025 non-null   object 
 5   Latitude                       2025 non-null   object 
 6   Longitude                      2025 non-null   object 
dtypes: float64(2), object(5)
memory usage: 126.6+ KB


In [21]:
df.dt.nunique()

2025

In [22]:
df.head()

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
141568,1845-01-01,24.995,1.871,Manila,Philippines,15.27N,120.83E
141569,1845-02-01,24.63,2.228,Manila,Philippines,15.27N,120.83E
141570,1845-03-01,26.512,1.369,Manila,Philippines,15.27N,120.83E
141571,1845-04-01,26.448334,0.673189,Manila,Philippines,15.27N,120.83E
141572,1845-05-01,27.564,1.498,Manila,Philippines,15.27N,120.83E


In [23]:
df.AverageTemperatureUncertainty.describe()

count    2025.000000
mean        0.673189
std         0.485958
min         0.071000
25%         0.298000
50%         0.460000
75%         1.025000
max         3.850000
Name: AverageTemperatureUncertainty, dtype: float64

In [24]:
df.dt = pd.to_datetime(df.dt)
df

Unnamed: 0,dt,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
141568,1845-01-01,24.995,1.871,Manila,Philippines,15.27N,120.83E
141569,1845-02-01,24.63,2.228,Manila,Philippines,15.27N,120.83E
141570,1845-03-01,26.512,1.369,Manila,Philippines,15.27N,120.83E
141571,1845-04-01,26.448334,0.673189,Manila,Philippines,15.27N,120.83E
141572,1845-05-01,27.564,1.498,Manila,Philippines,15.27N,120.83E
141573,1845-06-01,26.819,1.38,Manila,Philippines,15.27N,120.83E
141574,1845-07-01,26.414,1.306,Manila,Philippines,15.27N,120.83E
141575,1845-08-01,25.856,1.157,Manila,Philippines,15.27N,120.83E
141576,1845-09-01,25.289,1.172,Manila,Philippines,15.27N,120.83E
141577,1845-10-01,24.628,1.225,Manila,Philippines,15.27N,120.83E


In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2025 entries, 141568 to 143592
Data columns (total 7 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   dt                             2025 non-null   datetime64[ns]
 1   AverageTemperature             2025 non-null   float64       
 2   AverageTemperatureUncertainty  2025 non-null   float64       
 3   City                           2025 non-null   object        
 4   Country                        2025 non-null   object        
 5   Latitude                       2025 non-null   object        
 6   Longitude                      2025 non-null   object        
dtypes: datetime64[ns](1), float64(2), object(4)
memory usage: 206.6+ KB


In [26]:
df = df.set_index('dt').sort_index()
df

Unnamed: 0_level_0,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1845-01-01,24.995,1.871,Manila,Philippines,15.27N,120.83E
1845-02-01,24.63,2.228,Manila,Philippines,15.27N,120.83E
1845-03-01,26.512,1.369,Manila,Philippines,15.27N,120.83E
1845-04-01,26.448334,0.673189,Manila,Philippines,15.27N,120.83E
1845-05-01,27.564,1.498,Manila,Philippines,15.27N,120.83E
1845-06-01,26.819,1.38,Manila,Philippines,15.27N,120.83E
1845-07-01,26.414,1.306,Manila,Philippines,15.27N,120.83E
1845-08-01,25.856,1.157,Manila,Philippines,15.27N,120.83E
1845-09-01,25.289,1.172,Manila,Philippines,15.27N,120.83E
1845-10-01,24.628,1.225,Manila,Philippines,15.27N,120.83E


In [30]:
df['month'] = df.index.month_name()
df['year'] = df.index.year
df

Unnamed: 0_level_0,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,month,year
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1845-01-01,24.995,1.871,Manila,Philippines,15.27N,120.83E,January,1845
1845-02-01,24.63,2.228,Manila,Philippines,15.27N,120.83E,February,1845
1845-03-01,26.512,1.369,Manila,Philippines,15.27N,120.83E,March,1845
1845-04-01,26.448334,0.673189,Manila,Philippines,15.27N,120.83E,April,1845
1845-05-01,27.564,1.498,Manila,Philippines,15.27N,120.83E,May,1845
1845-06-01,26.819,1.38,Manila,Philippines,15.27N,120.83E,June,1845
1845-07-01,26.414,1.306,Manila,Philippines,15.27N,120.83E,July,1845
1845-08-01,25.856,1.157,Manila,Philippines,15.27N,120.83E,August,1845
1845-09-01,25.289,1.172,Manila,Philippines,15.27N,120.83E,September,1845
1845-10-01,24.628,1.225,Manila,Philippines,15.27N,120.83E,October,1845


In [31]:
def prepare_manila(df):
    df.dt = pd.to_datetime(df.dt)
    df = df.set_index('dt').sort_index()
    df['month'] = df.index.month_name()
    df['year'] = df.index.year
    
    return df

In [35]:
df['AverageTemperature']= df.AverageTemperature.apply(lambda x: (9/5)*x+32)
df['AverageTemperatureUncertainty']= df.AverageTemperatureUncertainty.apply(lambda x: (9/5)*x+32)
df

Unnamed: 0_level_0,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,month,year,F
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1845-01-01,76.991,35.3678,Manila,Philippines,15.27N,120.83E,January,1845,76.991
1845-02-01,76.334,36.0104,Manila,Philippines,15.27N,120.83E,February,1845,76.334
1845-03-01,79.7216,34.4642,Manila,Philippines,15.27N,120.83E,March,1845,79.7216
1845-04-01,79.607002,33.211739,Manila,Philippines,15.27N,120.83E,April,1845,79.607002
1845-05-01,81.6152,34.6964,Manila,Philippines,15.27N,120.83E,May,1845,81.6152
1845-06-01,80.2742,34.484,Manila,Philippines,15.27N,120.83E,June,1845,80.2742
1845-07-01,79.5452,34.3508,Manila,Philippines,15.27N,120.83E,July,1845,79.5452
1845-08-01,78.5408,34.0826,Manila,Philippines,15.27N,120.83E,August,1845,78.5408
1845-09-01,77.5202,34.1096,Manila,Philippines,15.27N,120.83E,September,1845,77.5202
1845-10-01,76.3304,34.205,Manila,Philippines,15.27N,120.83E,October,1845,76.3304


In [41]:
df['decade'] = df.year.astype(str).apply(lambda x: x[:3] + '0').astype(int)
df

Unnamed: 0_level_0,AverageTemperature,AverageTemperatureUncertainty,City,Country,Latitude,Longitude,month,year,F,decade
dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1845-01-01,76.991,35.3678,Manila,Philippines,15.27N,120.83E,January,1845,76.991,1840
1845-02-01,76.334,36.0104,Manila,Philippines,15.27N,120.83E,February,1845,76.334,1840
1845-03-01,79.7216,34.4642,Manila,Philippines,15.27N,120.83E,March,1845,79.7216,1840
1845-04-01,79.607002,33.211739,Manila,Philippines,15.27N,120.83E,April,1845,79.607002,1840
1845-05-01,81.6152,34.6964,Manila,Philippines,15.27N,120.83E,May,1845,81.6152,1840
1845-06-01,80.2742,34.484,Manila,Philippines,15.27N,120.83E,June,1845,80.2742,1840
1845-07-01,79.5452,34.3508,Manila,Philippines,15.27N,120.83E,July,1845,79.5452,1840
1845-08-01,78.5408,34.0826,Manila,Philippines,15.27N,120.83E,August,1845,78.5408,1840
1845-09-01,77.5202,34.1096,Manila,Philippines,15.27N,120.83E,September,1845,77.5202,1840
1845-10-01,76.3304,34.205,Manila,Philippines,15.27N,120.83E,October,1845,76.3304,1840
