In [13]:
# Import Dependencies
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [18]:
# Read the sample temperature CSV into a DataFrame and display it
csv_path = Path('sample_global_temp.csv')
sample_df = pd.read_csv(csv_path)
sample_df

Unnamed: 0.1,Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,3000,2000-01-01,2.950,0.091,8.349,0.103,-2.322,0.072,13.773,0.064
1,3001,2000-02-01,4.184,0.093,9.863,0.096,-1.371,0.092,14.266,0.065
2,3002,2000-03-01,6.219,0.120,12.205,0.133,0.376,0.082,14.846,0.069
3,3003,2000-04-01,9.552,0.075,15.534,0.156,3.680,0.153,15.762,0.062
4,3004,2000-05-01,11.874,0.064,17.721,0.127,5.997,0.200,16.395,0.061
...,...,...,...,...,...,...,...,...,...,...
187,3187,2015-08-01,14.755,0.072,20.699,0.110,9.005,0.170,17.589,0.057
188,3188,2015-09-01,12.999,0.079,18.845,0.088,7.199,0.229,17.049,0.058
189,3189,2015-10-01,10.801,0.102,16.450,0.059,5.232,0.115,16.290,0.062
190,3190,2015-11-01,7.433,0.119,12.892,0.093,2.157,0.106,15.252,0.063


In [19]:
# Show the DataFrame's dimensions as a (rows, columns) tuple
sample_df.shape

(192, 10)

In [20]:
# List the column labels currently present in the DataFrame
sample_df.columns

Index(['Unnamed: 0', 'dt', 'LandAverageTemperature',
       'LandAverageTemperatureUncertainty', 'LandMaxTemperature',
       'LandMaxTemperatureUncertainty', 'LandMinTemperature',
       'LandMinTemperatureUncertainty', 'LandAndOceanAverageTemperature',
       'LandAndOceanAverageTemperatureUncertainty'],
      dtype='object')

In [21]:
# Drop the 'Unnamed: 0' column (presumably a row index saved into the CSV -- confirm)
# together with every *Uncertainty column, keeping only the date and temperature readings.
# Each label is listed exactly once, and `columns=` already targets the column axis,
# so no separate `axis` argument is passed.
columns_to_drop = [
    'Unnamed: 0',
    'LandAverageTemperatureUncertainty',
    'LandMaxTemperatureUncertainty',
    'LandMinTemperatureUncertainty',
    'LandAndOceanAverageTemperatureUncertainty',
]
sample_df = sample_df.drop(columns=columns_to_drop)
sample_df

Unnamed: 0,dt,LandAverageTemperature,LandMaxTemperature,LandMinTemperature,LandAndOceanAverageTemperature
0,2000-01-01,2.950,8.349,-2.322,13.773
1,2000-02-01,4.184,9.863,-1.371,14.266
2,2000-03-01,6.219,12.205,0.376,14.846
3,2000-04-01,9.552,15.534,3.680,15.762
4,2000-05-01,11.874,17.721,5.997,16.395
...,...,...,...,...,...
187,2015-08-01,14.755,20.699,9.005,17.589
188,2015-09-01,12.999,18.845,7.199,17.049
189,2015-10-01,10.801,16.450,5.232,16.290
190,2015-11-01,7.433,12.892,2.157,15.252


In [22]:
# Separate the regression target (y) from the predictor columns (X)
target = 'LandAndOceanAverageTemperature'
feature_cols = ['LandAverageTemperature', 'LandMaxTemperature', 'LandMinTemperature']
X = sample_df[feature_cols]
y = sample_df[target]

In [24]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [25]:
# Check the (rows, columns) size of the training feature matrix produced by the split
X_train.shape

(144, 3)

In [26]:
# Baseline: predict the training-set mean for every training row and report its MAE
from sklearn.metrics import mean_absolute_error
baseline_value = y_train.mean()
y_pred = [baseline_value for _ in range(len(y_train))]
baseline_mae = mean_absolute_error(y_train, y_pred)
print("Baseline MAE:", round(baseline_mae, 5))

Baseline MAE: 1.11778


In [27]:
# Linear regression pipeline: standardize the features, then fit ordinary least squares
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
regressor = LinearRegression()
lm = make_pipeline(scaler, regressor)
lm.fit(X_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())])

In [28]:
# Random forest regressor pipeline: feature scoring -> scaling -> forest
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.ensemble import RandomForestRegressor

# SelectKBest defaults to f_classif, a scorer intended for classification targets.
# The target here is a continuous temperature, so features are scored with
# f_regression instead. k='all' keeps every feature, so the scorer only affects
# the reported scores, not which columns reach the forest.
model = make_pipeline(
    SelectKBest(score_func=f_regression, k='all'),
    StandardScaler(),
    RandomForestRegressor(
        n_estimators=100,
        max_depth=50,
        random_state=77,  # fixed seed so the fitted forest is reproducible
        n_jobs=-1,        # use all available CPU cores
    ),
)
model.fit(X_train, y_train)

Pipeline(steps=[('selectkbest', SelectKBest(k='all')),
                ('standardscaler', StandardScaler()),
                ('randomforestregressor',
                 RandomForestRegressor(max_depth=50, n_jobs=-1,
                                       random_state=77))])