In [1]:
# Import Dependencies
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np

In [2]:
# Read the global temperature records from the CSV in the working directory
csv_path = Path("GlobalTemperatures.csv")
temp_df = pd.read_csv(csv_path)
temp_df

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
0,1750-01-01,3.034,3.574,,,,,,
1,1750-02-01,3.083,3.702,,,,,,
2,1750-03-01,5.626,3.076,,,,,,
3,1750-04-01,8.490,2.451,,,,,,
4,1750-05-01,11.573,2.072,,,,,,
...,...,...,...,...,...,...,...,...,...
3187,2015-08-01,14.755,0.072,20.699,0.110,9.005,0.170,17.589,0.057
3188,2015-09-01,12.999,0.079,18.845,0.088,7.199,0.229,17.049,0.058
3189,2015-10-01,10.801,0.102,16.450,0.059,5.232,0.115,16.290,0.062
3190,2015-11-01,7.433,0.119,12.892,0.093,2.157,0.106,15.252,0.063


In [3]:
# Show the (rows, columns) dimensions of the freshly loaded frame
temp_df.shape

(3192, 9)

In [4]:
# List the column labels to see which fields are available
temp_df.columns

Index(['dt', 'LandAverageTemperature', 'LandAverageTemperatureUncertainty',
       'LandMaxTemperature', 'LandMaxTemperatureUncertainty',
       'LandMinTemperature', 'LandMinTemperatureUncertainty',
       'LandAndOceanAverageTemperature',
       'LandAndOceanAverageTemperatureUncertainty'],
      dtype='object')

In [5]:
# Drop the measurement-uncertainty columns; only the temperature readings
# themselves are used as features and target further down.
uncertainty_cols = [
    'LandAverageTemperatureUncertainty',
    'LandMaxTemperatureUncertainty',
    'LandMinTemperatureUncertainty',
    'LandAndOceanAverageTemperatureUncertainty',
]
# `columns=` already selects the column axis, so no `axis` argument is needed.
# errors='ignore' keeps this cell re-runnable: temp_df is reassigned here, so on
# a second execution the columns are already gone and a plain drop would raise
# KeyError. Check the displayed frame below to confirm the names matched.
temp_df = temp_df.drop(columns=uncertainty_cols, errors='ignore')
temp_df

Unnamed: 0,dt,LandAverageTemperature,LandMaxTemperature,LandMinTemperature,LandAndOceanAverageTemperature
0,1750-01-01,3.034,,,
1,1750-02-01,3.083,,,
2,1750-03-01,5.626,,,
3,1750-04-01,8.490,,,
4,1750-05-01,11.573,,,
...,...,...,...,...,...
3187,2015-08-01,14.755,20.699,9.005,17.589
3188,2015-09-01,12.999,18.845,7.199,17.049
3189,2015-10-01,10.801,16.450,5.232,16.290
3190,2015-11-01,7.433,12.892,2.157,15.252


In [6]:
# Keep only the rows in which every remaining column has a value
temp_df = temp_df.dropna(axis=0, how='any')

In [7]:
# Count the missing values remaining in each column
temp_df.isna().sum()

dt                                0
LandAverageTemperature            0
LandMaxTemperature                0
LandMinTemperature                0
LandAndOceanAverageTemperature    0
dtype: int64

In [8]:
# Model the combined land-and-ocean average from the three land-only readings
target = 'LandAndOceanAverageTemperature'
feature_cols = ['LandAverageTemperature', 'LandMaxTemperature', 'LandMinTemperature']
X = temp_df[feature_cols]
y = temp_df[target]

In [9]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.25,
)

In [10]:
# Print the dimensions of each split, in the order X_train, y_train, X_test, y_test
for split_part in (X_train, y_train, X_test, y_test):
    print(split_part.shape)

(1494, 3)
(1494,)
(498, 3)
(498,)


In [11]:
# Baseline: predict the training-set mean for every training row and report its MAE
from sklearn.metrics import mean_absolute_error
train_mean = y_train.mean()
y_pred = [train_mean] * len(y_train)
baseline_mae = mean_absolute_error(y_train, y_pred)
print("Baseline MAE:", round(baseline_mae, 5))

Baseline MAE: 1.13177


In [12]:
# Linear regression on standardized features, fitted on the training split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Pipeline.fit returns the pipeline itself, so `lm` is the fitted estimator
lm = make_pipeline(StandardScaler(), LinearRegression()).fit(X_train, y_train)
lm

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearregression', LinearRegression())])

In [13]:
# Random forest regressor pipeline, fitted on the training split.
# make_pipeline and StandardScaler are imported in the linear-regression cell above.
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.ensemble import RandomForestRegressor

model = make_pipeline(
    # SelectKBest defaults to f_classif, a score meant for classification targets.
    # The target here is continuous, so score with f_regression instead.
    # k='all' keeps every feature, so the selected columns are unchanged.
    SelectKBest(score_func=f_regression, k='all'),
    StandardScaler(),
    RandomForestRegressor(
        n_estimators=100,
        max_depth=50,
        random_state=77,  # fixed seed so the fitted forest is repeatable
        n_jobs=-1,        # use all available CPU cores
    ),
)
model.fit(X_train, y_train)

Pipeline(steps=[('selectkbest', SelectKBest(k='all')),
                ('standardscaler', StandardScaler()),
                ('randomforestregressor',
                 RandomForestRegressor(max_depth=50, n_jobs=-1,
                                       random_state=77))])