# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
%matplotlib inline

# Read Dataset

In [3]:
# Load the dataset from a CSV file; the relative path is resolved against the
# current working directory of the kernel.
dataset = pd.read_csv('model_selection.csv')

In [4]:
dataset

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.40,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.50,1009.23,96.62,473.90
...,...,...,...,...,...
9563,16.65,49.69,1014.01,91.00,460.03
9564,13.19,39.18,1023.67,66.78,469.62
9565,31.32,74.33,1012.92,36.48,429.57
9566,24.48,69.45,1013.86,62.39,435.74


In [5]:
# Features: every column except the last. Target: the last column.
# Both are pulled out as plain NumPy arrays.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Train Test Split

In [6]:
from sklearn.model_selection import train_test_split

def dataset_split(X, y, test_size=0.2, random_state=0):
    """Split features and target into training and test subsets.

    Thin wrapper around ``train_test_split``. The defaults reproduce the
    split this function has always produced (20% held out, seed 0), so
    existing two-argument calls behave exactly as before.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target values aligned with the rows of ``X``.
    test_size : float or int, default=0.2
        Forwarded unchanged to ``train_test_split``.
    random_state : int or None, default=0
        Forwarded unchanged to ``train_test_split``; a fixed integer makes
        the shuffle reproducible.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]``.
    """
    return train_test_split(X, y, test_size=test_size, random_state=random_state)

# Feature Scaling

In [7]:
from sklearn.preprocessing import StandardScaler

def independent_var_scaler(X_train, X_test):
    """Standardise the feature matrices using training-set statistics.

    The scaler is fitted on ``X_train`` only and then applied to both
    ``X_train`` and ``X_test``.

    Returns
    -------
    tuple
        ``(fitted_scaler, scaled_X_train, scaled_X_test)``.
    """
    feature_scaler = StandardScaler()
    train_scaled = feature_scaler.fit_transform(X_train)
    test_scaled = feature_scaler.transform(X_test)
    return feature_scaler, train_scaled, test_scaled

def dependent_var_scaler(y_train, y_test):
    """Standardise the target vectors using training-set statistics.

    The scaler is fitted on ``y_train`` only and then applied to ``y_test``.

    Returns
    -------
    tuple
        ``(fitted_scaler, scaled_y_train, scaled_y_test)`` where both scaled
        targets are flattened back to one dimension.
    """
    target_scaler = StandardScaler()

    # The scaler is fed 2-D input, so each target vector goes in as one column.
    train_column = y_train.reshape(len(y_train), 1)
    train_scaled = target_scaler.fit_transform(train_column)

    test_column = y_test.reshape(len(y_test), 1)
    test_scaled = target_scaler.transform(test_column)

    # Collapse the single-column results back to flat vectors.
    return target_scaler, train_scaled.ravel(), test_scaled.ravel()

# Model Training

In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

def linear_regression(X, y):
    """Fit a ``LinearRegression`` model on ``X``/``y`` and return it."""
    # ``fit`` returns the estimator itself, so construction and fitting chain.
    return LinearRegression().fit(X, y)

def polynomial_linear_regression(X, y, degree):
    """Fit a linear model on polynomial expansions of the features.

    Parameters
    ----------
    X : array-like
        Feature matrix to expand.
    y : array-like
        Target values.
    degree : int
        Degree passed to ``PolynomialFeatures``.

    Returns
    -------
    tuple
        ``(fitted_regressor, expanded_X, fitted_polynomial_transformer)``.
        The transformer is returned so new data can be expanded the same way
        before prediction.
    """
    expander = PolynomialFeatures(degree=degree)
    expanded = expander.fit_transform(X)
    regressor = LinearRegression().fit(expanded, y)
    return regressor, expanded, expander

def sv_regressor(X, y, kernel):
    """Fit an ``SVR`` with the given ``kernel`` on ``X``/``y`` and return it."""
    return SVR(kernel=kernel).fit(X, y)

def decision_tree(X, y):
    """Fit a ``DecisionTreeRegressor`` (seed fixed to 0) and return it."""
    regressor = DecisionTreeRegressor(random_state=0)
    return regressor.fit(X, y)

def random_forest(X, y, n_estimators, random_state=0):
    """Fit a ``RandomForestRegressor`` and return it.

    Parameters
    ----------
    X : array-like
        Training feature matrix.
    y : array-like
        Training target values.
    n_estimators : int
        Number of trees in the forest.
    random_state : int or None, default=0
        Seed forwarded to the forest. It defaults to a fixed value so that
        repeated runs of the notebook produce the same model and score,
        matching the fixed seeds used by ``decision_tree`` and
        ``dataset_split``. Pass ``None`` to get unseeded behaviour.

    Returns
    -------
    RandomForestRegressor
        The fitted estimator.
    """
    forest = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
    forest.fit(X, y)
    return forest

# Testing Metric: R² Score

In [9]:
from sklearn.metrics import r2_score

def testing(models):
    """Compute the R² score of each model on its own evaluation data.

    Parameters
    ----------
    models : dict
        Maps a display name to a dict with the keys ``'Model'`` (a fitted
        estimator), ``'X'`` (inputs already preprocessed the way that model
        expects) and ``'y'`` (reference targets on the same scale as the
        model's predictions).

    Returns
    -------
    dict
        Maps each display name to its R² score.
    """
    return {
        label: r2_score(y_pred=entry['Model'].predict(entry['X']), y_true=entry['y'])
        for label, entry in models.items()
    }

# Single Prediction

In [10]:
def predict_no(model, X, scaler_X=None, scaler_y=None, pf=None):
    """Predict the target for a single observation.

    Each optional preprocessing object is applied whenever it is supplied,
    independently of the others. Previously a lone ``scaler_X`` or
    ``scaler_y`` was silently ignored, and ``pf`` was ignored when both
    scalers were given, which produced predictions on the wrong scale.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict``.
    X : sequence
        Feature values of one observation (a flat sequence, not a matrix).
    scaler_X : object, optional
        Fitted feature scaler exposing ``transform``; applied to the raw
        observation first.
    scaler_y : object, optional
        Fitted target scaler exposing ``inverse_transform``; used to map the
        prediction back to original target units.
    pf : object, optional
        Fitted feature expander exposing ``transform``; applied after
        ``scaler_X`` when both are given.

    Returns
    -------
    numpy.ndarray
        The value returned by ``model.predict`` when ``scaler_y`` is not
        given. With ``scaler_y`` the un-scaled prediction is returned as a
        single row of shape ``(1, n)``, the shape this code path has always
        produced.
    """
    # Estimators expect 2-D input, so wrap the observation as a one-row batch.
    sample = [X]
    if scaler_X is not None:
        sample = scaler_X.transform(sample)
    if pf is not None:
        sample = pf.transform(sample)

    y_pred = model.predict(sample)
    if scaler_y is None:
        return y_pred

    # inverse_transform works on a column; hand the result back as one row.
    y_pred = scaler_y.inverse_transform(y_pred.reshape(len(y_pred), 1))
    return y_pred.reshape(1, len(y_pred))

# Implementation

In [11]:
# Hold out a test set once; every model below is trained and scored on this same split.
X_train, X_test, y_train, y_test = dataset_split(X, y)

In [12]:
# Linear regressor
# No feature scaling required
lr = linear_regression(X_train, y_train)

# Polynomial regressor (degree 4)
# No feature scaling required
pr, X_train_poly, pf = polynomial_linear_regression(X_train, y_train, 4)

# SVR (RBF kernel)
# Features and target are standardised; both scalers are fitted on the training split only
scaler_X_svr, X_train_svr, X_test_svr = independent_var_scaler(X_train, X_test)
scaler_y_svr, y_train_svr, y_test_svr = dependent_var_scaler(y_train, y_test)
svr = sv_regressor(X_train_svr, y_train_svr, 'rbf')

# Decision tree
# No feature scaling required
tree = decision_tree(X_train, y_train)

# Random forest (100 trees)
# No feature scaling required
forest = random_forest(X_train, y_train, n_estimators=100)

# Comparison

In [13]:
# Each model is scored on test inputs preprocessed the way it was trained.
# SVR is scored in standardised target units; R² is unaffected by that
# rescaling, so its score stays comparable with the others.
r_score = testing({
    'Linear Regression': {'Model': lr, 'X': X_test, 'y': y_test},
    'Polynomial Regression': {'Model': pr, 'X': pf.transform(X_test), 'y': y_test},
    'SVR': {'Model': svr, 'X': X_test_svr, 'y': y_test_svr},
    'Tree': {'Model': tree, 'X': X_test, 'y': y_test},
    'Forest': {'Model': forest, 'X': X_test, 'y': y_test},
})

In [14]:
r_score

{'Linear Regression': 0.9325315554761303,
 'Polynomial Regression': 0.9458193347147237,
 'SVR': 0.9480784049986258,
 'Tree': 0.922905874177941,
 'Forest': 0.9648774827438888}

In [15]:
# Predict the target for one raw observation with the linear model; values are
# given in the dataset's feature column order (presumably AT, V, AP, RH — confirm
# against the CSV header).
predict_no(lr, [24.12, 58.66, 1011.55, 58.96])

array([446.67292286])