In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/airpressure/Folds5x2_pp.csv


# Credits and Links

* UDEMY - Machine Learning A-Z<sup>TM</sup>

<a id="index"></a>
# Table of Contents

1. [Importing Libraries](#1)
2. [Importing Dataset](#2)
3. [Splitting Dataset into Train/Test](#3)
4. [Training the models](#4)
   - [Multiple Linear Regression](#4.1)
   - [Polynomial Regression](#4.2)
   - [Support Vector Regression](#4.3)
   - [Decision Tree Regression](#4.4)
   - [Random Forest Regression](#4.5)
5. [Predicting and Testing all models](#5)
   - [Multiple Linear Regression](#5.1)
   - [Polynomial Regression](#5.2)
   - [Support Vector Regression](#5.3)
   - [Decision Tree Regression](#5.4)
   - [Random Forest Regression](#5.5)
6. [Evaluating Model Performance ($R^2$)](#6)
   - [Multiple Linear Regression](#6.1)
   - [Polynomial Regression](#6.2)
   - [Support Vector Regression](#6.3)
   - [Decision Tree Regression](#6.4)
   - [Random Forest Regression](#6.5)
   - [Final Result](#6.6)

<a id="1"></a>
# 1. Importing Libraries

[Go back to Index](#index)

In [2]:
#1. General
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline

#2. Data Preprocessing
##2.1 Fill Missing Data
from sklearn.impute import SimpleImputer

##2.2 Encoding
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

##2.3 Splitting Train and Test
from sklearn.model_selection import train_test_split

##2.4 Feature Scaling
from sklearn.preprocessing import StandardScaler

#3. All Models
##3.1 Simple/Multiple Linear Regression
from sklearn.linear_model import LinearRegression

##3.2 Polynomial Linear Regression
from sklearn.preprocessing import PolynomialFeatures

##3.3 Support Vector Regression
from sklearn.svm import SVR

##3.4 Decision Tree Regressor Class
from sklearn.tree import DecisionTreeRegressor

##3.5 Random Forest Regressor Class
from sklearn.ensemble import RandomForestRegressor

#4. Model Performance
from sklearn.metrics import r2_score



<a id="2"></a>
# 2. Importing Dataset

[Go back to Index](#index)

In [3]:
# Load the Combined Cycle Power Plant data (Folds5x2_pp.csv)
df = pd.read_csv('/kaggle/input/airpressure/Folds5x2_pp.csv')
# Every column except the last is a feature; the last column is the target
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [4]:
# Column-vector form of the target, shape (n, 1), used by the SVR branch
# (it is later passed through a StandardScaler, which works on 2D input)
y_svr = y.reshape(-1, 1)

<a id="3"></a>
# 3. Splitting Dataset into Train/Test

[Go back to Index](#index)

In [5]:
# Hold out 20% of the rows for testing; this split feeds every model except SVR
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [6]:
# SVR variant of the split: same features, test fraction and seed as above,
# but the target is the 2D (n, 1) array y_svr instead of the 1D y
X_train_svr, X_test_svr, y_train_svr, y_test_svr = train_test_split(
    X,
    y_svr,
    test_size=0.2,
    random_state=0,
)

<a id="4"></a>
# 4. Training the models

[Go back to Index](#index)

<a id="4.1"></a>
## 4.1 Multiple Linear Regression

[Go back to Index](#index)

In [7]:
# Ordinary least-squares fit on the raw (unscaled) training features
multi_regressor = LinearRegression()
multi_regressor.fit(X=X_train, y=y_train)

<a id="4.2"></a>
## 4.2 Polynomial Regression

[Go back to Index](#index)

In [8]:
# Expand the training features into polynomial terms up to degree 4,
# then fit a plain linear model on the expanded matrix
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X=X_train)
poly_regressor = LinearRegression()
poly_regressor.fit(X=X_poly, y=y_train)

<a id="4.3"></a>
## 4.3 Support Vector Regression

[Go back to Index](#index)

In [9]:
# Feature Scaling for SVR Model
# Fit the scalers on the untouched training split (X_train / y_train) instead of
# overwriting X_train_svr / y_train_svr with their own scaled values. Both splits
# use the same data, test_size and random_state, so they contain the same rows,
# but reading from the raw arrays keeps this cell idempotent: re-running it no
# longer re-fits the scalers on already-scaled data, which would make
# sc_y.inverse_transform return predictions in the wrong units.
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train_svr = sc_X.fit_transform(X_train)
# StandardScaler needs a 2D array, so present the 1D target as an (n, 1) column
y_train_svr = sc_y.fit_transform(y_train.reshape(-1, 1))

In [10]:
# Support vector regressor with a Radial Basis Function kernel,
# trained on the standardized features and target
svr_regressor = SVR(kernel = 'rbf')
# y_train_svr is an (n, 1) column (the shape StandardScaler produced); SVR wants
# a 1D target, so flatten it to avoid the column_or_1d DataConversionWarning.
svr_regressor.fit(X_train_svr, y_train_svr.ravel())

  y = column_or_1d(y, warn=True)


<a id="4.4"></a>
## 4.4 Decision Tree Regression

[Go back to Index](#index)

In [11]:
# Single regression tree; the fixed seed makes the fit repeatable
dtr_regressor = DecisionTreeRegressor(random_state=0)
dtr_regressor.fit(X=X_train, y=y_train)

<a id="4.5"></a>
## 4.5 Random Forest Regression

[Go back to Index](#index)

In [12]:
# Forest of 10 regression trees; the fixed seed makes the fit repeatable
rfr_regressor = RandomForestRegressor(
    n_estimators=10,
    random_state=0,
)
rfr_regressor.fit(X=X_train, y=y_train)

<a id="5"></a>
# 5. Predicting and Testing all models

[Go back to Index](#index)

<a id="5.1"></a>
## 5.1 Multiple Linear Regression

[Go back to Index](#index)

In [13]:
# Predict on the held-out rows, then print predictions (left column)
# next to the true values (right column), rounded to 2 decimals
mr_y_pred = multi_regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((mr_y_pred, y_test)))

[[433.36 426.18]
 [448.39 451.1 ]
 [445.89 442.87]
 ...
 [456.06 454.2 ]
 [446.91 444.13]
 [432.57 436.58]]


<a id="5.2"></a>
## 5.2 Polynomial Regression

[Go back to Index](#index)

In [14]:
# The test features must go through the same polynomial expansion that was
# fitted on the training set before the linear model can score them
poly_y_pred = poly_regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2)
# Predictions (left column) next to the true values (right column)
print(np.column_stack((poly_y_pred, y_test)))

[[430.93 426.18]
 [448.01 451.1 ]
 [444.39 442.87]
 ...
 [455.1  454.2 ]
 [446.74 444.13]
 [433.52 436.58]]


<a id="5.3"></a>
## 5.3 Support Vector Regression

[Go back to Index](#index)

In [15]:
# Scale the test features with the training scaler, predict in scaled units,
# then map the predictions back to the original target units via sc_y
svr_y_pred = sc_y.inverse_transform(
    svr_regressor.predict(sc_X.transform(X_test_svr)).reshape(-1, 1)
)
np.set_printoptions(precision=2)
# Both arrays are already (n, 1) columns: predictions left, true values right
print(np.column_stack((svr_y_pred, y_test_svr)))

[[431.22 426.18]
 [448.88 451.1 ]
 [444.04 442.87]
 ...
 [455.18 454.2 ]
 [447.12 444.13]
 [432.72 436.58]]


<a id="5.4"></a>
## 5.4 Decision Tree Regression

[Go back to Index](#index)

In [16]:
# Predict on the held-out rows, then print predictions (left column)
# next to the true values (right column), rounded to 2 decimals
dtr_y_pred = dtr_regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((dtr_y_pred, y_test)))

[[433.83 426.18]
 [455.23 451.1 ]
 [441.43 442.87]
 ...
 [454.25 454.2 ]
 [444.23 444.13]
 [437.14 436.58]]


<a id="5.5"></a>
## 5.5 Random Forest Regression

[Go back to Index](#index)

In [17]:
# Predict on the held-out rows, then print predictions (left column)
# next to the true values (right column), rounded to 2 decimals
rfr_y_pred = rfr_regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((rfr_y_pred, y_test)))

[[431.77 426.18]
 [448.62 451.1 ]
 [441.1  442.87]
 ...
 [454.33 454.2 ]
 [446.42 444.13]
 [435.72 436.58]]


<a id="6"></a>
# 6. Evaluating Model Performance

[Go back to Index](#index)

<a id="6.1"></a>
## 6.1 Multiple Linear Regression

[Go back to Index](#index)

In [18]:
# One-row summary frame: model name and its R2 score on the test split
df_mr = pd.DataFrame(
    [{"Model": "Multiple Regression", "R2": r2_score(y_test, mr_y_pred)}]
)
df_mr

Unnamed: 0,Model,R2
0,Multiple Regression,0.929899


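*Leftover snippet (not executed). `regressor` is not defined in this notebook; it presumably corresponds to `poly_regressor` from section 4.2:*

```python
y_pred = regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))
```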

<a id="6.2"></a>
## 6.2 Polynomial Regression

[Go back to Index](#index)

In [19]:
# One-row summary frame: model name and its R2 score on the test split
df_pr = pd.DataFrame(
    [{"Model": "Polynomial Regression", "R2": r2_score(y_test, poly_y_pred)}]
)
df_pr

Unnamed: 0,Model,R2
0,Polynomial Regression,0.942883


<a id="6.3"></a>
## 6.3 Support Vector Regression

[Go back to Index](#index)

In [20]:
# One-row summary frame: model name and its R2 score on the SVR test split
df_svr = pd.DataFrame(
    [{"Model": "Support Vector Regression", "R2": r2_score(y_test_svr, svr_y_pred)}]
)
df_svr

Unnamed: 0,Model,R2
0,Support Vector Regression,0.945384


<a id="6.4"></a>
## 6.4 Decision Tree Regression

[Go back to Index](#index)

In [21]:
# One-row summary frame: model name and its R2 score on the test split
df_dtr = pd.DataFrame(
    [{"Model": "Decision Tree Regression", "R2": r2_score(y_test, dtr_y_pred)}]
)
df_dtr

Unnamed: 0,Model,R2
0,Decision Tree Regression,0.935815


<a id="6.5"></a>
## 6.5 Random Forest Regression

[Go back to Index](#index)

In [22]:
# One-row summary frame: model name and its R2 score on the test split
df_rfr = pd.DataFrame(
    [{"Model": "Random Forest Regression", "R2": r2_score(y_test, rfr_y_pred)}]
)
df_rfr

Unnamed: 0,Model,R2
0,Random Forest Regression,0.960025


<a id="6.6"></a>
## 6.6 Final Result

[Go back to Index](#index)

In [23]:
# Stack the five one-row summary frames into a single comparison table,
# indexed by model name, with the R2 score as the only column
df_r2 = pd.concat(
    objs=[df_mr, df_pr, df_svr, df_dtr, df_rfr],
    axis=0,
).set_index(keys="Model")
df_r2

Unnamed: 0_level_0,R2
Model,Unnamed: 1_level_1
Multiple Regression,0.929899
Polynomial Regression,0.942883
Support Vector Regression,0.945384
Decision Tree Regression,0.935815
Random Forest Regression,0.960025
