**Not many preprocessing tools are included here.**

## <span style="color:orange">Multiple Linear Regression</span>

#### <span style="color:orange">Importing the Libraries</span>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### <span style="color:orange">Importing the Dataset</span>

In [2]:
## Load the dataset for the Multiple Linear Regression section.
# // A forward slash keeps this relative path valid on Windows, macOS and Linux
# // (the previous backslash separator only resolved on Windows).
dataset_MLR = pd.read_csv('Dataset/Data.csv')

In [3]:
## X: every column except the last one
input_features_MLR = dataset_MLR.iloc[:, :-1].to_numpy()
## y (dependent variable): the last column
dv_MLR = dataset_MLR.iloc[:, -1].to_numpy()

#### <span style="color:orange">Splitting the data</span>

In [4]:
from sklearn.model_selection import train_test_split

## Split into X_train, X_test, y_train, y_test (in that order).
# // test_size=0.2 holds out 20% of the rows; random_state=0 is fixed so the split is repeatable.
input_features_train_MLR, input_features_test_MLR, dv_train_MLR, dv_test_MLR = train_test_split(
    input_features_MLR,
    dv_MLR,
    test_size=0.2,
    random_state=0,
)

#### <span style="color:orange">Training the Multiple Linear Regression Model on the Training set</span>

In [5]:
from sklearn.linear_model import LinearRegression

In [6]:
## Building the model: an unfitted LinearRegression with default settings
MLR = LinearRegression()

In [7]:
## Fit the regression on the training split only
MLR.fit(X=input_features_train_MLR, y=dv_train_MLR)

#### <span style="color:orange">Predicting the test set results</span>

In [8]:
## y_pred: predictions for the held-out test features
test_set_predicted_dv_MLR = MLR.predict(X=input_features_test_MLR)

In [9]:
# // Print arrays with two decimal places
np.set_printoptions(precision=2)

# // Left column: predicted value, right column: real value
print(np.column_stack((test_set_predicted_dv_MLR, dv_test_MLR)))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


#### <span style="color:orange">Making a single prediction</span>

**Depends on The Dataset Features**

**Check the Main MLR Code for Clarification**

#### <span style="color:orange">Getting the Final Linear Regression Equation with the Values of the Coefficients</span>

In [10]:
## b0: y-intercept of the fitted model
print("The final b0: ", MLR.intercept_)  # // the fitted value is shown in the cell output below

The final b0:  452.84103716163816


In [11]:
## bn: slopes/coefficients (b1, b2, etc.) of the fitted model, printed as one array
print("The final bn: ", MLR.coef_)

The final bn:  [-1.97 -0.24  0.06 -0.16]


#### <span style="color:orange">Evaluating the MLR Model Performance</span>

In [12]:
from sklearn.metrics import r2_score

## R^2 of the predictions against the real test values, shown as a percentage
r2_percent_MLR = r2_score(dv_test_MLR, test_set_predicted_dv_MLR) * 100
print(r2_percent_MLR, '%')

93.25315554761303 %


___

## <span style="color:orange">Polynomial Regression</span>

#### <span style="color:orange">Importing the libraries</span>

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### <span style="color:orange">Importing the Dataset</span>

In [14]:
## Load the dataset for the Polynomial Regression section.
# // A forward slash keeps this relative path valid on Windows, macOS and Linux
# // (the previous backslash separator only resolved on Windows).
dataset_PLR = pd.read_csv('Dataset/Data.csv')

In [15]:
## X: every column except the last one
input_features_PLR = dataset_PLR.iloc[:, :-1].to_numpy()
## y (dependent variable): the last column
dv_PLR = dataset_PLR.iloc[:, -1].to_numpy()

#### <span style="color:orange">Splitting the Data</span>

In [16]:
from sklearn.model_selection import train_test_split

## Split into X_train, X_test, y_train, y_test (in that order).
# // test_size=0.2 holds out 20% of the rows; random_state=0 is fixed so the split is repeatable.
input_features_train_PLR, input_features_test_PLR, dv_train_PLR, dv_test_PLR = train_test_split(
    input_features_PLR,
    dv_PLR,
    test_size=0.2,
    random_state=0,
)

#### <span style="color:orange">Training The Polynomial Regression Model on the Training Set</span>

In [17]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

In [18]:
## Polynomial feature expansion (the degree is chosen here)
polynomial_R_features = PolynomialFeatures(degree=4)
# // Learn the expansion from the training features, then apply it to them
polynomial_R_features.fit(input_features_train_PLR)
poly_input_features = polynomial_R_features.transform(input_features_train_PLR)

## Linear regression trained on the expanded (polynomial) input features
poly_linear_R = LinearRegression()
poly_linear_R.fit(X=poly_input_features, y=dv_train_PLR)

#### <span style="color:orange">Predicting the test set results</span>

In [19]:
## y_pred
# // Expand the test features with the already-fitted transformer, then predict on the result
poly_test_features = polynomial_R_features.transform(input_features_test_PLR)
test_set_predicted_dv_PLR = poly_linear_R.predict(poly_test_features)

In [20]:
# // Print arrays with two decimal places
np.set_printoptions(precision=2)

# // Left column: predicted value, right column: real value
print(np.column_stack((test_set_predicted_dv_PLR, dv_test_PLR)))

[[433.94 431.23]
 [457.9  460.01]
 [460.52 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.67 463.28]]


#### <span style="color:orange">Evaluating PLR Performance</span>

In [21]:
from sklearn.metrics import r2_score

## R^2 of the predictions against the real test values, shown as a percentage
r2_percent_PLR = r2_score(dv_test_PLR, test_set_predicted_dv_PLR) * 100
print(r2_percent_PLR, '%')

94.58192729884297 %


___

## <span style="color:orange">Support Vector Regression</span>

#### <span style="color:orange">Importing the Libraries</span>

In [22]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### <span style="color:orange">Importing the Dataset</span>

In [23]:
## Load the dataset for the Support Vector Regression section.
# // A forward slash keeps this relative path valid on Windows, macOS and Linux
# // (the previous backslash separator only resolved on Windows).
dataset_SVR = pd.read_csv('Dataset/Data.csv')

In [24]:
# // Bare expression: the notebook renders the loaded frame as this cell's output
dataset_SVR

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.40,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.50,1009.23,96.62,473.90
...,...,...,...,...,...
9563,16.65,49.69,1014.01,91.00,460.03
9564,13.19,39.18,1023.67,66.78,469.62
9565,31.32,74.33,1012.92,36.48,429.57
9566,24.48,69.45,1013.86,62.39,435.74


In [25]:
## X: every column except the last one
input_features_SVR = dataset_SVR.iloc[:, :-1].to_numpy()
## y (dependent variable): the last column
dv_SVR = dataset_SVR.iloc[:, -1].to_numpy()

In [26]:
# // Inspect X: every column of the dataset except the last
input_features_SVR

array([[  14.96,   41.76, 1024.07,   73.17],
       [  25.18,   62.96, 1020.04,   59.08],
       [   5.11,   39.4 , 1012.16,   92.14],
       ...,
       [  31.32,   74.33, 1012.92,   36.48],
       [  24.48,   69.45, 1013.86,   62.39],
       [  21.6 ,   62.52, 1017.23,   67.87]])

In [27]:
# // Inspect y: the last column of the dataset, still a flat 1-D array at this point
dv_SVR

array([463.26, 444.37, 488.56, ..., 429.57, 435.74, 453.28])

In [28]:
# // Reshape the dependent variable from a flat 1-D array into a single column (n rows x 1 col).
# // This is only done for the preprocessing step (feature scaling of y in the scaling cell below).
# // When the SVR is fitted, the scaled column is flattened back to 1-D with np.ravel.
# // NOTE(review): the original note said the SVR works the same with either shape -- confirm
# /  against the scikit-learn docs before relying on that.
dv_SVR = dv_SVR.reshape(len(dv_SVR), 1)

In [29]:
# // Inspect y again after the reshape: same values, now laid out as a single column
dv_SVR

array([[463.26],
       [444.37],
       [488.56],
       ...,
       [429.57],
       [435.74],
       [453.28]])

#### <span style="color:orange">Splitting the Data</span>

In [30]:
from sklearn.model_selection import train_test_split

## Split into X_train, X_test, y_train, y_test (in that order).
# // test_size=0.2 holds out 20% of the rows; random_state=0 is fixed so the split is repeatable.
input_features_train_SVR, input_features_test_SVR, dv_train_SVR, dv_test_SVR = train_test_split(
    input_features_SVR,
    dv_SVR,
    test_size=0.2,
    random_state=0,
)

#### <span style="color:orange">Feature Scaling for SVR</span>

In [31]:
from sklearn.preprocessing import StandardScaler

# // NOTE: this cell overwrites its own inputs. Re-running it without re-running the split cell
# /  first would refit both scalers on data that is already scaled.

## Standardizing the input features (scaler fitted on the training rows only)
standard_scaler_X = StandardScaler()
standard_scaler_X.fit(input_features_train_SVR)
input_features_train_SVR = standard_scaler_X.transform(input_features_train_SVR)

## Standardizing the dependent variable with its own scaler
# // Kept as a separate object so predictions can be converted back with inverse_transform
standard_scaler_y = StandardScaler()
standard_scaler_y.fit(dv_train_SVR)
dv_train_SVR = standard_scaler_y.transform(dv_train_SVR)

#### <span style="color:orange">Training the SVR Model on the Training Set</span>

In [32]:
from sklearn.svm import SVR

## Building the model: support vector regressor with the RBF kernel
support_vector_regressor = SVR(kernel='rbf')
## Training the model
# // The scaled dependent variable is a single column here, so it is flattened to 1-D for fitting
support_vector_regressor.fit(X=input_features_train_SVR, y=dv_train_SVR.ravel())

#### <span style="color:orange">Predicting Test Set Results</span>

In [33]:
## y_pred
# // 1) scale the test features with the scaler fitted on the training features
scaled_test_features_SVR = standard_scaler_X.transform(input_features_test_SVR)
# // 2) predict (the result is in the scaled units of the dependent variable)
scaled_predictions_SVR = support_vector_regressor.predict(scaled_test_features_SVR)
# // 3) reshape to one column and convert back to the original units
test_set_predicted_dv_SVR = standard_scaler_y.inverse_transform(scaled_predictions_SVR.reshape(-1, 1))

In [34]:
# // Print arrays with two decimal places
np.set_printoptions(precision=2)

# // Left column: predicted value, right column: real value
print(np.column_stack((test_set_predicted_dv_SVR, dv_test_SVR)))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


#### <span style="color:orange">Evaluating the SVR Model Performance</span>

In [35]:
from sklearn.metrics import r2_score

## R^2 of the predictions against the real test values, shown as a percentage
r2_percent_SVR = r2_score(dv_test_SVR, test_set_predicted_dv_SVR) * 100
print(r2_percent_SVR, '%')

94.80784049986258 %


___

## <span style="color:orange">Decision Tree</span>

#### <span style="color:orange">Importing the Libraries</span>

In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### <span style="color:orange">Importing the Dataset</span>

In [37]:
## Load the dataset for the Decision Tree section.
# // A forward slash keeps this relative path valid on Windows, macOS and Linux
# // (the previous backslash separator only resolved on Windows).
dataset_DT = pd.read_csv('Dataset/Data.csv')

In [38]:
## X: every column except the last one
input_features_DT = dataset_DT.iloc[:, :-1].to_numpy()
## y (dependent variable): the last column
dv_DT = dataset_DT.iloc[:, -1].to_numpy()

#### <span style="color:orange">Splitting the Data</span>

In [39]:
from sklearn.model_selection import train_test_split

## Split into X_train, X_test, y_train, y_test (in that order).
# // test_size=0.2 holds out 20% of the rows; random_state=0 is fixed so the split is repeatable.
input_features_train_DT, input_features_test_DT, dv_train_DT, dv_test_DT = train_test_split(
    input_features_DT,
    dv_DT,
    test_size=0.2,
    random_state=0,
)

#### <span style="color:orange">Training the DT Model on the Training Set</span>

In [40]:
from sklearn.tree import DecisionTreeRegressor

## Building the model: a single regression tree, random_state fixed at 0
decision_tree_regressor = DecisionTreeRegressor(random_state=0)
## Training the model on the training split
decision_tree_regressor.fit(input_features_train_DT, dv_train_DT)

#### <span style="color:orange">Predicting the Test Set Results</span>

In [41]:
## y_pred: predictions for the held-out test features
test_set_predicted_dv_DT = decision_tree_regressor.predict(X=input_features_test_DT)

In [42]:
# // Print arrays with two decimal places
np.set_printoptions(precision=2)

# // Left column: predicted value, right column: real value
print(np.column_stack((test_set_predicted_dv_DT, dv_test_DT)))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


#### <span style="color:orange">Evaluating the DT model</span>

In [43]:
from sklearn.metrics import r2_score

## R^2 of the predictions against the real test values, shown as a percentage
r2_percent_DT = r2_score(dv_test_DT, test_set_predicted_dv_DT) * 100
print(r2_percent_DT, '%')

92.2905874177941 %


___

## <span style="color:orange">Random Forest</span>

#### <span style="color:orange">Importing the Libraries</span>

In [44]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

#### <span style="color:orange">Importing the Dataset</span>

In [45]:
## Load the dataset for the Random Forest section.
# // A forward slash keeps this relative path valid on Windows, macOS and Linux
# // (the previous backslash separator only resolved on Windows).
dataset_RF = pd.read_csv('Dataset/Data.csv')

In [46]:
## X: every column except the last one
input_features_RF = dataset_RF.iloc[:, :-1].to_numpy()
## y (dependent variable): the last column
dv_RF = dataset_RF.iloc[:, -1].to_numpy()

#### <span style="color:orange">Splitting the Data</span>

In [47]:
from sklearn.model_selection import train_test_split

## Split into X_train, X_test, y_train, y_test (in that order).
# // test_size=0.2 holds out 20% of the rows; random_state=0 is fixed so the split is repeatable.
input_features_train_RF, input_features_test_RF, dv_train_RF, dv_test_RF = train_test_split(
    input_features_RF,
    dv_RF,
    test_size=0.2,
    random_state=0,
)

#### <span style="color:orange">Training the RF Model on the Training Set</span>

In [48]:
from sklearn.ensemble import RandomForestRegressor

## Building the model: n_estimators is the number of decision trees in the forest
random_forest_regressor = RandomForestRegressor(n_estimators=10, random_state=0)
## Training the model on the training split
random_forest_regressor.fit(input_features_train_RF, dv_train_RF)

#### <span style="color:orange">Predicting the Test Set Results</span>

In [49]:
## y_pred: predictions for the held-out test features
test_set_predicted_dv_RF = random_forest_regressor.predict(X=input_features_test_RF)

In [50]:
# // Print arrays with two decimal places
np.set_printoptions(precision=2)

# // Left column: predicted value, right column: real value
print(np.column_stack((test_set_predicted_dv_RF, dv_test_RF)))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


#### <span style="color:orange">Evaluating RF Model</span>

In [51]:
from sklearn.metrics import r2_score

## R^2 of the predictions against the real test values, shown as a percentage
r2_percent_RF = r2_score(dv_test_RF, test_set_predicted_dv_RF) * 100
print(r2_percent_RF, '%')

96.15908334363877 %


___

**Notes:**

The best model for this dataset was Random Forest, with a test-set R² score of about 96.2%.