# $\textbf{Model Training and Prediction}$

## **Importing the relevant libraries**

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf

## **Importing the engineered datasets**

In [2]:
X_train = pd.read_csv('X_train.csv').iloc[:, 1:].values
X_test = pd.read_csv('X_test.csv').iloc[:, 1:].values
y_train = pd.read_csv('y_train.csv').iloc[:, :].values
y_test = pd.read_csv('y_test.csv').iloc[:, :].values

### **Checking their shape**
Let's check if their shapes are as expected.

In [3]:
X_train.shape

(978, 90)

In [4]:
X_test.shape

(482, 90)

In [5]:
y_train.shape

(978, 1)

In [6]:
y_test.shape

(482, 1)

## **Training models**

### **Random Forest Regression**

In [7]:
from sklearn.ensemble import RandomForestRegressor
regressor_rf = RandomForestRegressor(n_estimators = 10, random_state = 0)
regressor_rf.fit(X_train, y_train)

  This is separate from the ipykernel package so we can avoid doing imports until


RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=10, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

#### **Predicting the test results**

In [8]:
y_pred_rf = regressor_rf.predict(X_test)

#### **Evaluating the Model Performance**

In [9]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred_rf)

0.8524775450234576

### **Multiple Linear Regression**

In [10]:
from sklearn.linear_model import LinearRegression
regressor_mle = LinearRegression()
regressor_mle.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

#### **Predicting the test results**

In [11]:
y_pred_mle = regressor_mle.predict(X_test)

#### **Evaluating the Model Performance**

In [12]:
r2_score(y_test, y_pred_mle)

0.7625674258043331

### **Decision Tree Regression**

In [13]:
from sklearn.tree import DecisionTreeRegressor
regressor_dt = DecisionTreeRegressor(random_state = 0)
regressor_dt.fit(X_train, y_train)

DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,
                      max_features=None, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, presort='deprecated',
                      random_state=0, splitter='best')

#### **Predicting the test results**

In [14]:
y_pred_dt = regressor_dt.predict(X_test)

#### **Evaluating the Model Performance**

In [15]:
r2_score(y_test, y_pred_dt)

0.7288962903084069

### **XGBoost**

In [16]:
from xgboost import XGBRegressor
regressor_xgb = XGBRegressor()
regressor_xgb.fit(X_train, y_train)



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.1, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)

#### **Predicting the test results**

In [17]:
y_pred_xgb = regressor_xgb.predict(X_test)

#### **Evaluating the Model Performance**

In [18]:
r2_score(y_test, y_pred_xgb)

0.8694805333603076

### **Artificial Neural Networks**

In [29]:
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=128, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))
ann.add(tf.keras.layers.Dense(units=128, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))

ann.add(tf.keras.layers.Dense(units=256, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))
ann.add(tf.keras.layers.Dense(units=256, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))

ann.add(tf.keras.layers.Dense(units=512, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))
ann.add(tf.keras.layers.Dense(units=512, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))

ann.add(tf.keras.layers.Dense(units=1024, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))
ann.add(tf.keras.layers.Dense(units=1024, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))

ann.add(tf.keras.layers.Dense(units=2048, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))
ann.add(tf.keras.layers.Dense(units=2048, activation='relu'))
ann.add(tf.keras.layers.Dropout(0.2))

ann.add(tf.keras.layers.Dense(units=1))

In [30]:
ann.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [31]:
r = ann.fit(X_train, y_train, batch_size = 32, epochs = 200)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

#### **Predicting the test results**

In [32]:
y_pred_ann = ann.predict(X_test)

#### **Evaluating the Model Performance**

In [33]:
r2_score(y_test, y_pred_ann)

0.8686414219460973

## **Final Comments:**
1. As we can see, I got the best $r^{2}$ with XGBoost. Though this is a decent score, I feel that the score would have been even better if I had carried out feature selection. I will try it and soon update this notebook.

Thank you for reading!