## <div style='color:blue'> The below code goes in Flask_ML_API.ipynb file </div>

#### Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics

#### Get the dataset

In [2]:
from sklearn.datasets import load_boston
bos = load_boston()

#### Initialize the dataset

In [3]:
bos_init = pd.DataFrame(bos.data)

In [4]:
#Preview of the top five rows
bos_init.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


#### Import feature names from the dataset

In [5]:
bos_init.colums = bos.feature_names
bos_init.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


#### Further need to predict price, create a target variable for the ML model

In [6]:
bos_init['Price'] = bos.target

#### The ML model will need a numerical value rather than categorical value for the predection of price; check if there are any NULL values

In [7]:
bos_init.isnull().sum()

0        0
1        0
2        0
3        0
4        0
5        0
6        0
7        0
8        0
9        0
10       0
11       0
12       0
Price    0
dtype: int64

#### Check if the values are categorical

In [8]:
bos_init.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       506 non-null    float64
 1   1       506 non-null    float64
 2   2       506 non-null    float64
 3   3       506 non-null    float64
 4   4       506 non-null    float64
 5   5       506 non-null    float64
 6   6       506 non-null    float64
 7   7       506 non-null    float64
 8   8       506 non-null    float64
 9   9       506 non-null    float64
 10  10      506 non-null    float64
 11  11      506 non-null    float64
 12  12      506 non-null    float64
 13  Price   506 non-null    float64
dtypes: float64(14)
memory usage: 55.5 KB


### <div style='font-family:Georgia'> Creating the ML model for the Price prediction <br>
<b> Step 1: Separate features and target variables <br>
Step 2: Split the dataset into training and testing dataset <br>
Step 3: Continue creating the ML model </div>

#### Separate features and target variables

In [9]:
x = bos_init.drop(['Price'], axis = 1)
y = bos_init['Price']

#### Split the dataset into training and testing dataset

In [10]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)
print('Shape of x_train \n', x_train.shape, '\n\n Shape of x_test \n', x_test.shape, '\n-------------\n')
print('Shape of y_train \n', y_train.shape, '\n\n Shape of y_test \n', y_test.shape)

Shape of x_train 
 (404, 13) 

 Shape of x_test 
 (102, 13) 
-------------

Shape of y_train 
 (404,) 

 Shape of y_test 
 (102,)


#### Creating the ML model

In [11]:
from sklearn.ensemble import RandomForestRegressor
classifier = RandomForestRegressor()
classifier.fit(x_train, y_train)

RandomForestRegressor()

#### Model evaluation; evaluate the model performance for training and test dataset

In [12]:
##Training dataset model evaluation
print('Training dataset model evaluation results \n')
prediction = classifier.predict(x_train)
print('R^2: ', metrics.r2_score(y_train, prediction))
print('Mean Abs Error: ', metrics.mean_absolute_error(y_train, prediction))
print('Mean Squared Error: ', metrics.mean_squared_error(y_train, prediction))
print('Root Mean Square Error: ', np.sqrt(metrics.mean_squared_error(y_train, prediction)), '\n')

##Test dataset model evaluation
print('\nTest dataset model evaluation results \n')
prediction_test = classifier.predict(x_test)
print('R^2: ', metrics.r2_score(y_test, prediction_test))
print('Mean Abs Error: ', metrics.mean_absolute_error(y_test, prediction_test))
print('Mean Squared Error: ', metrics.mean_squared_error(y_test, prediction_test))
print('Root Mean Square Error: ', np.sqrt(metrics.mean_squared_error(y_test, prediction_test)))

Training dataset model evaluation results 

R^2:  0.9814199968501339
Mean Abs Error:  0.8210693069306922
Mean Squared Error:  1.500918386138613
Root Mean Square Error:  1.2251197435918715 


Test dataset model evaluation results 

R^2:  0.9155554098202047
Mean Abs Error:  2.2745784313725492
Mean Squared Error:  8.345438401960783
Root Mean Square Error:  2.888847244483651


### To save and use the ML object model files.
#### <div> 1. Create a folder with the name 'model' within the root directory <br> 2. Create two empty files within 'model' folder with the following file name and extension 'model.pkl' and 'model_columns.pkl' </div>
#### Serialization and Deserialization mechanism will be useful to store the ML object model in byte system and the other way round

In [13]:
#Save the model to a file
import pickle
with open('model/model.pkl', 'wb') as file:
    pickle.dump(classifier, file)

#Save the Columns
model_columns = list(x.columns)
with open('model/model_columns.pkl', 'wb') as file:
    pickle.dump(model_columns, file)