<a href="https://colab.research.google.com/github/Nidhi89717/ML/blob/main/15-Model-Deployment/01_Model_Persistence.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Persistence

In [21]:
import pandas as pd
import numpy as np

In [2]:
from google.colab import drive
# Mount Google Drive at /content/gdrive so the dataset and the saved artifacts
# used below can be read from / written to Drive.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
# Load the advertising dataset from the mounted Drive.
# The path is relative, so it resolves against the current working directory.
df = pd.read_csv('gdrive/My Drive/csv_files/Advertising.csv')

In [4]:
# Display the loaded frame (the rendered output elides the middle rows).
df

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9
...,...,...,...,...
195,38.2,3.7,13.8,7.6
196,94.2,4.9,8.1,9.7
197,177.0,9.3,6.4,12.8
198,283.6,42.0,66.2,25.5


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,TV,radio,newspaper,sales
count,200.0,200.0,200.0,200.0
mean,147.0425,23.264,30.554,14.0225
std,85.854236,14.846809,21.778621,5.217457
min,0.7,0.0,0.3,1.6
25%,74.375,9.975,12.75,10.375
50%,149.75,22.9,25.75,12.9
75%,218.825,36.525,45.1,17.4
max,296.4,49.6,114.0,27.0


In [6]:
# Separate the target from the predictors: 'sales' is what we predict,
# every remaining column is used as a feature.
y = df['sales']
X = df.drop(columns='sales')

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# First split: keep 70% of the rows for training and set 30% aside.
# The set-aside portion is divided again in the next cell.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [9]:
# Second split: cut the set-aside 30% in half, giving a validation set
# (used while evaluating the model) and a final hold-out test set.
X_validation, X_holdout_test, y_validation, y_holdout_test = train_test_split(
    X_test,
    y_test,
    test_size=0.5,
    random_state=101,
)

In [10]:
# Total number of rows in the full dataset.
len(X)

200

In [11]:
# Number of rows in the training set (70% of the data).
len(X_train)

140

In [12]:
# Number of rows in the validation set (half of the set-aside 30%).
len(X_validation)

30

In [13]:
# Number of rows in the final hold-out test set (the other half of the set-aside 30%).
len(X_holdout_test)

30

## Model Evaluation and Hyperparameter Tuning

In [14]:
from sklearn.ensemble import RandomForestRegressor

In [23]:
# Random forest with 30 trees; the fixed seed makes the fit repeatable.
model = RandomForestRegressor(random_state=101, n_estimators=30)

In [24]:
# Fit on the training split only; validation and hold-out rows stay unseen.
model.fit(X_train,y_train)

RandomForestRegressor(n_estimators=30, random_state=101)

In [25]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [26]:
# Predict the target for the validation rows.
validation_prediction = model.predict(X_validation)

In [27]:
# Mean absolute error on the validation set.
mean_absolute_error(y_validation,validation_prediction)

0.6575555555555552

In [28]:
# Root mean squared error on the validation set (square root of the MSE).
np.sqrt(mean_squared_error(y_validation,validation_prediction))

0.8542009478215644

## Final Hold-Out Test Performance for Reporting

In [30]:
# Predict the target for the hold-out test rows.
holdout_predictions=model.predict(X_holdout_test)

In [31]:
# Mean absolute error on the hold-out test set.
mean_absolute_error(y_holdout_test,holdout_predictions)

0.5937777777777775

In [33]:
# Root mean squared error on the hold-out test set (MSE raised to the power 0.5).
mean_squared_error(y_holdout_test,holdout_predictions)**0.5

0.745323693040418

## Full Training

In [34]:
# Fresh estimator with the same settings as the evaluated model; it is
# refitted on the full dataset in the next cell.
final_model = RandomForestRegressor(random_state=101, n_estimators=30)

In [35]:
# Train on every available row (train + validation + hold-out) before saving.
final_model.fit(X,y)

RandomForestRegressor(n_estimators=30, random_state=101)

## Saving the Model (and Any Other Python Object) as a Pickle File

In [36]:
import joblib

In [38]:
# Serialize the fully trained model to Drive.
# joblib.dump returns the list of file names it wrote.
joblib.dump(final_model,'gdrive/My Drive/csv_files/final_model.pkl')

['gdrive/My Drive/csv_files/final_model.pkl']

In [39]:
# Feature names, in the column order the model was trained on.
list(X.columns)

['TV', 'radio', 'newspaper']

In [44]:
# Persist the feature names next to the model so the expected inputs
# (and their order) can be recovered when the model is loaded later.
joblib.dump(list(X.columns),'gdrive/My Drive/csv_files/col_names.pkl')

['gdrive/My Drive/csv_files/col_names.pkl']

## Loading Model (Model Persistence)

In [45]:
# Restore the saved list of feature names.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load files you created yourself or otherwise trust.
new_columns = joblib.load('gdrive/My Drive/csv_files/col_names.pkl')

In [46]:
# Confirm the restored feature names match the training columns.
new_columns

['TV', 'radio', 'newspaper']

In [56]:
# Restore the trained model from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load files you created yourself or otherwise trust.
loaded_model = joblib.load('gdrive/My Drive/csv_files/final_model.pkl')

In [57]:
# The model was fitted on a DataFrame, so it was trained with named features.
# Wrap the new observation in a one-row DataFrame labelled with the persisted
# column names so each value is tied to its feature by name rather than only
# by position. Recent scikit-learn versions warn when a model fitted with
# feature names is given unnamed input such as a nested list.
new_sample = pd.DataFrame([[230.1, 37.8, 69.2]], columns=new_columns)
loaded_model.predict(new_sample)



array([21.99])