In [8]:
# Base library
import numpy as np
import pandas as pd

# Deployment
import pickle
import warnings
warnings.filterwarnings('ignore')


After modeling, we use the saved model to predict on unseen data, reporting both a predicted label (`Prediction`) and a predicted probability (`Probability`). A probability below 0.5 is classified as 0, and a probability above 0.5 is classified as 1.

In [2]:
# Restore the previously saved model object from disk.
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so only load model files that come from a trusted source.
MODEL_PATH = 'gaussian-nb-model.pkl'

with open(MODEL_PATH, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [3]:
# Two hand-written example rows used for a quick single-call prediction.
# Each tuple is one row; values follow the order of `sample_columns`.
sample_columns = [
    'age', 'job', 'marital', 'education', 'default', 'housing', 'loan',
    'emp.var.rate', 'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed',
]

sample_rows = [
    (30, 'student', 'single', 'professional.course', 'no', 'yes', 'no',
     -1.1, 94.199, -37.5, 0.879, 4963.6),
    (30, 'blue-collar', 'married', 'illiterate', 'no', 'no', 'no',
     1.4, 93.994, -36.1, 4.856, 4963.0),
]

test_data = pd.DataFrame(sample_rows, columns=sample_columns)
test_data

Unnamed: 0,age,job,marital,education,default,housing,loan,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,30,student,single,professional.course,no,yes,no,-1.1,94.199,-37.5,0.879,4963.6
1,30,blue-collar,married,illiterate,no,no,no,1.4,93.994,-36.1,4.856,4963.0


In [4]:
# Score the two hand-built sample rows in `test_data` with the loaded model.
predictions = loaded_model.predict(test_data)

# Trailing expression so the notebook displays the predicted labels.
predictions

array([1, 1], dtype=int64)

In [5]:
# Bulk prediction: read the unseen-data CSV that will be scored below.
UNSEEN_DATA_PATH = 'data unseen model 2.csv'

data_unseen = pd.read_csv(UNSEEN_DATA_PATH)

# Preview the first rows only.
data_unseen.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,Deposit
0,55,management,single,basic.4y,no,no,no,1.4,94.465,-41.8,4.962,5228.1,0
1,32,admin.,married,professional.course,no,no,no,-1.8,92.893,-46.2,1.299,5099.1,0
2,25,self-employed,married,university.degree,no,yes,no,-2.9,92.963,-40.8,1.268,5076.2,0
3,57,management,married,professional.course,unknown,yes,no,-0.1,93.2,-42.0,4.076,5195.8,0
4,55,unknown,married,university.degree,no,yes,no,-2.9,92.963,-40.8,1.26,5076.2,0


In [6]:
# Predict using the loaded model.
#
# Only input features are handed to the model. `Deposit` is excluded because it
# is not a model input (the hand-built sample frame is scored without it), and
# `Prediction` / `Probability` are excluded because this cell adds them to
# `data_unseen` in place, so a re-run would otherwise feed the model its own
# earlier output. errors='ignore' keeps the drop valid on the first run, when
# those two columns do not exist yet.
NON_FEATURE_COLUMNS = ['Deposit', 'Prediction', 'Probability']
features_unseen = data_unseen.drop(columns=NON_FEATURE_COLUMNS, errors='ignore')

y_pred_label = loaded_model.predict(features_unseen)
# Second column of predict_proba; presumably the probability of class 1 —
# confirm against loaded_model.classes_.
y_pred_proba = loaded_model.predict_proba(features_unseen)[:, 1]

# Add predictions to the DataFrame
data_unseen['Prediction'] = y_pred_label
data_unseen['Probability'] = y_pred_proba
data_unseen

Unnamed: 0,age,job,marital,education,default,housing,loan,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,Deposit,Prediction,Probability
0,55,management,single,basic.4y,no,no,no,1.4,94.465,-41.8,4.962,5228.1,0,1,8.136222e-01
1,32,admin.,married,professional.course,no,no,no,-1.8,92.893,-46.2,1.299,5099.1,0,1,9.529776e-01
2,25,self-employed,married,university.degree,no,yes,no,-2.9,92.963,-40.8,1.268,5076.2,0,1,9.857348e-01
3,57,management,married,professional.course,unknown,yes,no,-0.1,93.200,-42.0,4.076,5195.8,0,0,3.543546e-02
4,55,unknown,married,university.degree,no,yes,no,-2.9,92.963,-40.8,1.260,5076.2,0,1,9.828103e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6865,58,blue-collar,married,unknown,unknown,yes,no,1.4,93.918,-42.7,4.962,5228.1,0,0,1.644881e-02
6866,54,entrepreneur,married,university.degree,unknown,no,no,1.4,94.465,-41.8,4.960,5228.1,0,0,3.261297e-09
6867,50,admin.,married,university.degree,no,no,no,1.4,93.444,-36.1,4.967,5228.1,0,1,9.155043e-01
6868,51,blue-collar,married,basic.9y,unknown,yes,no,1.4,93.918,-42.7,4.963,5228.1,0,0,4.336949e-02


In [7]:
# Save Prediction: write the scored frame to CSV, leaving out the row index.
PREDICTION_OUTPUT_PATH = 'data_unseen_prediction_model2.csv'

data_unseen.to_csv(PREDICTION_OUTPUT_PATH, index=False)