# Case Study on Python Flask
Using the given dataset, create a web application to take feature inputs from the user in a web page and print whether a person will purchase the product or not.

In [None]:
# import all libraries necessary
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the Social Network Ads CSV into a DataFrame.
# NOTE(review): the absolute '/content/...' path is presumably a Colab working directory — adjust when running elsewhere.
csv_path = '/content/Social_Network_Ads.csv'
social = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to confirm the file loaded with the expected columns
social.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [None]:
# Summarise column dtypes and non-null counts; a non-null count equal to the
# number of entries for every column means there are no missing values
social.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


In [None]:
# Drop 'User ID': it is a unique identifier and carries no signal for the model.
# The result is rebound instead of mutated in place, and errors='ignore' makes the
# cell idempotent — re-running it after the column is gone no longer raises KeyError.
social = social.drop(columns='User ID', errors='ignore')

In [None]:
# Label-encode the Gender column so the classifier receives integers instead of strings,
# then preview the frame to check the encoded values
le = LabelEncoder()
gender_codes = le.fit_transform(social['Gender'])
social['Gender'] = gender_codes
social.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [None]:
# Separate the target vector y ('Purchased', as a NumPy array) from the
# feature matrix X (every remaining column)
y = social['Purchased'].values
X = social.drop(columns='Purchased')

Check and display the values of the target variable:

In [None]:
# Display the target vector to inspect the 0/1 labels.
# NOTE(review): this echoes every label; a slice such as y[:10] or np.bincount(y) would be easier to read.
y

array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a random forest of 100 trees on the training split (seeded for repeatable results)
rf_settings = {'n_estimators': 100, 'random_state': 42}
clf = RandomForestClassifier(**rf_settings)
clf.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test features, then echo the predictions
y_pred=clf.predict(X_test)
y_pred

array([1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0])

In [None]:
# Fraction of test rows whose predicted label matches the true label
test_accuracy = accuracy_score(y_test, y_pred)
print('Accuracy is:', test_accuracy)

Accuracy is: 0.9


An accuracy of 0.9 indicates a reasonably good model, so we now use it to predict the outcome for a sample set of feature inputs.

Feature Inputs :
Gender - Female, denoted by 0 (since the Gender attribute has been label-encoded),

Age - 25,

Estimated Salary - 33000


In [None]:
# Predict for one sample: Gender=0 (Female), Age=25, EstimatedSalary=33000.
# The sample is wrapped in a one-row DataFrame that reuses the training columns so each
# value is matched to its feature by name. The classifier was fitted on a DataFrame;
# a bare nested list relies purely on positional order and makes scikit-learn emit a
# feature-name warning, which the notebook's warnings.filterwarnings("ignore") hides.
sample = pd.DataFrame([[0, 25, 33000]], columns=X.columns)
prediction = clf.predict(sample)

In [None]:
# Echo the prediction array returned by clf.predict for the sample
prediction

array([0])

In [None]:
# Pull the first (here, only) predicted class label out of the array
prediction[0]

0

A prediction of 0 means the user is not expected to purchase the product.

Next step: since the model is able to make predictions, use the pickle library to save the trained model to a file, which will then be loaded by the Flask application in VS Code.

In [None]:
# Serialise the trained classifier to 'socialnew.pkl' so it can be loaded
# outside this notebook (e.g. from the application code in VS Code)
import pickle

with open('socialnew.pkl', mode='wb') as model_file:
    pickle.dump(clf, model_file)

In [None]:
# Reload the pickled classifier into a new variable; the same snippet is meant to be
# reused in VS Code to read the pickle file. The file is opened in a `with` block so
# the handle is closed deterministically instead of being left open.
# Only unpickle files you trust — pickle.load can execute arbitrary code.
with open('socialnew.pkl', 'rb') as model_file:
    social_model = pickle.load(model_file)

In [None]:
# Confirm that Gender is stored as an integer dtype after label encoding
social['Gender'].dtype

dtype('int64')