# Importing the Necessary Libraries

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/social-network-ads/Social_Network_Ads.csv


# Data Importation and Exploration

In [2]:
# Loading and previewing our dataset
social_df = pd.read_csv('/kaggle/input/social-network-ads/Social_Network_Ads.csv')
social_df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [3]:
# Determining the size of our dataset
social_df.shape

(400, 5)

# Data Preparation

In [4]:
social_df["Gender"] = np.where(social_df["Gender"].str.contains("Male", "Female"), 1, 0)
social_df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,1,19,19000,0
1,15810944,1,35,20000,0
2,15668575,0,26,43000,0
3,15603246,0,27,57000,0
4,15804002,1,19,76000,0


# Data Modeling

In [5]:
# Preparing our dataset for training
X = social_df.iloc[:, [1, 2 ,3]].values
y = social_df.iloc[:, 4].values

In [6]:
# Splitting the dataset into a training set and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [7]:
# Normalisation
from sklearn.preprocessing import MinMaxScaler
norm = MinMaxScaler().fit(X_train)
X_train = norm.transform(X_train)
X_test = norm.transform(X_test)

In [8]:
# Comparing how the different classification algorithms will perform
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier

logistic_classifier = LogisticRegression()
decision_classifier = DecisionTreeClassifier()
svm_classifier = SVC()
knn_classifier = KNeighborsClassifier()
naive_classifier = GaussianNB()

# Using these classifiers to fit our data, X_train and y_train
logistic_classifier.fit(X_train, y_train)
decision_classifier.fit(X_train, y_train)
svm_classifier.fit(X_train, y_train)
knn_classifier.fit(X_train, y_train)
naive_classifier.fit(X_train, y_train)

In [9]:
# Predicting the test set results
logistic_y_prediction = logistic_classifier.predict(X_test)
decision_y_prediction = decision_classifier.predict(X_test)
svm_y_prediction = svm_classifier.predict(X_test)
knn_y_prediction = knn_classifier.predict(X_test)
naive_y_prediction = naive_classifier.predict(X_test)

In [10]:
# Printing the evaluation metrics to determine the accuracy of classifiers
from sklearn.metrics import classification_report, accuracy_score

print(accuracy_score(logistic_y_prediction, y_test))
print(accuracy_score(decision_y_prediction, y_test))
print(accuracy_score(svm_y_prediction, y_test))
print(accuracy_score(knn_y_prediction, y_test))
print(accuracy_score(naive_y_prediction, y_test))

0.87
0.84
0.94
0.92
0.93


In [11]:
# Printing the classification report
print('Logistic classifier:')
print(classification_report(y_test, logistic_y_prediction))

print('Decision Tree classifier:')
print(classification_report(y_test, decision_y_prediction))

print('SVM Classifier:')
print(classification_report(y_test, svm_y_prediction))

print('KNN Classifier:')
print(classification_report(y_test, knn_y_prediction))

print('Naive Bayes Classifier:')
print(classification_report(y_test, naive_y_prediction))

Logistic classifier:
              precision    recall  f1-score   support

           0       0.84      0.98      0.91        63
           1       0.96      0.68      0.79        37

    accuracy                           0.87       100
   macro avg       0.90      0.83      0.85       100
weighted avg       0.88      0.87      0.86       100

Decision Tree classifier:
              precision    recall  f1-score   support

           0       0.86      0.89      0.88        63
           1       0.80      0.76      0.78        37

    accuracy                           0.84       100
   macro avg       0.83      0.82      0.83       100
weighted avg       0.84      0.84      0.84       100

SVM Classifier:
              precision    recall  f1-score   support

           0       0.97      0.94      0.95        63
           1       0.90      0.95      0.92        37

    accuracy                           0.94       100
   macro avg       0.93      0.94      0.94       100
weighted av

In [12]:
# Using a confusion matrix to determine the accuracy of our model
from sklearn.metrics import confusion_matrix

print('Logistic Regression classifier:')
print(confusion_matrix(logistic_y_prediction, y_test))

print('Decision Tree classifier:')
print(confusion_matrix(decision_y_prediction, y_test))

print('KNN Classifier:')
print(confusion_matrix(knn_y_prediction, y_test))

print('SVM classifier:')
print(confusion_matrix(svm_y_prediction, y_test))

print('Naive Bayes classifier:')
print(confusion_matrix(naive_y_prediction, y_test))

Logistic Regression classifier:
[[62 12]
 [ 1 25]]
Decision Tree classifier:
[[56  9]
 [ 7 28]]
KNN Classifier:
[[58  3]
 [ 5 34]]
SVM classifier:
[[59  2]
 [ 4 35]]
Naive Bayes classifier:
[[61  5]
 [ 2 32]]


In [13]:
# Making a new prediction & comparing results
new_case = [[0, 60, 2500]] # Gender, Age, Salary

# We will need to transform our new case
new_case = norm.transform(new_case)

print('Logistic Regression classifier', logistic_classifier.predict(new_case))
print('Decision Tree classifier:', decision_classifier.predict(new_case))
print('SVM classifier:', svm_classifier.predict(new_case))
print('KNN classifier:', knn_classifier.predict(new_case))
print('Naive Bayes classifier:', naive_classifier.predict(new_case))

Logistic Regression classifier [1]
Decision Tree classifier: [1]
SVM classifier: [1]
KNN classifier: [1]
Naive Bayes classifier: [1]
