In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.metrics import classification_report
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)

### Reading Dataset

In [27]:
df = pd.read_csv('Social_Network_Ads.csv')

### Checking head elements

In [28]:
df.head(10)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
5,15728773,Male,27,58000,0
6,15598044,Female,27,84000,0
7,15694829,Female,32,150000,1
8,15600575,Male,25,33000,0
9,15727311,Female,35,65000,0


### Checking tail elements

In [29]:
df.tail()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0
399,15594041,Female,49,36000,1


### Checking shape of dataset

In [30]:
df.shape

(400, 5)

### Checking info about columns and their datatypes

In [31]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB


### Dropping Unnecessary Columns

In [32]:
df.drop(['User ID'],axis = 1,inplace = True)

### Converting Categorical Column into Numerical Column

In [33]:
df['Gender'].replace(['Female','Male'],[0,1],inplace = True)

In [34]:
df.head(10)

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0
5,1,27,58000,0
6,0,27,84000,0
7,0,32,150000,1
8,1,25,33000,0
9,0,35,65000,0


### Train Test Split

In [35]:
train_df,test_df = train_test_split(df,test_size = 0.30)

In [36]:
trainX = train_df.iloc[:,:-1]
trainY  = train_df.iloc[:,-1]
testX = test_df.iloc[:,:-1]
testY = test_df.iloc[:,-1]

### Standard Scaling To Scale The Data

In [37]:
sc = StandardScaler()
trainX = sc.fit_transform(trainX)
testX = sc.transform(testX)

### Model Building

In [38]:
model = LogisticRegression()

In [39]:
model.fit(trainX,trainY)

LogisticRegression()

In [40]:
pred = model.predict(testX)

In [41]:
pred

array([0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 0, 1, 0, 1], dtype=int64)

### Evaluating Model

#### Accuracy Score

In [42]:
accuracy_score(pred,testY)

0.8

#### Confusion Matrix

In [43]:
cm = confusion_matrix(testY,pred)

In [44]:
cm

array([[66, 15],
       [ 9, 30]], dtype=int64)

#### True-Positive (TP), False Positive (FP), False Negative (FN), True Negative(TN)

In [45]:
TP = cm[0][0]
FP = cm[0][1]
FN = cm[1][0]
TN = cm[1][1]

In [46]:
TP

66

In [47]:
FP

15

In [48]:
FN

9

In [49]:
TN

30

#### Precision, Recall and F1_score

In [50]:
print(classification_report(testY,pred))

              precision    recall  f1-score   support

           0       0.88      0.81      0.85        81
           1       0.67      0.77      0.71        39

    accuracy                           0.80       120
   macro avg       0.77      0.79      0.78       120
weighted avg       0.81      0.80      0.80       120

