In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the ads CSV (path is relative to the notebook's working directory).
csv_path = 'Pr_CSV_Files/Social_Network_Ads.csv'
df = pd.read_csv(csv_path)

In [4]:
# Show the first five rows as a quick sanity check of the loaded frame.
df.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0


In [5]:
# Show the last five rows to confirm the file was read through to the end.
df.tail(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
395,15691863,Female,46.0,41000.0,1
396,15706071,Male,51.0,23000.0,1
397,15654296,Female,50.0,20000.0,1
398,15755018,Male,36.0,33000.0,0
399,15594041,Female,49.0,36000.0,1


In [6]:
# Print column names, non-null counts and dtypes for the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   User ID          400 non-null    int64  
 1   Gender           400 non-null    object 
 2   Age              400 non-null    float64
 3   EstimatedSalary  400 non-null    float64
 4   Purchased        400 non-null    int64  
dtypes: float64(2), int64(2), object(1)
memory usage: 15.8+ KB


In [7]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

User ID            0
Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

In [8]:
# Encode Gender as a binary numeric feature: Male -> 0, Female -> 1.
mapping = {'Male':0, 'Female':1}

# Only encode while the column still holds labels. Re-running this cell on the
# already-encoded 0/1 column would otherwise map every value to NaN, because
# 0 and 1 are not keys of `mapping`.
if not pd.api.types.is_numeric_dtype(df['Gender']):
    gender_encoded = df['Gender'].map(mapping)

    # Series.map yields NaN for any label that is not a key of `mapping`;
    # fail here instead of letting NaNs reach the scaler/model.
    unmapped = df.loc[gender_encoded.isna(), 'Gender'].unique()
    if len(unmapped):
        raise ValueError(f'Unexpected Gender values: {list(unmapped)}')

    df['Gender'] = gender_encoded.astype('int64')

In [9]:
# Re-display the first five rows to check the Gender column after encoding.
df.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,0,19.0,19000.0,0
1,15810944,0,35.0,20000.0,0
2,15668575,1,26.0,43000.0,0
3,15603246,1,27.0,57000.0,0
4,15804002,0,19.0,76000.0,0


In [10]:
# Select features and target by column name rather than by position, so a
# change in the CSV's column order (or an extra column) cannot silently alter
# the feature set. On the current layout this is the same selection as
# df.iloc[:, 1:-1] / df.iloc[:, -1]; 'User ID' is left out of the features.
X = df[['Gender', 'Age', 'EstimatedSalary']]
y = df['Purchased']

In [11]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [13]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so no test-set information leaks in.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [14]:
# Logistic regression with default settings, trained on the scaled training split.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

In [15]:
# Predict class labels for the held-out (scaled) test split.
y_pred = model.predict(X=X_test)

In [16]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8875

In [17]:
# Confusion matrix on the test split: rows are true labels, columns are
# predicted labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[50,  2],
       [ 7, 21]])

In [19]:
# Unpack the 2x2 matrix row by row: first row is the true-negative class
# (TN, FP), second row is the true-positive class (FN, TP).
(TN, FP), (FN, TP) = cm

counts_summary = f'TP: {TP}\tFP: {FP}\nTN: {TN}\tFN: {FN}'
print(counts_summary)

TP: 21	FP: 2
TN: 50	FN: 7


In [20]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Metrics derived by hand from the confusion-matrix counts TP, TN, FP, FN.
accuracy = (TP + TN)/(TP + TN + FP + FN)

# Precision and recall have a zero denominator when the model predicts no
# positives (TP + FP == 0) or the test split has none (TP + FN == 0). Plain
# division on these counts would then give NaN with a RuntimeWarning; report
# 0.0 instead.
precision = _safe_ratio(TP, TP + FP)
recall = _safe_ratio(TP, TP + FN)

# F1 is the harmonic mean of precision and recall; guard the case where both
# are zero.
f1_score = 2 * _safe_ratio(precision * recall, precision + recall)
error_rate = 1 - accuracy

In [21]:
# Report every hand-computed metric rounded to four decimal places.
metrics_report = f'Accuracy: {accuracy:.4f}\nPrecision: {precision:.4f}\nRecall: {recall:.4f}\nF1-score: {f1_score:.4f}\nError Rate: {error_rate:.4f}'
print(metrics_report)

Accuracy: 0.8875
Precision: 0.9130
Recall: 0.7500
F1-score: 0.8235
Error Rate: 0.1125
