In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Logistic-regression baseline on the Social Network Ads data: load the CSV,
# encode 'Gender', split into train/test, standardize, fit, and print the
# test-set metrics.

# Read the raw table from disk
data = pd.read_csv('Social_Network_Ads.csv')

# Encode 'Gender' numerically: 1 for 'Male', 0 for every other value
# (anything that is not exactly 'Male', including a missing entry, becomes 0).
data['Gender'] = data['Gender'].eq('Male').astype('int64')

# Predictors (independent variables) and the target (dependent variable)
x_ind = data.loc[:, ['Gender', 'Age', 'EstimatedSalary']]
y_dep = data['Purchased']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x_ind,
    y_dep,
    test_size=0.2,
    random_state=42,
)

# Standardize the features to zero mean and unit variance so that columns on
# very different scales contribute comparably. The scaler learns its
# statistics from the training split only, and that same transform is then
# applied to the test split.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

# Fit a logistic regression classifier with scikit-learn's default settings
logreg = LogisticRegression().fit(x_train, y_train)

# Predict labels for the held-out rows
y_pred = logreg.predict(x_test)

# Score the predictions against the true test labels
cm = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the results: the matrix on its own lines, then one line per score
print("Confusion Matrix:", cm, sep="\n")
for label, value in (
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
    ("Accuracy:", accuracy),
):
    print(label, value)


Confusion Matrix:
[[50  2]
 [ 7 21]]
Precision: 0.9130434782608695
Recall: 0.75
F1 Score: 0.8235294117647057
Accuracy: 0.8875
