In [1]:
# 1. Import required libraries
import pandas as pd  # For handling data
import numpy as np   # For numerical operations
from sklearn.model_selection import train_test_split  # For splitting the data into training and testing sets
from sklearn.preprocessing import StandardScaler  # For scaling feature
from sklearn.linear_model import LogisticRegression  # For creating the logistic regression model
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score  # For evaluating the model

In [3]:
# 2. Load the dataset
# The file name is relative, so it is resolved against the current working directory.
DATA_FILE = 'Social_Network_Ads.csv'
df = pd.read_csv(DATA_FILE)

In [5]:
# 3. Preview the first five rows to confirm the columns loaded as expected
df.head(n=5)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [19]:
# 4. Select features and target variable
# Columns are chosen by name instead of by position, so the selection stays
# correct if the CSV's column order changes; a missing column raises a KeyError
# immediately instead of silently picking up the wrong data.
feature_columns = ['Age', 'EstimatedSalary']  # Independent variables (features)
target_column = 'Purchased'  # Dependent variable (target) - 0 or 1

X = df[feature_columns].to_numpy()  # 2-D array: one row per record, one column per feature
y = df[target_column].to_numpy()  # 1-D array of class labels


In [11]:
# 5. Split the dataset into training and testing sets (80% training, 20% testing)
# A fixed random_state keeps the shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [23]:
# 6. Feature scaling
# The scaler learns its statistics from the training data only; the same fitted
# scaler is then applied to both sets so the test data never influences the fit.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [25]:
# 7. Create the logistic regression model
# No arguments are passed, so scikit-learn's default hyperparameters apply.
model = LogisticRegression()  # Unfitted estimator; it is trained in the next step


In [27]:
# 8. Train the model
# Learns the coefficients from the scaled training features and their labels
model.fit(X=X_train, y=y_train)

In [29]:
# 9. Make predictions
# Predicted class labels for the scaled test features
y_pred = model.predict(X=X_test)

In [31]:
# 10. Compute the confusion matrix
# Rows are the actual class and columns the predicted class, both ordered 0 then 1.
# labels=[0, 1] pins the matrix to 2x2 even if one class is absent from y_test or
# y_pred; without it the matrix would shrink to 1x1 and the cm[1, 1]-style
# lookups in the next step would raise an IndexError.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print("Confusion Matrix:\n", cm)  # Displays the confusion matrix


Confusion Matrix:
 [[50  2]
 [ 9 19]]


In [33]:
# 11. Compute evaluation metrics
# Flattening the 2x2 matrix row by row gives the four counts in this order:
#   TN - predicted 0, actual 0
#   FP - predicted 1, actual 0
#   FN - predicted 0, actual 1
#   TP - predicted 1, actual 1
TN, FP, FN, TP = cm.ravel()

In [35]:
# 12. Accuracy
# Share of test samples whose predicted label matches the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8625


In [37]:
# 13. Error Rate
# Complement of the accuracy: the share of test samples that were misclassified
error_rate = 1.0 - accuracy
print(f"Error Rate: {error_rate}")

Error Rate: 0.13749999999999996


In [39]:
# 14. Precision
# TP / (TP + FP): of the samples predicted as class 1, the share that really are class 1
precision = precision_score(y_true=y_test, y_pred=y_pred)
print(f"Precision: {precision}")

Precision: 0.9047619047619048


In [41]:
# 15. Recall
# TP / (TP + FN): of the samples that really are class 1, the share predicted as class 1
recall = recall_score(y_true=y_test, y_pred=y_pred)
print(f"Recall: {recall}")

Recall: 0.6785714285714286


In [43]:
# 16. F1-Score (Combines Precision and Recall)
# Harmonic mean of precision and recall for class 1
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print(f"F1-Score: {f1}")

F1-Score: 0.7755102040816326


In [47]:
# 17. Cross-check: recompute the metrics by hand from the confusion-matrix
# counts (TP, TN, FP, FN). A zero denominator yields 0.0 instead of a division
# error or NaN.

# Precision: share of predicted positives that are actually positive
predicted_positives = TP + FP
precision = TP / predicted_positives if predicted_positives else 0.0

# Recall: share of actual positives that were predicted positive
actual_positives = TP + FN
recall = TP / actual_positives if actual_positives else 0.0

# F1 Score: harmonic mean of precision and recall.
# Stored in `f1`, not `f1_score`: assigning to `f1_score` would replace the
# function imported from sklearn.metrics with a float, and re-running the
# earlier F1-Score cell would then fail because a float is not callable.
precision_plus_recall = precision + recall
f1 = 2 * (precision * recall) / precision_plus_recall if precision_plus_recall else 0.0

# Accuracy: share of all predictions that are correct
accuracy = (TP + TN) / (TP + TN + FP + FN)

# Print metrics
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Accuracy: {accuracy}")

Precision: 0.9047619047619048
Recall: 0.6785714285714286
F1 Score: 0.7755102040816326
Accuracy: 0.8625
