In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [None]:
# Read the advertising dataset from the CSV in the working directory.
df = pd.read_csv("cleaned_social_network_ads.csv")

# Preview the first five rows as a quick sanity check on the load.
sample_rows = df.head(5)
print("Sample Data:\n", sample_rows)

In [None]:
# Replace the Gender column with integer codes.
# LabelEncoder numbers the classes in sorted order, so string values
# 'Female'/'Male' would become 0/1 respectively — presumably that is what the
# CSV holds; confirm with label_encoder.classes_ after fitting.
label_encoder = LabelEncoder()
gender_codes = label_encoder.fit_transform(df['Gender'])
df['Gender'] = gender_codes

In [None]:
# Separate the predictor columns (X) from the target column (y).
feature_columns = ['Age', 'Gender', 'EstimatedSalary']
X = df[feature_columns]
y = df['Purchased']

In [None]:
# Hold out 25% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore every metric computed later) reproducible.
split_parts = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Standardise the features. The scaler learns mean/std from the training rows
# only and the same statistics are then applied to the test rows, so no
# information from the test set leaks into the transformation.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print("\nFeature Scaling Formula: X_scaled = (X - mean) / std_dev")

In [None]:
# Fit a logistic regression on the scaled training data.
# The fit call is left as the cell's final expression so the notebook still
# displays the fitted estimator.
classifier = LogisticRegression(random_state=42)
classifier.fit(X=X_train, y=y_train)

In [None]:
# Score the held-out rows: hard class labels plus per-class probabilities.
class_probabilities = classifier.predict_proba(X_test)
y_pred = classifier.predict(X_test)
# Column 1 of predict_proba is kept as the probability of class 1 (Purchased).
y_prob = class_probabilities[:, 1]

In [None]:
# Compute the confusion matrix (rows = actual class, columns = predicted class).
# Pinning labels=[0, 1] keeps the matrix 2x2 even when y_test and y_pred
# together contain only one class; without it the matrix collapses to 1x1,
# the TP/FP/FN lookups raise IndexError, and the heatmap tick labels no
# longer match the matrix shape.
# Assumes Purchased is coded 0 = not purchased, 1 = purchased — TODO confirm
# against the CSV.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])

# Row-major flattening of [[TN, FP], [FN, TP]] gives the four cell counts.
TN, FP, FN, TP = cm.ravel()

In [None]:
# Summary metrics on the held-out set.
# zero_division=1 means an undefined ratio (e.g. no positive predictions for
# precision, no positive samples for recall) is reported as 1.0 rather than
# triggering a warning.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
precision = precision_score(y_true=y_test, y_pred=y_pred, zero_division=1)
recall = recall_score(y_true=y_test, y_pred=y_pred, zero_division=1)
# Error rate is simply the complement of accuracy.
error_rate = 1 - accuracy

In [None]:
# Report the evaluation results computed in the earlier cells.
# The raw confusion matrix is printed first, followed by each metric rounded
# to two decimals together with the formula it corresponds to.
print("\nConfusion Matrix:\n", cm)
print(f"\nAccuracy: {accuracy:.2f} (Formula: (TP + TN) / (TP + FP + TN + FN))")
print(f"Error Rate: {error_rate:.2f} (Formula: 1 - Accuracy)")
print(f"Precision: {precision:.2f} (Formula: TP / (TP + FP))")
print(f"Recall: {recall:.2f} (Formula: TP / (TP + FN))")

In [None]:
# Line up each test row's true label, predicted label and class-1 probability
# in one table, then show the first few rows.
result_columns = {
    "Actual": y_test.values,  # .values drops the shuffled index so rows align by position
    "Predicted": y_pred,
    "Probability": y_prob,
}
df_results = pd.DataFrame(result_columns)
print("\nSample Predictions with Probability:\n", df_results.head())

In [None]:
# Draw the confusion matrix as an annotated heatmap.
plt.figure(figsize=(6, 4))
class_names = ['Not Purchased', 'Purchased']
ax = sns.heatmap(
    cm,
    annot=True,   # write the count inside each cell
    fmt='d',      # counts are integers
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
# Columns of the matrix are predictions, rows are the true classes.
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()