In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score, roc_curve

In [None]:
# Read the ads CSV (path is relative to the working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer="Social_Network_Ads.csv")

# Preview the top of the table; 5 rows is also head()'s default
df.head(n=5)

In [None]:
# Check dataset structure (info() prints its report directly)
df.info()

# A notebook cell only renders the value of its final expression, so the
# intermediate summaries below are printed explicitly; as bare expressions
# their results would be computed and then silently discarded.

# Check summary statistics
print(df.describe())

# Check missing values (count of nulls per column)
print(df.isnull().sum())

# Target variable distribution (final expression: rendered as the cell output)
df['Purchased'].value_counts()

In [None]:
# Feature matrix: every column except the identifier, 'Gender', and the target.
# NOTE(review): 'Gender' is excluded without its predictive value being
# evaluated anywhere in this notebook — confirm that this is intended.
X = df.drop(columns=['User ID', 'Gender', 'Purchased'])

# Target vector: the 'Purchased' label column
y = df['Purchased']

In [None]:
# Hold out 25% of the rows as a test set. The split is stratified on y and
# uses a fixed seed so that re-running the cell yields the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Learn the scaling parameters from the training split only, then apply the
# same fitted transform to both splits (the test split never influences the fit).
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Fit a logistic-regression classifier (library defaults) on the scaled training split
model = LogisticRegression().fit(X_train, y_train)

# fit() returns the estimator itself; echo it as the cell output
model

In [None]:
# Scores for the ROC analysis: second column of predict_proba
# (presumably the positive class, Purchased == 1 — verify against model.classes_)
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Hard class labels for the test split
y_pred = model.predict(X_test)

In [None]:
# Accuracy of the hard predictions on the test split
print("Accuracy:", accuracy_score(y_test, y_pred))

# Confusion matrix, drawn on an explicit Axes so the labels and title are
# attached to this figure rather than to whatever pyplot considers "current".
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()

# Classification report. The heading and the report are printed separately:
# passing both to a single print() inserts the default separator (a space)
# in front of the report, which shifts its header row out of alignment with
# the rows below it.
print("Classification Report:")
print(classification_report(y_test, y_pred))

# ROC-AUC score, computed from the predicted probabilities (not the hard labels)
roc_auc = roc_auc_score(y_test, y_pred_proba)
print("ROC-AUC Score:", roc_auc)

In [None]:
# ROC curve points computed from the predicted probabilities
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

# Draw on an explicit Axes: the model's curve plus the dashed diagonal reference line
fig, ax = plt.subplots()
curve_label = "Logistic Regression (AUC = {:.2f})".format(roc_auc)
ax.plot(fpr, tpr, label=curve_label)
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve")
ax.legend()
plt.show()