In [None]:
# 1. Implement logistic regression in Python to perform classification on the Social_Network_Ads.csv dataset.
# 2. Compute the confusion matrix to find TP, FP, TN, FN, accuracy, error rate, precision, and recall on the given dataset.

In [1]:
# Step 1: Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, classification_report

In [2]:
# Step 2: Load the dataset
# The path is relative; change DATA_PATH if the CSV is stored elsewhere.
DATA_PATH = 'Social_Network_Ads.csv'
df = pd.read_csv(DATA_PATH)
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [16]:
# Step 3: Preprocess the data
# Count the missing values in every column. Nothing is dropped here:
# 'User ID' stays in df and is simply not selected as a feature in Step 4.
missing_per_column = df.isna().sum()
print(missing_per_column)


User ID            0
Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64


In [5]:
# Convert categorical Gender column to numeric (Male -> 1, Female -> 0).
# A plain dict passed to .map() turns every value that is not a key into NaN,
# so running this cell a second time (when the column already holds 0/1) would
# blank out the whole column. Values without an entry in gender_codes are
# passed through unchanged instead, which makes the cell safe to re-run.
gender_codes = {'Male': 1, 'Female': 0}
df['Gender'] = df['Gender'].map(lambda value: gender_codes.get(value, value))

# Fail here with a clear message rather than later inside the scaler or model
# if the column contains anything other than the two expected categories.
assert df['Gender'].isin([0, 1]).all(), "Gender contains values other than 'Male'/'Female'"

In [6]:
# Step 4: Define features (X) and target (y)
# Only these three columns are used as predictors; 'User ID' is not included.
feature_columns = ['Gender', 'Age', 'EstimatedSalary']
X = df[feature_columns]
# The column the model learns to predict.
y = df['Purchased']


In [7]:
# Step 5: Split the dataset into training and testing sets
# A quarter of the rows is held out for testing; the fixed random_state
# makes the split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.25,
)

In [8]:
# Step 6: Scale the features
# The scaler is fitted on the training split only; the statistics it learned
# there are then applied to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [9]:
# Step 7: Train the logistic regression model
# fit() returns the estimator itself, so construction and training are chained;
# the trailing expression displays the fitted estimator as the cell output.
model = LogisticRegression().fit(X_train, y_train)
model

In [10]:
# Step 8: Predict on the test set
# y_pred holds one predicted class per row of the scaled test features.
y_pred = model.predict(X=X_test)


In [11]:
# Step 9: Evaluate the model
# Confusion Matrix: rows are the actual classes, columns the predicted classes.
# labels=[0, 1] pins the result to a 2x2 layout [[TN, FP], [FN, TP]] even if
# one class is missing from both y_test and y_pred; without it the matrix
# shrinks and unpacking cm.ravel() into four values fails.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print("\nConfusion Matrix:\n", cm)



Confusion Matrix:
 [[61  2]
 [10 27]]


In [12]:
# Extract True Positives (TP), False Positives (FP), True Negatives (TN), False Negatives (FN)
# Unpack the 2x2 matrix row by row: the first row is (TN, FP), the second (FN, TP).
(tn, fp), (fn, tp) = cm

In [13]:
# Compute metrics
# All three scores compare the held-out labels with the model's predictions.
precision = precision_score(y_true=y_test, y_pred=y_pred)
recall = recall_score(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# Error rate is the complement of accuracy: the share of misclassified test rows.
error_rate = 1 - accuracy


In [28]:
# Print evaluation metrics
# First the labelled summary: confusion-matrix counts, then the scores
# rounded to two decimals. Each argument is emitted on its own line.
print(
    f"\nTrue Positives (TP): {tp}",
    f"False Positives (FP): {fp}",
    f"True Negatives (TN): {tn}",
    f"False Negatives (FN): {fn}",
    f"\nAccuracy: {accuracy:.2f}",
    f"Error Rate: {error_rate:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    sep="\n",
)

# Then the same values again without rounding, one per line
# (only the first one carries a label).
print("True Positives (TP):", tp)
print(fp, tn, fn, accuracy, error_rate, precision, recall, sep="\n")


True Positives (TP): 27
False Positives (FP): 2
True Negatives (TN): 61
False Negatives (FN): 10

Accuracy: 0.88
Error Rate: 0.12
Precision: 0.93
Recall: 0.73
True Positives (TP): 27
2
61
10
0.88
0.12
0.9310344827586207
0.7297297297297297


In [15]:
# Step 10: Detailed Classification Report
# Per-class precision, recall, F1-score and support, followed by the overall averages.
report = classification_report(y_true=y_test, y_pred=y_pred)
print("\nClassification Report:\n", report)


Classification Report:
               precision    recall  f1-score   support

           0       0.86      0.97      0.91        63
           1       0.93      0.73      0.82        37

    accuracy                           0.88       100
   macro avg       0.90      0.85      0.86       100
weighted avg       0.89      0.88      0.88       100

