STEP1: IMPORTING LIBRARIES

In [1]:
# Basic Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


STEP2: LOAD AND EXPLORE DATASET


In [9]:
# Location of the Social Network Ads CSV (raw file hosted on GitHub)
url = "https://raw.githubusercontent.com/Oscar4561/Social_Network_Ads.csv/refs/heads/main/Social_Network_Ads.csv"

# Read the remote CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows as the cell's output
df.head(n=5)


Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


STEP3: DATA CLEANING AND PREPARATION


In [10]:
# 1. Remove the 'User ID' column: it is only a row identifier, not a feature.
#    Reassigning (instead of inplace=True) together with errors='ignore' keeps
#    this cell re-runnable: running it a second time no longer raises KeyError
#    because the column has already been removed.
df = df.drop(columns=['User ID'], errors='ignore')

# 2. Encode the categorical 'Gender' column as integers.
#    LabelEncoder assigns codes in sorted class order -> 0: Female, 1: Male
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])

# 3. Features 'X': every remaining column except the target
X = df.drop(columns=['Purchased'])

# 4. Target variable 'y'
y = df['Purchased']

# 5. Split the dataset 80/20; random_state fixes the shuffle for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


STEP4: TRAINING THE LOGISTIC REGRESSION MODEL

In [11]:
# Create a logistic-regression classifier with default settings and fit it
# on the training split (fit() returns the fitted estimator itself)
model = LogisticRegression().fit(X_train, y_train)

# Report the learned parameters
print("Model trained successfully!")
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

Model trained successfully!
Coefficients: [[2.84619221e-01 2.10719873e-01 3.58422448e-05]]
Intercept: [-11.69199787]


STEP5: MAKING PREDICTIONS AND COMPARING RESULTS

In [13]:
# Predict the class label for every row of the test set
y_pred = model.predict(X_test)

# Build a side-by-side comparison table: start from the test features,
# rename the salary column, then append the true and predicted labels.
# y_test aligns on the shared row index; y_pred is attached positionally.
results = (
    X_test[['Gender', 'Age', 'EstimatedSalary']]
    .rename(columns={'EstimatedSalary': 'Salary'})
    .assign(Actual=y_test, Predicted=y_pred)
)

print(results.head())

     Gender  Age  Salary  Actual  Predicted
209       0   46   22000       0          0
280       0   59   88000       1          1
33        0   28   44000       0          0
210       0   48   96000       1          1
93        0   29   28000       0          0


STEP6: MODEL EVALUATION


In [15]:
# Compute all evaluation metrics on the test-set predictions up front
accuracy = accuracy_score(y_test, y_pred)
conmat = confusion_matrix(y_test, y_pred)
classrep = classification_report(y_test, y_pred)

# 1. Accuracy, shown as a percentage rounded to two decimals
print(f"Accuracy: { round(accuracy * 100, 2)}%")

# 2. Confusion matrix, laid out as [[TN FP],[FN TP]]
print("Confusion Matrix:")
print(conmat)

# 3. Per-class precision / recall / f1-score summary
print("Classification Report:")
print(classrep)


Accuracy: 88.75%
Confusion Matrix:
[[50  2]
 [ 7 21]]
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92        52
           1       0.91      0.75      0.82        28

    accuracy                           0.89        80
   macro avg       0.90      0.86      0.87        80
weighted avg       0.89      0.89      0.88        80

