In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score


# Load the dataset

In [2]:
# Read the ads dataset from a CSV file located relative to the working directory.
data = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')

# Display the first few rows of the dataset and its information

In [3]:
# Preview the first five rows. Ending the cell with a bare expression lets Jupyter
# render the DataFrame as a rich table instead of the plain-text dump print() gives.
data.head()

    User ID  Gender  Age  EstimatedSalary  Purchased
0  15624510    Male   19            19000          0
1  15810944    Male   35            20000          0
2  15668575  Female   26            43000          0
3  15603246  Female   27            57000          0
4  15804002    Male   19            76000          0


In [4]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() appends a stray "None" line to the output.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   User ID          400 non-null    int64 
 1   Gender           400 non-null    object
 2   Age              400 non-null    int64 
 3   EstimatedSalary  400 non-null    int64 
 4   Purchased        400 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 15.8+ KB
None


In [6]:
# Count missing values per column (isna is the canonical spelling of isnull).
data.isna().sum()

User ID            0
Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

# Separate features (X) and target variable (y)

In [7]:
# Features are the Age and EstimatedSalary columns; the target is the Purchased column.
X = data.loc[:, ['Age', 'EstimatedSalary']]
y = data.loc[:, 'Purchased']

# Splitting the dataset into training and testing sets

In [8]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature scaling

In [9]:
# Learn the scaling statistics from the training split only, then apply that same
# fitted transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Training the logistic regression model

In [10]:
# Fit a logistic regression classifier (default hyperparameters) on the scaled training data.
model = LogisticRegression()
model.fit(X=X_train_scaled, y=y_train)

# Making predictions and probability estimates

In [11]:
# Predicted class label for every row of the scaled test set.
y_pred = model.predict(X=X_test_scaled)

In [13]:
# Per-class probability estimates for the scaled test set; the columns are indexed
# as class 0 and class 1 when the results table is built.
y_pred_prob = model.predict_proba(X=X_test_scaled)

# Displaying the predictions, true labels, and predicted probabilities

In [14]:
# Collect the true labels, predicted labels, and both class probabilities in one table.
# y_test is the only Series among the columns, so it supplies the row index.
predictions_df = pd.DataFrame(
    data={
        'True Label': y_test,
        'Predicted Label': y_pred,
        'Probability of Class 0': y_pred_prob[:, 0],
        'Probability of Class 1': y_pred_prob[:, 1],
    },
)

In [15]:
# Print the heading and the predictions table on separate lines.
print("Predictions and Probabilities:", predictions_df, sep="\n")

Predictions and Probabilities:
     True Label  Predicted Label  Probability of Class 0  \
209           0                0                0.735349   
280           1                1                0.024987   
33            0                0                0.977388   
210           1                1                0.140986   
93            0                0                0.983728   
..          ...              ...                     ...   
246           0                0                0.901899   
227           1                1                0.010237   
369           1                1                0.342366   
176           0                0                0.910321   
289           1                0                0.712657   

     Probability of Class 1  
209                0.264651  
280                0.975013  
33                 0.022612  
210                0.859014  
93                 0.016272  
..                      ...  
246                0.098101  
227     

# Computing Confusion Matrix

In [16]:
# Confusion matrix of test-set predictions; keyword arguments make the
# (true labels, predicted labels) order explicit.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [17]:
# Print the heading and the confusion matrix on separate lines.
print("Confusion Matrix:", conf_matrix, sep="\n")

Confusion Matrix:
[[50  2]
 [ 9 19]]


# Extracting TP, FP, TN, FN from the confusion matrix

In [18]:
# The 2x2 binary confusion matrix is laid out as [[TN, FP], [FN, TP]];
# flattening it row by row yields the four counts in that order.
TN, FP, FN, TP = conf_matrix.ravel()

# Computing Accuracy

In [19]:
# Accuracy of the predicted labels against the true test labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [20]:
# Report the accuracy score obtained on the test set.
print("Accuracy:", accuracy)

Accuracy: 0.8625


# Computing Error Rate

In [21]:
# The error rate is the complement of accuracy.
error_rate = 1.0 - accuracy

In [22]:
# Report the error rate (1 - accuracy); floating-point subtraction can leave
# trailing digits in the printed value.
print("Error Rate:", error_rate)

Error Rate: 0.13749999999999996


# Computing Precision

In [23]:
# Precision of the predicted labels against the true test labels.
precision = precision_score(y_true=y_test, y_pred=y_pred)

In [24]:
# Report the precision score obtained on the test set.
print("Precision:", precision)

Precision: 0.9047619047619048


# Computing Recall

In [25]:
# Recall of the predicted labels against the true test labels.
recall = recall_score(y_true=y_test, y_pred=y_pred)

In [26]:
# Report the recall score obtained on the test set.
print("Recall:", recall)

Recall: 0.6785714285714286


# Displaying the TP, FP, TN, FN counts extracted from the confusion matrix

In [27]:
# True positives: taken from conf_matrix[1][1] (true class 1, predicted class 1).
print("TP ->",TP)

TP -> 19


In [28]:
# False positives: taken from conf_matrix[0][1] (true class 0, predicted class 1).
print("FP ->",FP)

FP -> 2


In [29]:
# True negatives: taken from conf_matrix[0][0] (true class 0, predicted class 0).
print("TN ->",TN)

TN -> 50


In [30]:
# False negatives: taken from conf_matrix[1][0] (true class 1, predicted class 0).
print("FN ->",FN)

FN -> 9
