In [None]:
# Problem statement: Implement logistic regression using Python on the given dataset.
# Compute the confusion matrix to find TP, FP, TN, FN, accuracy, error rate, precision, and recall
# on the given dataset.
# tp = true positive
# tn = true negative
# fp = false positive
# fn = false negative

In [None]:
# notes
# Logistic Regression: Classification techniques are an essential part of machine learning and data
# mining applications. Approximately 70% of problems in Data Science are classification problems.
# Many classification algorithms are available, but logistic regression is a common and useful
# method for solving binary classification problems.

In [None]:
# Formulae
# Accuracy = (tp + tn)/(tp + tn + fp + fn)
# Error rate = 1 - Accuracy = (fp + fn)/(tp + tn + fp + fn)
# Precision = tp/(tp + fp)
# Recall = tp/(tp + fn)

In [None]:
# Need for the scikit-learn library:
# Overall, scikit-learn simplifies the process of implementing machine learning algorithms
# and enables data scientists to build, evaluate, and deploy models more efficiently.

In [None]:
# Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# `from sklearn.model_selection import train_test_split` imports the `train_test_split`
# function from the `model_selection` module of the `sklearn` library.
# This function is commonly used to split a dataset into training and testing sets for machine learning
# model training and evaluation.
from math import exp

plt.rcParams["figure.figsize"] = (10, 6)

In [None]:
# Read the advertisement dataset from the CSV file expected in the working directory

data = pd.read_csv(filepath_or_buffer="Social_Network_Ads.csv")
data.head(n=5)  # preview: only the first five rows are displayed

In [None]:
# Visualizing the dataset: purchase outcome (0/1) against age
plt.scatter(data['Age'], data['Purchased'])
plt.xlabel("Age")
plt.ylabel("Purchased")
plt.title("Purchased vs. Age")
plt.show()

# Divide the data into a training set (80%) and a test set (20%).
# A fixed random_state makes the split, and therefore every metric computed
# further down, reproducible across "Restart Kernel -> Run All".
X_train, X_test, y_train, y_test = train_test_split(
    data['Age'], data['Purchased'], test_size=0.20, random_state=42
)


In [None]:
# Creating the logistic regression model


# Helper function to center the data (subtracts the mean; no rescaling is applied)
def normalize(X):
    """Center ``X`` around zero by subtracting its mean.

    Only the mean is removed; the values are not rescaled.

    Args:
        X: Any object exposing ``.mean()`` and supporting subtraction of a
            scalar (for example a pandas Series or a numpy array).

    Returns:
        An object of the same kind as ``X`` holding ``X - X.mean()``.
    """
    mean_value = X.mean()
    centered = X - mean_value
    return centered


# Method to make predictions
def predict(X, b0, b1):
    """Return the logistic (sigmoid) response ``1 / (1 + exp(-(b0 + b1 * x)))``.

    The computation is vectorized and numerically stable: the exponential is
    only ever evaluated on non-positive arguments, so extreme inputs saturate
    to 0.0 or 1.0 instead of raising ``OverflowError`` the way ``math.exp``
    does for large arguments.

    Args:
        X: Iterable of numeric feature values (list, numpy array, pandas
            Series, generator, ...).
        b0: Intercept of the linear term.
        b1: Slope of the linear term.

    Returns:
        A 1-D float numpy array with one probability per element of ``X``.
    """
    # np.fromiter consumes any iterable of scalars, mirroring ``for x in X``.
    z = b0 + b1 * np.fromiter(X, dtype=float)

    probabilities = np.empty_like(z)
    non_negative = z >= 0

    # z >= 0: exp(-z) <= 1, so the textbook form cannot overflow.
    probabilities[non_negative] = 1.0 / (1.0 + np.exp(-z[non_negative]))

    # z < 0: use the equivalent form exp(z) / (1 + exp(z)), where exp(z) < 1.
    exp_z = np.exp(z[~non_negative])
    probabilities[~non_negative] = exp_z / (1.0 + exp_z)

    return probabilities


# Method to train the model
def logistic_regression(X, Y, L=0.001, epochs=300):
    """Fit a one-feature logistic model with batch gradient descent.

    The feature is first centered with ``normalize``. The coefficients are
    then updated ``epochs`` times using the gradient of the summed squared
    error between ``Y`` and the sigmoid output of ``predict``.

    Args:
        X: 1-D sequence of feature values (list, numpy array or pandas Series).
        Y: 1-D sequence of 0/1 targets with the same length as ``X``.
        L: Learning rate (step size of each gradient update).
        epochs: Number of full passes over the data.

    Returns:
        Tuple ``(b0, b1)``: intercept and slope for the *centered* feature.
        Inputs passed to ``predict`` must therefore be centered with the same
        mean that was used here.

    Raises:
        ValueError: If ``X`` and ``Y`` do not have the same shape.
    """
    # Plain float arrays keep the arithmetic positional (no pandas index
    # alignment between X and Y) and allow list inputs as well.
    X = normalize(np.asarray(X, dtype=float))
    Y = np.asarray(Y, dtype=float)
    if X.shape != Y.shape:
        raise ValueError(
            f"X and Y must have the same shape, got {X.shape} and {Y.shape}"
        )

    # Initializing variables
    b0 = 0.0
    b1 = 0.0

    # One epoch is one complete pass over the training data; the coefficients
    # are refined a little on every pass.
    for _ in range(epochs):
        y_pred = predict(X, b0, b1)

        # Factor shared by both partial derivatives:
        # (Y - p) * p * (1 - p), where p * (1 - p) is the sigmoid derivative.
        common = (Y - y_pred) * y_pred * (1 - y_pred)
        D_b0 = -2 * np.sum(common)      # Derivative of loss wrt b0
        D_b1 = -2 * np.sum(X * common)  # Derivative of loss wrt b1

        # Update b0 and b1 by stepping against the gradient
        b0 = b0 - L * D_b0
        b1 = b1 - L * D_b1

    return b0, b1

In [None]:
# Training the model
# `logistic_regression` fits the model on the training data and returns the
# coefficients `b0` (intercept) and `b1` (slope). Inside that function the
# feature is centered around the *training* mean, so the coefficients are only
# valid for inputs centered with that same mean.
b0, b1 = logistic_regression(X_train, y_train)

# Making predictions
# Center the test feature with the training mean (not the test set's own mean)
# so the test inputs live on the same scale the coefficients were fitted on.
X_test_norm = X_test - X_train.mean()
y_prob = predict(X_test_norm, b0, b1)
# Threshold the predicted probabilities at 0.5 to obtain class labels
y_pred = [1 if p >= 0.5 else 0 for p in y_prob]

plt.clf()
plt.scatter(X_test, y_test, label="Actual")
plt.scatter(X_test, y_pred, c="red", label="Predicted")
plt.legend()
plt.show()



In [None]:
from sklearn.metrics import confusion_matrix

# Display the confusion matrix of the test-set predictions
# (per scikit-learn: rows are actual classes, columns are predicted classes)
confusion_matrix(y_true=y_test, y_pred=y_pred)


In [None]:
# Another way to obtain TP, FN, FP, TN:
# with labels=[1, 0] the positive class comes first, so the flattened matrix
# reads TP, FN (actual positives) followed by FP, TN (actual negatives).
outcome_matrix = confusion_matrix(y_test, y_pred, labels=[1, 0])
tp, fn, fp, tn = outcome_matrix.ravel()
print('Outcome values : \n', tp, fn, fp, tn)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Summarize the test-set performance: confusion matrix, accuracy and the
# per-class precision / recall / F1 report.
results = confusion_matrix(y_test, y_pred)
print('Confusion Matrix :', results, sep='\n')
print('Accuracy Score :', accuracy_score(y_test, y_pred))
print('Classification Report : ', classification_report(y_test, y_pred), sep='\n')

In [None]:
# Error rate is the complement of accuracy: the share of misclassified samples
Accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
Error_rate = 1 - Accuracy
print(Accuracy)
print(Error_rate)