<a href="https://colab.research.google.com/github/Jay-Nehra/Machine_Learning_Algorithms/blob/main/LogisticRegression/LogisticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

# Location of the diabetes CSV in the YBIFoundation/Dataset GitHub repository.
DIABETES_CSV_URL = 'https://github.com/YBIFoundation/Dataset/raw/main/Diabetes.csv'

# Download the CSV into a DataFrame and display per-column summary statistics.
diabetes_df = pd.read_csv(DIABETES_CSV_URL)
diabetes_df.describe()

Unnamed: 0,pregnancies,glucose,diastolic,triceps,insulin,bmi,dpf,age,diabetes
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [None]:
# Standardize the inputs: every column except the last is treated as a feature,
# and the last column is the label.
from sklearn.preprocessing import StandardScaler

# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed in the next cell, so the test rows contribute to
# the mean/std used for scaling. Consider fitting on the training split only.
feature_frame = diabetes_df.iloc[:, :-1]
label_column = diabetes_df.iloc[:, -1]
diabetes_features = feature_frame.to_numpy()
diabetes_result = label_column.to_numpy()

std_scaler = StandardScaler()
std_diabetes_feat = std_scaler.fit_transform(diabetes_features)

In [None]:
#Split the data into 'Train' and 'Test'
from sklearn.model_selection import train_test_split

# Fixed seed for the shuffle: without it the stratified split (and therefore
# every metric computed from it) differs on each Restart & Run All.
SPLIT_RANDOM_STATE = 42

# 75% / 25% split, stratified on the label so both parts keep the class ratio.
(
    train_std_diabetes_feat,
    test_std_diabetes_feat,
    train_diabetes_result,
    test_diabetes_result,
) = train_test_split(
    std_diabetes_feat,
    diabetes_result,
    test_size=0.25,
    shuffle=True,
    stratify=diabetes_result,
    random_state=SPLIT_RANDOM_STATE,
)

# Sanity check: per-class counts overall and in each split.
print("Label Count in the overall diabetes dataset is :", np.bincount(diabetes_result))
print("Label Count in the Train Split of the dataset is :", np.bincount(train_diabetes_result))
print("Label Count in the Test Split of the dataset is :", np.bincount(test_diabetes_result))

Label Count in the overall diabetes dataset is : [500 268]
Label Count in the Train Split of the dataset is : [375 201]
Label Count in the Test Split of the dataset is : [125  67]


In [None]:
"""
    For logistic regression, I need a way to compute the net input, which is the dot product of the weight
    vector and the feature vector plus the bias. For the activation function, logistic regression uses the
    logistic (sigmoid) function. I'll follow the fit/predict paradigm, so I also need to implement those two methods.

"""
class logistic_regression():
    """Binary logistic regression trained with full-batch gradient updates.

    The bias is stored in ``weight[0]`` and the per-feature coefficients in
    ``weight[1:]``.

    Parameters
    ----------
    eta : float
        Learning rate applied to every weight update.
    epochs : int
        Number of full passes over the training data.
    random_seed : int
        Seed of the ``RandomState`` used to draw the initial weights.

    Attributes set by ``fit``
    -------------------------
    weight : ndarray of shape (1 + n_features,)
        Bias followed by the feature coefficients.
    loss : list of float
        Summed (not averaged) log-loss recorded once per epoch.
    accuracy : list of float
        Training accuracy recorded once per epoch.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logarithms.
    # The sigmoid saturates to exactly 1.0 in float64 for large net inputs,
    # which would otherwise put log(0) = -inf (and 0 * -inf = nan) in the loss.
    _EPS = 1e-15

    def __init__(self, eta = 0.001, epochs = 5000, random_seed = 1):
        self.eta = eta
        self.epochs = epochs
        self.random_seed = random_seed

    def net_input(self, X):
        """Return the linear score ``X @ weight[1:] + weight[0]`` for each row of X."""
        return np.dot(X, self.weight[1:]) + self.weight[0]

    def activation_sigmoid(self, z):
        """Return the logistic sigmoid of z, element-wise.

        z is clipped to [-250, 250] first so ``np.exp`` cannot overflow.
        """
        return (1/ (1+np.exp(-np.clip(z, -250,250))))

    def predict(self, X):
        """Return class labels: 1 where the predicted probability exceeds 0.5, else 0."""
        return np.where(self.activation_sigmoid(self.net_input(X)) > 0.5, 1, 0)

    def fit(self, X, y):
        """Learn the weights from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Binary targets encoded as 0 / 1.

        Returns
        -------
        tuple
            ``(weight, loss, accuracy)`` -- the learned weight vector and the
            per-epoch loss and accuracy histories (the same objects that are
            stored on the instance).
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        n_samples, n_features = X.shape

        r_seed = np.random.RandomState(self.random_seed)
        # Initial weights ~ N(0, 1/n_features); one extra slot for the bias.
        self.weight = r_seed.normal(loc=0.0, scale=1 / np.sqrt(n_features), size=1 + n_features)
        self.loss = []
        self.accuracy = []

        for _ in range(self.epochs):
            sig_output = self.activation_sigmoid(self.net_input(X))
            error = y - sig_output

            # Step along the averaged gradient. The factor 2 only rescales the
            # effective learning rate to 2 * eta; it is kept so existing
            # eta / epochs settings train exactly as before.
            self.weight[1:] += self.eta * 2 * X.T.dot(error) / n_samples
            self.weight[0] += self.eta * 2 * error.mean()

            # Loss uses this epoch's pre-update probabilities, clipped away
            # from 0 and 1 so both logarithms stay finite.
            safe_output = np.clip(sig_output, self._EPS, 1 - self._EPS)
            self.loss.append(-y.dot(np.log(safe_output)) - (1 - y).dot(np.log(1 - safe_output)))
            # Accuracy is measured with the freshly updated weights.
            self.accuracy.append(np.mean(y == self.predict(X)))

        return self.weight, self.loss, self.accuracy


In [None]:
# Instantiate the hand-written model; the hyperparameters are spelled out and
# equal the class defaults.
LogReg = logistic_regression(eta=0.001, epochs=5000, random_seed=1)

In [None]:
# Train on the standardized training split. fit returns the learned weights
# together with the per-epoch loss and accuracy histories.
weight, loss, acc = LogReg.fit(
    X=train_std_diabetes_feat,
    y=train_diabetes_result,
)

In [None]:
# Predict 0/1 labels for the held-out test features and print them.
y_pred = LogReg.predict(X=test_std_diabetes_feat)
print(y_pred)

[0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 1 0 1 0 1 1 0 1
 0 0 0 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 0 0 1 0 1 1 1 0 0 0 0 0 1 1 0 0 1 1 1
 0 0 0 0 0 0 1 0 0 0 0 1 0 0 1 1 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1
 0 1 1 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 0 0 0
 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 1 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 1
 0 0 0 0 1 1 1]


In [None]:
# Number of test samples whose predicted label equals the true label
correct_predictions = (y_pred == test_diabetes_result).sum()

# Accuracy = matching predictions / number of test samples
accuracy = correct_predictions / test_diabetes_result.shape[0]

print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 75.00%


In [None]:
# Shorter aliases for the standardized train/test splits.
X_train = train_std_diabetes_feat
X_test = test_std_diabetes_feat
y_train = train_diabetes_result
y_test = test_diabetes_result

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Baseline: scikit-learn's LogisticRegression trained on the same split.
model = LogisticRegression(max_iter=50).fit(X_train, y_train)

# NOTE: this rebinds y_pred, replacing the hand-written model's predictions.
y_pred = model.predict(X_test)

# Last expression of the cell, so the test accuracy is displayed as output.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.7604166666666666