In [26]:
import numpy as np

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The textbook formula overflows in ``exp`` when ``x`` is a large negative
    number. This version only ever exponentiates a non-positive value, so it
    never overflows:

    * for ``x >= 0``: ``1 / (1 + exp(-x))``
    * for ``x <  0``: ``exp(x) / (1 + exp(x))``

    Parameters
    ----------
    x : scalar or array-like of real numbers
        Input value(s). Non-floating input (e.g. integers) is converted to
        ``float64``; floating input keeps its dtype.

    Returns
    -------
    NumPy scalar or ndarray
        Values in ``[0, 1]`` with the same shape as ``x``. A scalar input
        yields a NumPy scalar rather than a 0-d array.
    """
    z = np.asarray(x)
    if not np.issubdtype(z.dtype, np.floating):
        z = z.astype(np.float64)

    # exp() is evaluated on -|z| <= 0 only, so its result lies in (0, 1].
    decay = np.exp(-np.abs(z))
    # Both branches are finite everywhere (1 + decay >= 1), so it is safe for
    # np.where to evaluate them eagerly.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple unwraps a 0-d array into a scalar and
    # leaves arrays of higher rank untouched.
    return result[()]

class LogisticRegression:
    """Binary logistic-regression classifier trained by batch gradient descent.

    The model computes ``sigmoid(X @ weights + bias)`` using the module-level
    ``sigmoid`` helper and labels a sample ``1`` when that value is strictly
    greater than 0.5, otherwise ``0``.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    n_iterations : int
        Number of gradient-descent updates performed by ``fit``.
    """

    def __init__(self, learning_rate, n_iterations):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        # Learned parameters; both remain None until fit() has been called.
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Each row is a sample and each column is a feature.
        y : array-like with n_samples elements
            Target values. Any shape is accepted as long as it flattens to
            ``n_samples`` entries (so a column vector works too).

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, contains no samples, or ``y``
            does not have one entry per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y: an (n, 1) column would otherwise broadcast against the
        # (n,) prediction vector into an (n, n) matrix and corrupt the update.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got {X.ndim} dimension(s)"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} values"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0
        inv_n = 1 / n_samples  # loop-invariant averaging factor

        for _ in range(self.n_iterations):
            # Linear model: w.x + b
            linear_predictions = np.dot(X, self.weights) + self.bias
            # Logistic model: 1 / (1 + exp(-(w.x + b)))
            predictions = sigmoid(linear_predictions)

            # Averaged gradients with respect to the weights and the bias.
            error = predictions - y
            dw = inv_n * np.dot(X.T, error)
            db = inv_n * np.sum(error)

            self.weights = self.weights - self.learning_rate * dw
            self.bias = self.bias - self.learning_rate * db

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        ``fit`` must have been called first so that ``weights`` and ``bias``
        are set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        list of int
            ``1`` where the predicted probability is strictly greater than
            0.5, otherwise ``0``.
        """
        linear_predictions = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        # atleast_1d keeps the return type a list even for a single sample.
        probabilities = np.atleast_1d(sigmoid(linear_predictions))
        return np.where(probabilities <= 0.5, 0, 1).tolist()

In [27]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

# Read the comma-separated data file into a 2-D numeric array.
path = 'pima-indians-diabetes.data.csv'
data = np.genfromtxt(path, delimiter=',')

# Every column except the last is used as a feature; the last one is the target.
X = data[:, :-1]
y = data[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)

# Train on the training split, then label the held-out rows.
clf = LogisticRegression(learning_rate=0.0001, n_iterations=500)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

def accuracy(y_pred, y_test):
    """Return the fraction of predictions that equal the true labels.

    Both arguments are converted to flat NumPy arrays before comparison, so
    plain lists and column vectors are compared element by element instead of
    being compared as whole objects or broadcast against each other.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels.
    y_test : array-like
        True labels; must contain as many elements as ``y_pred``.

    Returns
    -------
    float
        Number of matching positions divided by the number of labels.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    predicted = np.asarray(y_pred).ravel()
    actual = np.asarray(y_test).ravel()
    if predicted.shape != actual.shape:
        raise ValueError(
            f"y_pred has {predicted.shape[0]} elements but y_test has {actual.shape[0]}"
        )
    return np.sum(predicted == actual) / len(actual)

# Score the predictions for the held-out rows against their true labels.
acc = accuracy(y_pred, y_test)
# The ' .4f' format prints four decimals with a leading space for non-negative values.
print(f'Accuracy: {acc: .4f}')

Accuracy:  0.6558
