In [1]:
# Importing libraries
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder

In [2]:
d = datasets.load_breast_cancer()
x, y = d.data, d.target

In [3]:
x


array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [4]:
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [5]:
# defining a fuction for train -test split

def split_data(X, y, test_size=0.2, random_state=0):
    np.random.seed(random_state)                  
    #set the seed for reproducible results
    indices = np.random.permutation(len(X))       
    #shuffling the indices        
    data_test_size = int(X.shape[0] * test_size)  
    #Get the test size

    #Separating the Independent and Dependent features into the Train and Test Set
    train_indices = indices[data_test_size:]
    test_indices = indices[:data_test_size]
    X_train = X[train_indices]
    y_train = y[train_indices]
    X_test = X[test_indices]
    y_test = y[test_indices]
    return X_train, y_train, X_test, y_test

In [6]:
class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iterations=100000, y_pred = None):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.bias = None
        self.weights = None
    
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def fit(self,X,y):
            n_samples, n_features= X.shape
            self.weights=np.zeros(n_features)
            self.bias=0
            
                
            for i in range(self.num_iterations):
                    linear_pred = np.dot(X, self.weights) + self.bias
                    predictions = self.sigmoid(linear_pred)
                    #error = -1/size * np.sum(y * np.log(predictions)) + (1 - y) * np.log(1-sigma)
                
                    # calculating gradient for weight and bias)
                   # print(np.dot(X.T, error))
                    dw = (1/n_samples) * np.dot(X.T, (predictions - y))
                    #
                   # print(dw)
                    db = (1/n_samples) * np.sum(predictions - y)
                
                
                    # for updating weights and bias
                    self.weights -= self.learning_rate * dw
                    self.bias -= self.learning_rate * db


    def predict(self, X):
        Z = np.dot(X, self.weights) + self.bias
        LR = self.sigmoid(Z)
        class_pred = [1 if y > 0.5 else 0 for y in LR]
        return np.array(class_pred)


    def accuracy_score(self, y_test, y_pred):
        """Calculate the accuracy score for a set of predictions."""
        n_correct = np.sum(y_test == y_pred)
        n_total = len(y_test)
        accuracy = n_correct / n_total
        return accuracy
    


In [7]:
#Splitting the dataset
X_train, y_train, X_test, y_test = split_data(x,y)
    

In [8]:
# model fitting
model=LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

  return 1 / (1 + np.exp(-x))


In [9]:
acc = model.accuracy_score (y_test, y_pred)
print(acc)

0.8584070796460177
