In [None]:
from sklearn.datasets import fetch_openml
import numpy as np 
import pandas as pd

# Preprocessing the data

## Fetch the data with scikit-learn

In [None]:
# Pin the dataset version so every run downloads the same data; without it
# fetch_openml resolves the name to whichever version is currently active.
mnist = fetch_openml('mnist_784', version=1)
# Show a compact summary instead of echoing the whole Bunch.
mnist.data.shape, mnist.target.shape

### Convert the fetched data to a DataFrame

In [None]:
# Build the frame from the fetched pixel data, then attach the labels
# (cast to 32-bit integers) as a new 'target' column.
df = pd.DataFrame.from_dict(mnist.data)
target_labels = mnist.target.astype("int32")
df['target'] = target_labels

### Extract the images of the digits 0 and 1

In [None]:
# Keep only the rows whose label is the digit 0 or the digit 1.
binary_digits = [0, 1]
is_binary_digit = df['target'].isin(binary_digits)
df = df.loc[is_binary_digit]

## Standardize the data

In [None]:
def standarization(X):
    """Return a standardized copy of ``X`` (zero mean, unit standard deviation).

    The mean and standard deviation are computed over *all* elements of ``X``
    (a single scalar each), not per column. ``X`` itself is not modified, so
    the caller must use the returned value.

    Parameters
    ----------
    X : numpy.ndarray
        Numeric array of any shape.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``X``.
    """
    mean = np.mean(X)
    std = np.std(X)
    # A constant input has zero spread; dividing by it would fill the result
    # with NaN/inf. Every deviation from the mean is exactly 0 in that case,
    # so the centred values are already the right answer.
    if np.ndim(std) == 0 and std == 0:
        return X - mean
    return (X - mean) / std

In [None]:
# convert data frame into numpy
X = df.to_numpy()
# make standarization to X
standarization(X)
X

## Split the data into X and y

In [None]:
# delete target column from X
X = np.delete(X , 784 , axis = 1)
# convert data frame into numpy
y = df['target'].to_numpy()

# Implement Logistic Regression From Scratch

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    epoches : int
        Maximum number of passes over the training data.
    tolerance : float
        Training stops early once the cross-entropy cost is at or below
        this value.
    random_state : int or None
        Seed for the initial weights. ``None`` (the default) draws them from
        NumPy's global random state.
    """

    def __init__(self, learning_rate=0.01, epoches=1000, tolerance=1e-7, random_state=None):
        self.learning_rate = learning_rate
        self.epoches = epoches
        self.tolerance = tolerance
        self.random_state = random_state

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
        # exp() overflows float64 for arguments above ~709. Clipping keeps the
        # computation free of overflow warnings and changes the result only
        # where it is already indistinguishable from 0 or 1.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def yPredict(self, X, w, b):
        """Return the net input ``w . X^T + b`` for every row of ``X``.

        Despite the name this is the value *before* the sigmoid is applied;
        pass it through ``sigmoid`` to obtain probabilities.
        """
        return np.dot(w, X.T) + b

    def identifyClassWithThreshold(self, y_predict, threshold=0.5):
        """Convert probabilities into 0/1 class labels.

        Values greater than or equal to ``threshold`` become 1, the rest 0.
        When ``y_predict`` is a NumPy array it is also overwritten in place,
        so callers that ignore the return value still end up with labels.

        Returns
        -------
        numpy.ndarray
            Integer array of 0/1 labels with the same shape as ``y_predict``.
        """
        labels = (np.asarray(y_predict) >= threshold).astype(int)
        if isinstance(y_predict, np.ndarray):
            y_predict[...] = labels
        return labels

    def accuracy(self, y, y_predict):
        """Return the percentage (0-100) of predictions that equal the labels.

        Raises
        ------
        ValueError
            If ``y`` and ``y_predict`` do not hold the same number of values.
        ZeroDivisionError
            If ``y`` is empty.
        """
        actual = np.ravel(y)
        predicted = np.ravel(y_predict)
        if actual.shape != predicted.shape:
            raise ValueError("y and y_predict must contain the same number of values")
        matches = np.count_nonzero(actual == predicted)
        return (matches / len(actual)) * 100

    def costWithCrossEntropy(self, y, y_predicted):
        """Return the mean binary cross-entropy between labels and probabilities.

        Computes ``-(1/n) * sum(y*log(p) + (1-y)*log(1-p))``.
        """
        # Cast to float so that negating the labels cannot wrap around for
        # unsigned integer dtypes.
        y = np.asarray(y, dtype=float)
        # Saturated predictions (exactly 0 or 1) would produce log(0) = -inf
        # and then NaN via 0 * inf; keep them strictly inside (0, 1).
        eps = 1e-15
        y_predicted = np.clip(y_predicted, eps, 1 - eps)
        length = len(y)
        first_term = np.dot(-y, np.log(y_predicted))
        second_term = np.dot(1 - y, np.log(1 - y_predicted))
        return (first_term - second_term) / length

    def fit(self, X, y):
        """Learn weights and bias with batch gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) holding 0/1 labels

        Returns
        -------
        (w, b) : tuple of numpy.ndarray
            Weight vector of shape (n_features,) and bias of shape (1,).
            Both are also stored on the instance as ``self.w`` / ``self.b``.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        length = len(X)
        if length == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(y) != length:
            raise ValueError("X and y must contain the same number of samples")

        # Initialize the weights randomly in [0, 1). A seed makes the run
        # reproducible; without one the global NumPy random state is used.
        if self.random_state is None:
            w = np.random.rand(X.shape[1])
            b = np.random.rand(1)
        else:
            rng = np.random.default_rng(self.random_state)
            w = rng.random(X.shape[1])
            b = rng.random(1)

        for _ in range(self.epoches):
            # Net input -> probability.
            y_predicted = self.sigmoid(self.yPredict(X, w, b))

            # Stop as soon as the cost is small enough.
            error = self.costWithCrossEntropy(y, y_predicted)
            if error <= self.tolerance:
                break

            # Gradient of the mean cross-entropy w.r.t. weights and bias.
            residual = y_predicted - y
            dw = np.dot(residual, X) / length
            db = np.mean(residual)
            w = w - dw * self.learning_rate
            b = b - db * self.learning_rate

        self.w, self.b = w, b
        return w, b
            
        
        

# Implement Cross Validation From Scratch

In [None]:
def kFoldCrossValidation(X , y , model , folds = 10 , threshold = 0.5):
    """Estimate a model's accuracy with k-fold cross validation.

    The data is cut into ``folds`` contiguous slices (no shuffling). Each
    slice is used once for validation while the model is trained on the rest.
    The average validation accuracy is printed and returned.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
    y : numpy.ndarray of shape (n_samples,)
    model : object providing ``fit``, ``yPredict``, ``sigmoid``, ``accuracy``
        and a ``learning_rate`` attribute.
    folds : int
        Number of folds; must be between 2 and ``len(X)``.
    threshold : float
        Probabilities at or above this value are classified as 1.

    Returns
    -------
    float
        Mean validation accuracy (in percent) over all folds.

    Raises
    ------
    ValueError
        If ``folds`` is smaller than 2 or larger than the number of samples.
    """
    length = len(X)
    if folds < 2 or folds > length:
        raise ValueError("folds must be between 2 and the number of samples")

    accuracy = 0
    for fold in range(folds):
        # Integer arithmetic keeps the boundaries exact, so the last fold
        # always ends at the final sample.
        start = (fold * length) // folds
        end = ((fold + 1) * length) // folds

        # split data into train and validate
        x_validate , y_validate = X[start : end] , y[start : end]
        x_train = np.concatenate( (X[:start] , X[end:]) )
        y_train = np.concatenate( (y[:start] , y[end:]) )

        # standarization() returns a new array, so its result must be kept
        x_train = standarization(x_train)
        x_validate = standarization(x_validate)

        # learn the model
        w , b = model.fit(x_train , y_train)

        # net value -> probability for the validation set
        net_value = model.yPredict(x_validate , w , b)
        y_predict = model.sigmoid(net_value)

        # Turn probabilities into 0/1 labels here, so the accuracy never
        # depends on y_predict having been modified in place elsewhere.
        y_labels = (np.asarray(y_predict) >= threshold).astype(int)

        accuracy += model.accuracy(y_validate , y_labels)

    accuracy /= folds

    print('with learning rate : ' , model.learning_rate , ' average of accuracy was : ' , accuracy)
    return accuracy
        

# Test the model with cross-validation

In [None]:
# Ask how many learning rates to try, then prompt for each value in turn.
number_of_learning_rate = int(input("Please enter number of learning rate you want to test : "))
list_of_learning_rates = [
    float( input(f"Enter your {i + 1} learning rate : ") )
    for i in range(number_of_learning_rate)
]

In [None]:
# A single model instance is reused for every run; only its learning rate
# changes between cross-validation passes.
n_epochs = 100
model = LogisticRegression(epoches=n_epochs)
for candidate_rate in list_of_learning_rates:
    model.learning_rate = candidate_rate
    kFoldCrossValidation(X , y , model)