In [8]:
import pandas as pd
import numpy as np
import os
import copy
import sys
from scipy.special import expit
from math import e

class Regress:

    def __init__(self, features):
        self.weights = None
        
    def fit(self, data, lr, y, iterations=100):

        # Initialize our weights
        if self.weights is None: self.weights = np.zeros(data.shape[1])

        # Flatten the classes
        y = y.flatten()

        # Run for the number of iterations
        for _ in range(iterations):
            # Find the gradient of our current model state
            grad = gradient(self.weights, data, y)
            # Update weights by the learning rate
            self.weights -= lr * grad

    def predict(self, x):
        # Return the models prediction for these features
        return int(model(x, self.weights) > 0.5)

    def reset(self):
        # Reset the models weights
        self.weights = None

def sigmoid(z):
    # Calculate the sigmoid value for the given input
    return 1 / (1 + np.exp(-z))

def model(x, weights):
    # Take the dot product of the input features and weights vectors
    # Then return the sigmoid of this result
    return sigmoid(np.dot(x, weights))

def gradient(weights, x, y):
    # Get the models predictions given these features
    preds = model(x, weights)
    # determine the gradient of these features
    # defined as: X (sigmoid(W_k^T dot X) - Y)
    gradient = np.dot(x.T, preds - y)
    return gradient


class KFold2:
    def __init__(self, reg, k=10):
        self.k = k
        self.reg = reg
    
    def shuffle(self, df):
        return df.sample(frac=1).reset_index(drop=True)

    def splitAtX(self, k,x, df):
        sizeOfSets = len(df) // k
        training = copy.deepcopy(df)
        validation = training[sizeOfSets*x:sizeOfSets+(sizeOfSets*x)]
        for i in range(sizeOfSets*x,sizeOfSets+(sizeOfSets*x)):
            training = training.drop(i)
        self.training = training
        self.validatation = validation
        return training, validation

    def accuEval(self, lr, iterations, shuffled):
        err = 0
        accurate = 0
        inaccurate = 0

        for _ in range(5):
            count = 0
            accurate = 0
            inaccurate = 0

            for x in range(0, self.k):
                trainingSet, validationSet = self.splitAtX(self.k,x,shuffled)
                trainingSetData = trainingSet.iloc[:,:-1]
                classes = trainingSet.iloc[:,-1:]
                self.reg.fit(trainingSetData.to_numpy(), lr, classes.to_numpy(), iterations)
                validationSetData = validationSet.iloc[:,:-1]
                validationSetLabel = validationSet.iloc[:,-1:]
                validationSetLabel = validationSetLabel.to_numpy()
                count = 0
                for i in validationSetData.iterrows():
                    trainingArr = []
                    for j in range(0,len(i[1].to_numpy())):
                        trainingArr.append(i[1].to_numpy()[j])
                    if self.reg.predict(trainingArr) == validationSetLabel[count][0]:
                        accurate += 1
                    else:
                        inaccurate += 1
                    count += 1
                err += inaccurate/count
                #print(accuracyEval)
            err = err/5
        return err

    def run(self):
        return 0


def loadCSV(filename):
    return pd.read_csv(filename)


lrVals = [0.01, 0.04, 0.08, 0.1, 0.2, 0.5, 0.8]
numIterations = [10, 40, 100, 200, 500, 1000, 5000]

df = loadCSV("bankrupcy.csv")
linReg = regress.Regress(len(df.iloc[:,:-1].columns))
kFoldData = KFold2(reg=linReg)
shuffled = kFoldData.shuffle(df)
for lr in lrVals:
    for iterations in numIterations:
        print("Average Error Rate: " + str(kFoldData.accuEval(lr=lr, iterations=iterations, shuffled=shuffled)))
        print("LR = " + str(lr) + "\tIterations = " + str(iterations))

bestAccuracyOne = 0
bestParametersOne = ""

Average Error Rate: 3.3735395555555554
LR = 0.01	Iterations = 10
Average Error Rate: 2.3941688888888892
LR = 0.01	Iterations = 40
Average Error Rate: 2.2972586666666666
LR = 0.01	Iterations = 100
Average Error Rate: 2.0958719999999995
LR = 0.01	Iterations = 200
Average Error Rate: 2.199758222222222
LR = 0.01	Iterations = 500


  return 1 / (1 + np.exp(-z))


Average Error Rate: 2.3498595555555553
LR = 0.01	Iterations = 1000
Average Error Rate: 2.1993102222222225
LR = 0.01	Iterations = 5000
Average Error Rate: 3.2404195555555555
LR = 0.04	Iterations = 10
Average Error Rate: 3.551786666666667
LR = 0.04	Iterations = 40
Average Error Rate: 3.378737777777778
LR = 0.04	Iterations = 100
Average Error Rate: 3.469582222222222
LR = 0.04	Iterations = 200
Average Error Rate: 2.7200284444444445
LR = 0.04	Iterations = 500
Average Error Rate: 2.8934399999999996
LR = 0.04	Iterations = 1000
Average Error Rate: 3.4286222222222222
LR = 0.04	Iterations = 5000
Average Error Rate: 3.1832462222222224
LR = 0.08	Iterations = 10
Average Error Rate: 3.7914168888888895
LR = 0.08	Iterations = 40
Average Error Rate: 3.0902186666666673
LR = 0.08	Iterations = 100
Average Error Rate: 3.488248888888889
LR = 0.08	Iterations = 200
Average Error Rate: 2.909006222222222
LR = 0.08	Iterations = 500
Average Error Rate: 2.8899057777777775
LR = 0.08	Iterations = 1000
Average Error 

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
