In [4]:
import numpy as np
from matplotlib import pyplot as plt
import pandas

In [5]:
def prepare_dataset(filename, pass_threshold=160, score_scale=100):
    '''
    ###### Objective
    Read the exam CSV file and build the input matrix and label vector used by
    the logistic regression model.

    ### Input
    filename: path of a CSV file containing the columns EXAM1, EXAM2 and FINAL
    pass_threshold: a row is labelled 1 when its FINAL value is strictly greater
                    than this number, otherwise 0 (default 160)
    score_scale: EXAM1 and EXAM2 are divided by this number (default 100)

    #### Output
    X: float array of shape (n_rows, 3) whose columns are
       [1, EXAM1/score_scale, EXAM2/score_scale]
    Y: integer array of shape (n_rows, 1) with 1 where FINAL > pass_threshold
       and 0 elsewhere

    '''

    dataframe = pandas.read_csv(filename)

    # Pull whole columns at once instead of iterating row by row.
    exam1 = dataframe['EXAM1'].to_numpy(dtype=float)
    exam2 = dataframe['EXAM2'].to_numpy(dtype=float)
    final = dataframe['FINAL'].to_numpy(dtype=float)

    # The leading column of ones multiplies the first entry of theta (the bias).
    # column_stack keeps the result 2-D even for an empty file: shape (0, 3).
    X = np.column_stack((np.ones(len(exam1)), exam1 / score_scale, exam2 / score_scale))

    # A NaN compares False against the threshold, so such rows are labelled 0.
    Y = (final > pass_threshold).astype(int).reshape(-1, 1)

    return X, Y

In [None]:
# Load the exam data: X is the model input matrix, Y the 0/1 label column.
X, Y = prepare_dataset(filename='exam.csv')

In [None]:
def sigmoid(x):
    '''
    Logistic function 1 / (1 + exp(-x)), applied element-wise.

    exp() is only ever evaluated on a non-positive argument, so inputs with a
    large magnitude do not overflow (the direct formula emits a RuntimeWarning
    for very negative x).

    x: scalar or array-like of numbers
    returns: a NumPy scalar for scalar input, otherwise an array shaped like x
    '''
    x = np.asarray(x)
    z = np.exp(-np.abs(x))  # lies in [0, 1] for every input

    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same value.
    result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    # np.where returns a 0-d array for scalar input; unwrap it to a scalar.
    return result[()] if result.ndim == 0 else result

In [None]:
def sigmoid_proba(x):
    '''
    Turn a raw model score into a hard class label.

    Returns 1 when sigmoid(x) is strictly greater than 0.5, otherwise 0.
    '''
    probability = sigmoid(x)
    return 1 if probability > 0.5 else 0

In [None]:
np.random.seed(100)

EPOCHS = 10
LR = 1e-1

# Batch size equals the dataset size, so each epoch performs a single update.
BATCH_SIZE = X.shape[0]

# Predictions are clipped to [LOG_EPS, 1 - LOG_EPS] before taking logs so the
# loss stays finite if the sigmoid saturates to exactly 0 or 1.
LOG_EPS = 1e-12

theta = np.random.randn(X.shape[1],1)

loss = []

for i in range(EPOCHS):
    epoch_loss = 0.0
    for b in range(0,len(X), BATCH_SIZE):
        X_batch = X[b:b+BATCH_SIZE]
        Y_batch = Y[b:b+BATCH_SIZE]

        model_output = sigmoid(X_batch@theta)

        # Gradient of the summed (not averaged) cross-entropy over the batch,
        # so the effective step size grows with the number of rows in the batch.
        d_theta = X_batch.T@(model_output - Y_batch)
        theta -= LR*d_theta  # gradient descent step

        # The loss is evaluated on the predictions made before this update.
        clipped_output = np.clip(model_output, LOG_EPS, 1 - LOG_EPS)
        sample_loss = -((Y_batch*np.log(clipped_output)) + ((1-Y_batch)*np.log(1-clipped_output)))
        epoch_loss += sample_loss.sum()
    # Per-sample mean over the whole epoch; equals the batch mean when there is
    # a single batch and stays comparable if BATCH_SIZE is made smaller.
    loss.append(epoch_loss/len(X))

print('The loss at the end of training is ==', loss[-1])
plt.plot(range(1,EPOCHS+1),loss)
plt.ylabel('BCE')
plt.xlabel('Number of epochs')
plt.show()
print(theta)


In [None]:
dataframe = pandas.read_csv('exam.csv')

# Read the two exam columns directly instead of looping over rows.
X1 = dataframe['EXAM1'].tolist()
X2 = dataframe['EXAM2'].tolist()

# Flatten the (n, 1) label column so it can index the colour table.
categories = Y.reshape((len(Y)))

# Label 0 is drawn in red, label 1 in green.
colormap = np.array(['r','g'])
# The colours must be passed with c=...; the original "c+colormap[...]" was a
# syntax error (positional argument after a keyword argument).
plt.scatter(X1,X2, s=20,c=colormap[categories])

x1 = []
x2 = []
y = []

# Classify every integer score pair in 40..99 x 40..99 with the trained theta.
# The scores are divided by 100, matching the scaling done in prepare_dataset.
for i in range(40,100):
    for j in range(40,100):
        x1.append(i)
        x2.append(j)
        model_output = sigmoid_proba(np.array([1,i/100,j/100])@theta)
        y.append(model_output)
categories = y

# Small dots coloured by predicted class show the decision regions.
plt.scatter(x1,x2, s=1,c=colormap[categories])

# Both scatter calls put EXAM1 on the x-axis and EXAM2 on the y-axis.
plt.xlabel('EXAM1')
plt.ylabel('EXAM2')
plt.show()