In [91]:
import matplotlib.pyplot as plt
import numpy as np
import math

In [118]:
# Loading CSV:

def loadCSV(filename):
    data = np.loadtxt(filename, delimiter=',', skiprows=1, unpack=False)
    xarray = data[...,0] # First row! Need to get this on a matrix-form!
    numOfColumns = data.shape[1]
    Y = data[...,(numOfColumns-1)]
    return data, xarray, Y
    
dataTrain, xarrayTrain, YTrain = loadCSV('cl_train_1.csv')
dataTest, xarrayTest, YTest = loadCSV('cl_test_1.csv')


[0.713 0.199 0.685 0.39  0.223 0.768 0.277 0.678 0.075 0.805 0.457 0.562
 0.732 0.245 0.774 0.424 0.984 0.877 0.791 0.429 0.644 0.836 0.412 0.883
 0.592 0.764 0.52  0.206 0.587 0.    0.304 0.069 0.576 0.253 0.498 0.348
 0.211 0.824 0.368 0.77  0.218 0.611 0.204 0.262 0.242 0.966 0.383 0.745
 0.782 0.31  0.121 0.316 0.69  0.149 0.898 0.662 0.023 0.799 0.871]


In [93]:
# Place 1 in position 0 in each x-array in the matrix X:

def addOnes(data,xArrayWithoutOnes):
    X = np.ones((len(xArrayWithoutOnes), (data.shape[1])))
    place = 0
    
    #1D
    if data.shape[1] == 2:
        for obs in xArrayWithoutOnes:
            # Put one obs in each place in X
            X[place,1] = obs
            place+=1
    #2D
    if data.shape[1] == 3:
        place = 0
        for obs in data:
            # Put one obs in each place in X
            X[place,1]= obs[1]
            X[place,2] = obs[2]
            place+=1
    return X

XTrain = addOnes(dataTrain,xarrayTrain)
XTest = addOnes(dataTest, xarrayTest)

In [123]:
# Logistic Regression with Gradient descent. Defining functions:
# z = h(x)
def linearSignal(w, x):
    return (w.T).dot(x)

# Sigma, the sigmoid function. Returns a number between 0 and 1. 
def logistic(z):
    return ( 1 / (1 + math.exp(-z)))
    
# Returns a probability that the datapoint (1,x1,x2) is in y.
def prob(w,x,y):
    z = linearSignal(w,x)
    probability = (logistic(z)**y)*(1 - logistic(z))**(1-y)
    return probability

# Calculating the cross-entropy error on the training and the test-set:
def CEE(X,Y,trainedWeights):

    error = 0
    place = 0
    for y in Y:
        z = linearSignal(trainedWeights,X[place])
        sigmaZ = logistic(z)
        error = error + (y* (np.log(sigmaZ)) + (1 - y)*np.log(1-sigmaZ))
        place += 1
        print(error)
    return error/(Y.shape[0])


In [129]:
# Gradient descent:

def gradientDescent(X,Y,LR,weights,iterations):
    it = 0
    while it <= iterations:
        
        place = 0
        summ = 0
        
        for x in X:

            summ = summ + (logistic(linearSignal(weights,x)) - Y[place])*x
            place+=1
            
        weights = weights - (LR*summ)
        it+=1
        
    # Calcualting the error with CEE for each iteration:
    
    return weights

# Setting the initial weights in the range -0.5 to 0.5:
initWeights = np.array([1, 1,1])

# learning rate in the range -0.5 to 0.5:
#trained_Weights1 = gradientDescent(XTrain,YTrain, 0.5,initWeights, 10)
#trained_Weights2 = gradientDescent(XTrain,YTrain, 0.5,initWeights, 100)
#trained_Weights3 = gradientDescent(XTrain,YTrain, 0.5,initWeights, 1000)
#trained_Weights4 = gradientDescent(XTrain,YTrain, 0.5,initWeights, 5000)
#trained_Weights5 = gradientDescent(XTrain,YTrain, 0.5,initWeights, 8000)
trained_Weights6 = gradientDescent(XTrain,YTrain, 1,initWeights, 1000)

# print(trained_Weights1)
# print(trained_Weights2)
# print(trained_Weights3)
# print(trained_Weights4)
# print(trained_Weights5)
print(trained_Weights6)

error1 = CEE(XTrain, YTrain,trained_Weights6 )
print("error: ",error1)





# Looks like it is converging!

[-11.78942054   2.63680604  38.47197314]
-1.493551155381965e-05
-1.4935513485607712e-05
-2.268154456669526e-05
-2.268154485157849e-05
-2.2681545079840342e-05
-6.157750222209914e-05
-6.15775026848401e-05
-8.306814628835804e-05
-8.306814697736245e-05
-8.306814717431601e-05
-8.30681476357247e-05
-9.478651130682266e-05
-9.4786511562396e-05
-9.478651285291924e-05
-0.0001091050229831908
-0.00010910502333779603
-0.00011996072251684286
-0.00018586728496773798
-0.00019452045726930467
-0.00019452045793321804
-0.00021169608423275688
-0.00027521412221078584
-0.00027521412252142624
-0.00030147322090481415
-0.0003014732211623859
-0.0003132225244771043
-0.00032357502363294515
-0.00032357502405149923
-0.0003235750243932259
-0.000323575024732288
-0.0003235750249678773
-0.00032357502589358127
-0.0003405256963073742
-0.00034052569676389793
-0.000340525697356757
-0.000340525698660825
-0.00034052570058750603
-0.00037665344562279443
-0.00037665344745532855
-0.0004035431517435896
-0.00040354315214060537
-0.0