In [757]:
# Import needed libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [758]:
#Defines Global Variables data entries
TOTAL_DATA = 150

# Splits data into training(80%) and testing(20%)
TRAINING_DATA = int(TOTAL_DATA*0.8) 
TESTING_DATA = int(TOTAL_DATA*0.2)

# Number of input features
FEATURE_NUMBER = 4

# Number of classes and respective indexing
CLASS_NUMBER = 3
CLASSNAMES = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

In [759]:
# Read in data
data = pd.read_csv('../DataSets/Iris.csv')

#Splits into 2 classes instead of 3
#data = data[0:100]

# Shuffle data
data = data.sample(frac = 1)

data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
103,104,6.3,2.9,5.6,1.8,Iris-virginica
49,50,5.0,3.3,1.4,0.2,Iris-setosa
0,1,5.1,3.5,1.4,0.2,Iris-setosa
27,28,5.2,3.5,1.5,0.2,Iris-setosa
104,105,6.5,3.0,5.8,2.2,Iris-virginica


In [760]:
# Pulling feature data and splicing it for training/testing using 80-20 ratio
x_train = data.iloc[:TRAINING_DATA, 1:FEATURE_NUMBER+1].values
x_test = data.iloc[TRAINING_DATA:, 1:FEATURE_NUMBER+1].values

# Standardizes the data by dividing the entries by standard deviation (i.e calculating how many standard deviations the entries are from the center)
x_train = (x_train - np.mean(x_train, axis=0)) / np.std(x_train, axis=0)
x_test = (x_test - np.mean(x_test, axis=0)) / np.std(x_test, axis=0)

# Reshape for matrix multiplication
x_train = x_train.reshape(FEATURE_NUMBER, TRAINING_DATA)
x_test = x_test.reshape(FEATURE_NUMBER, TESTING_DATA)



In [761]:
# Pulling label data and splicing it for training/testing using 80-20 ratio
y_train_data = data.iloc[:TRAINING_DATA, -1].values
y_test_data = data.iloc[TRAINING_DATA:, -1].values

# Create vectorized representations of each data point's class membership
y_train = np.zeros(shape=(CLASS_NUMBER, TRAINING_DATA))
y_test = np.zeros(shape=(CLASS_NUMBER, TESTING_DATA))

# Setting numeric labels for each data point's class
for row in range(CLASS_NUMBER):
    y_train[row, :TRAINING_DATA]  = [ele == CLASSNAMES[row] for ele in y_train_data]
    y_test[row, :TESTING_DATA] = [ele == CLASSNAMES[row] for ele in y_test_data]

In [762]:
# Randomly initialize weights and bias matricies
w = np.random.rand(CLASS_NUMBER, FEATURE_NUMBER)
b = np.random.rand(CLASS_NUMBER, 1)



In [763]:
#Class coresspondance function, returns a vector (z), which will be input into softmax function
#The highest number in z represents the correct class
def class_corr(weights, bias, x):
    
    numData = x.shape[0]
    z = weights.dot(x)
    #print(z)
    for input in range(numData):
        z[0:CLASS_NUMBER, input:input+1] += bias

    return z

z = class_corr(w, b, x_test)
z.shape 


(3, 30)

In [764]:
#Softmax Function
#expnentiates all elements of the z vector and divides by their sum to see class probability
def softmax(z):

    #creates empty probability array
    y_hat = np.empty([z.shape[0], z.shape[1]])
    numData = z.shape[0]

    #exponentiates the matrix
    z_exp = np.exp(z)

    #Converts z vector into probability distribution
    for input in range(numData):
        z_sum = np.sum(z_exp[0:CLASS_NUMBER, input:input+1])
        y_hat[0:CLASS_NUMBER, input:input+1] = z_exp[0:CLASS_NUMBER, input:input+1]/z_sum
    return y_hat


softmax(z)


array([[ 0.63405516,  0.38296313,  0.65433425,  0.42328965, -0.07715597,
        -0.68608035,  1.88128163,  1.86577032,  1.36604187,  0.64635351,
        -0.45811521, -0.8786122 ,  0.09259061,  0.02678315, -1.27516967,
         0.72569446, -1.52649351, -1.26641019,  1.41026293,  0.97837831,
         1.6236292 ,  1.64723908, -1.04175767, -0.21575352, -2.24648079,
        -1.71962742, -0.95280246, -1.22182041,  0.44922389,  0.68814984],
       [ 0.18344575,  0.14651644,  0.19657114,  2.10220616, -1.5624777 ,
        -2.45974064,  0.25395219, -0.71632768,  0.35508429, -0.6608705 ,
         0.69462987, -1.21777404,  0.64202334,  0.10990847, -1.30781058,
         2.25419099, -0.98633315, -0.33394466,  2.23540173,  1.69558172,
         2.7478101 ,  2.27760702,  0.31048823, -0.97789591, -1.64039441,
         0.02601551, -1.65476768, -2.04824364,  0.37319668,  0.28371399],
       [ 0.18249909,  0.47052043,  0.14909461,  1.68734904, -1.79427307,
        -2.60891557, -0.3499128 , -1.46630281, -0

In [765]:
#Loss function
#Returns the sum of all probabilities compared to the actual class
def cost(x, y, y_pred):
    # Number of data samples
    numData = x.shape[0]

    total_loss = 0

    for i in range(numData):
        z = class_corr(w, b, x[:, i])
        y_pred = softmax(z)
        y_actual = np.reshape(y[:, i], [1, 3])
        total_loss += -1*(y_actual.dot(np.log(y_pred.transpose())))

    total_loss = total_loss/numData
    return total_loss
    

        



cost(x_train, y_train)

TypeError: cost() missing 1 required positional argument: 'y_pred'

In [None]:
# Take the gradients of the loss with respect to weights and bias
def findGradients(x, y, y_pred):
    # Number of data samples
    numData = x.shape[0]

    # Calculate error for gradient computations
    error = y_pred - y
    error = error.transpose()

    # Dictionary for holding gradient values
    gradientDict = dict()
    gradientDict['gradWeights'] = np.array((1/numData) * x.dot(error))
    gradientDict['gradBias'] =  (1/numData) * np.sum(error)

    return gradientDict