In [469]:
# Import needed libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [470]:
#Defines Global Variables data entries
TOTAL_DATA = 150

# Splits data into training(80%) and testing(20%)
TRAINING_DATA = int(TOTAL_DATA*0.8) 
TESTING_DATA = int(TOTAL_DATA*0.2)

# Number of input features
FEATURE_NUMBER = 4

# Number of classes and respective indexing
CLASS_NUMBER = 3
CLASSNAMES = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']

In [471]:
# Read in data
data = pd.read_csv('../DataSets/Iris.csv')

#Splits into 2 classes instead of 3
#data = data[0:100]

# Shuffle data
data = data.sample(frac = 1)

data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
11,12,4.8,3.4,1.6,0.2,Iris-setosa
81,82,5.5,2.4,3.7,1.0,Iris-versicolor
139,140,6.9,3.1,5.4,2.1,Iris-virginica
10,11,5.4,3.7,1.5,0.2,Iris-setosa
44,45,5.1,3.8,1.9,0.4,Iris-setosa


In [472]:
# Pulling feature data and splicing it for training/testing using 80-20 ratio
x_train = data.iloc[:TRAINING_DATA, 1:FEATURE_NUMBER+1].values
x_test = data.iloc[TRAINING_DATA:, 1:FEATURE_NUMBER+1].values

# Standardizes the data by dividing the entries by standard deviation (i.e calculating how many standard deviations the entries are from the center)
x_train = (x_train - np.mean(x_train, axis=0)) / np.std(x_train, axis=0)
x_test = (x_test - np.mean(x_test, axis=0)) / np.std(x_test, axis=0)

# Reshape for matrix multiplication
x_train = x_train.reshape(FEATURE_NUMBER, TRAINING_DATA)
x_test = x_test.reshape(FEATURE_NUMBER, TESTING_DATA)



In [473]:
# Pulling label data and splicing it for training/testing using 80-20 ratio
y_train_data = data.iloc[:TRAINING_DATA, -1].values
y_test_data = data.iloc[TRAINING_DATA:, -1].values

# Create vectorized representations of each data point's class membership
y_train = np.zeros(shape=(CLASS_NUMBER, TRAINING_DATA))
y_test = np.zeros(shape=(CLASS_NUMBER, TESTING_DATA))

# Setting numeric labels for each data point's class
for row in range(CLASS_NUMBER):
    y_train[row, :TRAINING_DATA]  = [ele == CLASSNAMES[row] for ele in y_train_data]
    y_test[row, :TESTING_DATA] = [ele == CLASSNAMES[row] for ele in y_test_data]

In [474]:
# Randomly initialize weights and bias matricies
w = np.random.rand(CLASS_NUMBER, FEATURE_NUMBER)
b = np.random.rand(CLASS_NUMBER, 1)



In [475]:
#Class coresspondance function, returns a vector (z), which will be input into softmax function
#The highest number in z represents the correct class
def class_corr(weights, bias, x):
    
    numData = x.shape[1]
    print(numData)
    z = weights.dot(x)
    #print(z)
    for input in range(numData):
        z[0:CLASS_NUMBER, input:input+1] += bias

    return z

z = class_corr(w, b, x_test)
print(z)



30
[[ 6.23175268e-02 -1.36227672e-01  1.41369588e+00  1.08103756e-01
   2.11248741e-01  2.05196373e+00  1.04263828e+00 -1.05908405e+00
   7.67466552e-01  1.71386914e+00 -3.36762651e-01  4.44366596e-01
   1.36502976e+00 -1.13523549e+00  4.33007535e-01  2.89211333e+00
  -2.22088994e+00 -8.23177191e-01 -1.09599182e+00 -1.17398488e+00
  -8.43281862e-01  1.43907604e-01 -2.86098227e-01  1.56768835e-01
   1.75084373e+00  2.12415850e-01  1.28189441e+00  1.24077762e+00
  -6.24500970e-01  9.77709303e-01]
 [-7.29282006e-01  1.10637223e+00  1.54033498e-01 -1.06606313e+00
   1.40381212e+00  9.55686963e-01  2.13260047e+00  3.76064409e-01
   4.31099109e-01 -1.72722833e-01  5.07542883e-01  9.77550336e-01
   3.02721419e+00  5.81223021e-01  1.41930652e+00  3.30177959e+00
  -1.23876588e+00 -2.66424182e-01 -9.49611541e-01 -8.83467384e-01
   1.15244175e+00  8.86381085e-02  1.27258597e+00  1.88132295e+00
   1.32720839e+00 -2.87496185e-01  1.52369749e+00  1.82166729e+00
  -2.63534444e-01  1.82570998e-01]
 [ 

In [476]:
#Softmax Function
#expnentiates all elements of the z vector and divides by their sum to see class probability
def softmax(z):

    #creates empty probability array
    y_hat = np.empty([z.shape[0], z.shape[1]])
    numData = z.shape[1]

    #exponentiates the matrix
    z_exp = np.exp(z)

    #Converts z vector into probability distribution
    for input in range(numData):
        z_sum = np.sum(z_exp[0:CLASS_NUMBER, input:input+1])
        y_hat[0:CLASS_NUMBER, input:input+1] = z_exp[0:CLASS_NUMBER, input:input+1]/z_sum
    return y_hat


y_pred = softmax(z)
print(softmax(z))


[[0.41740171 0.16837815 0.58673571 0.57875365 0.19716826 0.56403648
  0.20573922 0.16179867 0.43991824 0.67098824 0.24926945 0.27035679
  0.1488822  0.14227033 0.20315787 0.21309469 0.220095   0.28520433
  0.28914821 0.33665671 0.09354414 0.29466705 0.1248339  0.1251864
  0.34107485 0.32032236 0.29876849 0.29553851 0.25763135 0.44883139]
 [0.18913283 0.58336451 0.16648615 0.17887908 0.64977156 0.18845178
  0.6119017  0.67959786 0.31426025 0.10171346 0.57989089 0.46078256
  0.78473009 0.7917039  0.54472569 0.32098773 0.58768044 0.49768079
  0.33472847 0.45015017 0.68825336 0.27882284 0.59328106 0.70229808
  0.22328893 0.19430243 0.38049463 0.52831295 0.36962768 0.2026558 ]
 [0.39346546 0.24825735 0.24677815 0.24236728 0.15306018 0.24751174
  0.18235908 0.15860347 0.24582151 0.2272983  0.17083966 0.26886065
  0.06638771 0.06602577 0.25211644 0.46591758 0.19222456 0.21711487
  0.37612333 0.21319311 0.2182025  0.42651011 0.28188504 0.17251553
  0.43563622 0.48537521 0.32073689 0.17614854 0

In [477]:
#Loss function
#Returns the sum of all probabilities compared to the actual class
def cost(y, y_pred):

    total_loss = 0

    #Finds number of data samples
    numData = y.shape[1]

    #Manipulates y_pred into right form for the loss function
    y_pred = -1*np.log(y_pred)

    #iterates over all data samples
    for column in range(numData):

        #Computes total class loss for each data sample and sums them
        y_cur = y[0:CLASS_NUMBER, column:column+1].transpose()
        y_pred_cur = (y_pred[0:CLASS_NUMBER, column:column+1])
        total_loss += y_cur.dot(y_pred_cur)[0][0]

    #Divdes by number of data samples to find average error
    total_loss = total_loss/numData
    return total_loss
    
    

        



cost(y_test, y_pred)

1.3748808447088445

In [478]:
# Take the gradients of the loss with respect to weights and bias
def findGradients(x, y, y_pred):
    # Number of data samples
    numData = x.shape[0]

    # Calculate error for gradient computations
    error = y_pred - y
    error = error.transpose()

    # Dictionary for holding gradient values
    gradientDict = dict()
    gradientDict['gradWeights'] = np.array((1/numData) * x.dot(error))
    gradientDict['gradBias'] =  (1/numData) * np.sum(error)

    return gradientDict