# Predicting sickness based on iris data


First of all, we import the necessary dependencies

In [2]:
import numpy as np
import pandas as pd

## Storing and parsing the data

Then, we need to convert the csv dataset to a Pandas dataframe and parse the necessary values needed for our model later on (if any).

In [3]:
all_iris_df = pd.read_csv('iris_dataset.csv')

## Splitting into train and test dataframes
We need to create 2 new dataframes, one for training the model and the other for testing.
- Training will be used to calculate the appropriate weights that the model assigns to each class.
- Testing will be used to calculate the accuracy of the model.

In [4]:
# Change the train and test percentages here
# Try for all 3 values: 80-20, 70-30, 60-40
train_size = 0.8

# Randomly sample the data
train_iris_df = all_iris_df.sample(frac=train_size)

# Get the remaining data for testing
test_iris_df = all_iris_df.drop(train_iris_df.index)

We also need to separate the classes from the attributes

In [5]:
train_attributes =  np.array(train_iris_df.drop('variety', axis=1))
train_class = np.array(train_iris_df['variety'])

test_attributes =  np.array(test_iris_df.drop('variety', axis=1))
test_class = np.array(test_iris_df['variety'])

## Training the model
We'll be using a Multiclass Perceptron algorithm to predict the outcomes in our model.<br><br>
Firstly, we need to initialize the weights for each class and the learning rate arbitrarily.<br>
We also need to intialize a flag that sets to false if our model is already 100% accurate in an epoch.<br>

In [None]:
# Each nth element of a class' weight belongs to the nth attributes of the iris data
# [     w1     ,     w2     ,     w3      ,     w4     ]
# [sepal.length, sepal.width, petal.length, petal.width]

weights = {
    'setosa': np.random.rand(4),
    'versicolor': np.random.rand(4),
    'virginica': np.random.rand(4)
}

learning_rate = 0.1

# Flag to check if the weights are accuracte
inaccurate = True

# Counter for the number of iterations
epoch = 0

# The classes of the iris dataset
classes = ['setosa', 'versicolor', 'virginica']

# The limit for amount of epochs in case final weights are hard to find
max_epochs = 20000

Now, we train our model and calculate the final weights.<br>
Here's how Multiclass Perceptron works in our case:
1. For each epoch:
    - For each training example:
        - Calculate the weighted sum for each class.
        - Predict the class with the highest score.
        - If the prediction is incorrect:
            - Update the weights for the true class and the predicted class.
            - Set the flag `inaccurate` to True.
    - If `inaccurate` is False, break the loop.
2. The final weights are stored in the `weights` dictionary.

For a more detailed explanation of the algorithm, view [`algorithm.txt`](./algorithm.txt)

In [None]:
while inaccurate and epoch < max_epochs:
    inaccurate = False
    for index, data in enumerate(train_attributes):
        setosa_sum = np.dot(weights['setosa'], data)
        versicolor_sum = np.dot(weights['versicolor'], data)
        virginica_sum = np.dot(weights['virginica'], data)

        # Get index of the highest sum and get the name of the class
        prediction = classes[np.argmax([setosa_sum, versicolor_sum, virginica_sum])]

        expected = train_class[index].lower()

        # If the prediction is wrong...
        if prediction != expected:
            inaccurate = True
            
            # Increase weight for expected class
            weights[expected] = weights[expected] + (learning_rate * data)

            # Decrease weight for predicted class
            weights[prediction] = weights[prediction] - (learning_rate * data)
        
        
    print("Epoch:", epoch + 1)
    print("Weights:", weights)
    epoch += 1

print(epoch)
print(weights)


Epoch: 1
Weights: {'setosa': array([1, 1, 1, 1]), 'versicolor': array([0, 0, 0, 0]), 'virginica': array([0, 0, 0, 0])}
Epoch: 1
Weights: {'setosa': array([0.42, 0.73, 0.49, 0.81]), 'versicolor': array([0, 0, 0, 0]), 'virginica': array([0.58, 0.27, 0.51, 0.19])}
Epoch: 1
Weights: {'setosa': array([-0.37,  0.35, -0.15,  0.61]), 'versicolor': array([0, 0, 0, 0]), 'virginica': array([1.37, 0.65, 1.15, 0.39])}
Epoch: 1
Weights: {'setosa': array([-0.37,  0.35, -0.15,  0.61]), 'versicolor': array([0.66, 0.3 , 0.44, 0.14]), 'virginica': array([0.71, 0.35, 0.71, 0.25])}
Epoch: 1
Weights: {'setosa': array([-0.37,  0.35, -0.15,  0.61]), 'versicolor': array([0.66, 0.3 , 0.44, 0.14]), 'virginica': array([0.71, 0.35, 0.71, 0.25])}
Epoch: 1
Weights: {'setosa': array([-0.37,  0.35, -0.15,  0.61]), 'versicolor': array([1.18, 0.57, 0.83, 0.28]), 'virginica': array([0.19, 0.08, 0.32, 0.11])}
Epoch: 1
Weights: {'setosa': array([-0.37,  0.35, -0.15,  0.61]), 'versicolor': array([0.55, 0.3 , 0.34, 0.1 ]), '

KeyboardInterrupt: 