In [141]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [142]:
def signum(x):
    if x >= 0:
        return 1
    else:
        return -1

In [143]:
def perceptron(x, w, b):
    return signum(np.dot(w, x) + b)

In [144]:
def normalizeData(data : pd.DataFrame):
    data = np.abs(data - data.mean()) / data.std()
    return data

In [145]:
data = pd.read_csv('penguins.csv')

# PreProcessing

In [146]:
data[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm',
      'body_mass_g']] = data[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm',
                              'body_mass_g']].apply(lambda x: normalizeData(x))

In [147]:
data.gender.fillna(data.gender.mode()[0],inplace=True)
data.gender.unique()

array(['male', 'female'], dtype=object)

In [148]:
#encode male to 0 and female to 1
gender_dict ={"male":1,"female":0}
data.gender = data.gender.apply(lambda x: gender_dict[x])


# Train Test Split

In [160]:
import warnings
warnings.filterwarnings("ignore")
def splitData(data,class1,class2):
    data = data.sample(frac=1).reset_index(drop=True)
    class1_data = data[data.species == class1]
    class2_data = data[data.species == class2]
    test_data = pd.concat([class1_data[:int(len(class1_data)*0.3)],class2_data[:int(len(class2_data)*0.3)]])
    train_data = pd.concat([class1_data[int(len(class1_data)*0.3):],class2_data[int(len(class2_data)*0.3):]])
    print(len(train_data),len(test_data))
    return train_data,test_data

# Model

In [167]:
def train(data,epoch,lr,feature1,feature2,class1,class2,isBias):
    epoch = 1000
    learning_rate = 0.1
    bias = 1
    w = np.array([0, 0])
    b = 0
    data=data[[feature1,feature2,"species"]]
    data = data[(data.species == class1) | (data.species == class2)]
    data = data.reset_index(drop=True)
    data[[feature1,feature2]] = data[[feature1,feature2]].apply(lambda x: normalizeData(x))
    species_dict = {class1:1,class2:-1}
    data.species = data.species.apply(lambda x: species_dict[x])
    for _ in range(epoch):
        for index, row in data.iterrows():
            x = np.array(row)
            y = x[2]
            x = x[0:2]
            if y * perceptron(x, w, b) <= 0:
                w = w + learning_rate * y * x
                if isBias:
                    b = b + learning_rate * y
    return w,b

In [172]:
def test(data,feature1,feature2,class1,class2,w,b):
    data=data[[feature1,feature2,"species"]]
    species_dict = {class1:1,class2:-1}
    data.species = data.species.apply(lambda x: species_dict[x])
    correct = 0
    for index, row in data.iterrows():
        x = np.array(row)
        y = x[2]
        x = x[0:2]
        if y * perceptron(x, w, b) > 0:
            correct += 1
    return correct/len(data)

In [174]:
train_df,test_df = splitData(data,"Adelie","Chinstrap")

70 30


In [175]:
w,b = train(train_df,1000,1,'bill_length_mm','bill_depth_mm','Adelie','Chinstrap',True)
test(test_df,'bill_length_mm','bill_depth_mm','Adelie','Chinstrap',w,0)

0.5