In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the synthetic iris dataset from CSV and preview five randomly sampled rows.
df = pd.read_csv("iris_synthetic_data.csv")
df.sample(5)

Unnamed: 0,sepal length,sepal width,petal length,petal width,label
2814,6.6,3.3,5.5,2.0,Iris-virginica
1496,5.1,2.5,3.1,1.1,Iris-versicolor
2708,6.1,2.9,4.8,1.8,Iris-virginica
1563,5.8,2.6,3.9,1.2,Iris-versicolor
2932,6.3,2.5,5.0,1.9,Iris-virginica


In [5]:
# Summarize column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal length  3000 non-null   float64
 1   sepal width   3000 non-null   float64
 2   petal length  3000 non-null   float64
 3   petal width   3000 non-null   float64
 4   label         3000 non-null   object 
dtypes: float64(4), object(1)
memory usage: 117.3+ KB


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,sepal length,sepal width,petal length,petal width
count,3000.0,3000.0,3000.0,3000.0
mean,5.865267,3.051833,3.767367,1.191
std,0.805073,0.412472,1.751183,0.758022
min,4.3,2.0,0.9,0.1
25%,5.1,2.8,1.5,0.3
50%,5.8,3.0,4.3,1.3
75%,6.4,3.3,5.2,1.8
max,7.9,4.4,6.9,2.5


In [9]:
# Count missing values per column.
df.isnull().sum()

sepal length    0
sepal width     0
petal length    0
petal width     0
label           0
dtype: int64

In [11]:
# Feature matrix: every column except the last one (the label column).
X = df.iloc[:,:-1]
X

Unnamed: 0,sepal length,sepal width,petal length,petal width
0,5.2,3.8,1.5,0.3
1,5.3,4.1,1.5,0.1
2,4.8,3.1,1.5,0.2
3,5.2,3.7,1.5,0.2
4,4.9,3.0,1.5,0.3
...,...,...,...,...
2995,7.2,3.6,6.0,2.5
2996,7.3,3.0,6.2,2.1
2997,6.9,3.2,5.7,2.3
2998,7.5,2.8,6.0,2.0


In [13]:
# Target: the last column, which holds the class label strings.
y = df.iloc[:,-1]
y

0          Iris-setosa
1          Iris-setosa
2          Iris-setosa
3          Iris-setosa
4          Iris-setosa
             ...      
2995    Iris-virginica
2996    Iris-virginica
2997    Iris-virginica
2998    Iris-virginica
2999    Iris-virginica
Name: label, Length: 3000, dtype: object

In [15]:
# Build the label <-> integer-code lookups from the raw label column rather
# than from `y`: this cell overwrites `y` with integer codes, so deriving the
# mapping from `y` would turn it into an int -> int mapping on a second run.
raw_labels = df.iloc[:, -1]
label_to_index = {label: idx for idx, label in enumerate(np.unique(raw_labels))}
index_to_label = {idx: label for label, idx in label_to_index.items()}

# Integer class code for every row, in the original row order.
y = np.array([label_to_index[label] for label in raw_labels])
num_classes = len(label_to_index)

In [17]:
def one_hot_encoding(y, num_classes):
    """Convert integer class codes into a one-hot indicator matrix.

    Parameters
    ----------
    y : array-like of int
        Class codes in ``[0, num_classes)``. Lists, 1-D arrays and column
        vectors are accepted; the input is flattened before encoding.
    num_classes : int
        Number of columns in the returned matrix.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, num_classes)`` with a single 1.0
        per row, placed at the column given by that row's class code.

    Raises
    ------
    IndexError
        If a code is outside the valid column range or is not an integer.
    """
    # Flatten so an (n, 1) column vector does not broadcast against the row
    # indices below and light up every class for every row.
    codes = np.asarray(y).ravel()
    one_hot_labels = np.zeros((codes.size, num_classes))
    if codes.size == 0:
        # An empty input has no integer dtype to index with; return as-is.
        return one_hot_labels
    one_hot_labels[np.arange(codes.size), codes] = 1
    return one_hot_labels

# One-hot encode the integer class codes and display the result as a sanity check.
y_encoded = one_hot_encoding(y, num_classes)
y_encoded

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       ...,
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])

In [19]:
from sklearn.model_selection import train_test_split 
# Two-stage stratified split into train / validation / test.
# The second fraction is taken from the 85% left after the test split;
# 0.1764 is roughly 0.15 / 0.85, so validation ends up the same size as test.
TEST_FRACTION = 0.15
VAL_FRACTION_OF_REMAINDER = 0.1764
SPLIT_SEED = 42

X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y_encoded,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
    stratify=y_encoded,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=VAL_FRACTION_OF_REMAINDER,
    random_state=SPLIT_SEED,
    stratify=y_train_val,
)

In [20]:
# Inspect the training split (features and one-hot targets).
X_train, y_train

(      sepal length  sepal width  petal length  petal width
 273            5.1          3.6           1.5          0.4
 2752           6.3          3.0           5.5          1.8
 2100           6.9          3.2           5.7          2.3
 1909           5.0          2.3           3.4          1.0
 546            4.8          3.0           1.3          0.1
 ...            ...          ...           ...          ...
 1620           6.0          2.3           4.3          1.3
 974            4.5          2.9           1.5          0.2
 2738           6.8          3.1           5.5          2.1
 562            5.1          3.8           1.5          0.3
 519            4.8          3.1           1.6          0.2
 
 [2100 rows x 4 columns],
 array([[1., 0., 0.],
        [0., 0., 1.],
        [0., 0., 1.],
        ...,
        [0., 0., 1.],
        [1., 0., 0.],
        [1., 0., 0.]]))

In [23]:
# Inspect the validation split (features and one-hot targets).
X_val, y_val

(      sepal length  sepal width  petal length  petal width
 541            5.1          3.8           1.6          0.4
 465            5.1          3.5           1.5          0.4
 1484           5.7          3.0           3.9          1.3
 1344           5.8          3.0           4.5          1.5
 317            5.3          3.7           1.5          0.2
 ...            ...          ...           ...          ...
 1956           6.2          2.9           4.6          1.4
 277            4.6          3.1           1.5          0.2
 1161           5.8          2.6           3.9          1.1
 2656           5.8          2.8           5.2          2.4
 1116           6.3          3.3           4.6          1.6
 
 [450 rows x 4 columns],
 array([[1., 0., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        ...,
        [0., 1., 0.],
        [0., 0., 1.],
        [0., 1., 0.]]))

In [25]:
# Inspect the test split (features and one-hot targets).
X_test, y_test

(      sepal length  sepal width  petal length  petal width
 2214           6.6          3.0           5.7          2.2
 885            5.1          3.3           1.6          0.4
 2337           7.2          3.0           6.0          1.7
 1827           4.9          2.4           3.2          1.0
 310            5.1          3.3           1.6          0.4
 ...            ...          ...           ...          ...
 1014           5.3          2.7           3.9          1.4
 918            5.0          3.4           1.5          0.2
 1323           5.7          2.9           4.3          1.4
 1390           5.6          2.5           3.9          1.1
 215            4.6          3.2           1.5          0.2
 
 [450 rows x 4 columns],
 array([[0., 0., 1.],
        [1., 0., 0.],
        [0., 0., 1.],
        ...,
        [0., 1., 0.],
        [0., 1., 0.],
        [1., 0., 0.]]))

In [27]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training split
# only, then apply that same transform to all three splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)
X_train, X_val, X_test

(array([[-0.95015346,  1.31798403, -1.29388981, -1.04245304],
        [ 0.53870783, -0.12256062,  0.99194228,  0.80582337],
        [ 1.28313847,  0.35762093,  1.10623388,  1.46592209],
        ...,
        [ 1.1590667 ,  0.11753015,  0.99194228,  1.2018826 ],
        [-0.95015346,  1.79816558, -1.29388981, -1.17447279],
        [-1.32236879,  0.11753015, -1.23674401, -1.30649253]]),
 array([[-0.95015346,  1.79816558, -1.23674401, -1.04245304],
        [-0.95015346,  1.07789325, -1.29388981, -1.04245304],
        [-0.20572282, -0.12256062,  0.07760944,  0.14572465],
        ...,
        [-0.08165104, -1.08292373,  0.07760944, -0.11831484],
        [-0.08165104, -0.60274218,  0.82050487,  1.59794183],
        [ 0.53870783,  0.5977117 ,  0.47763006,  0.54178388]]),
 array([[ 0.91092315, -0.12256062,  1.10623388,  1.33390235],
        [-0.95015346,  0.5977117 , -1.23674401, -1.04245304],
        [ 1.6553538 , -0.12256062,  1.27767129,  0.67380363],
        ...,
        [-0.20572282, -0.36

In [40]:
class multi_class_classification:
    """Softmax (multinomial logistic) regression trained with full-batch RMSprop.

    Parameters
    ----------
    learning_rate : float
        Step size applied to each RMSprop-scaled gradient.
    n_iteration : int
        Number of full-batch update steps performed by ``fit``.
    beta : float
        Decay rate of the running average of squared gradients.
    epsilon : float
        Small constant that keeps ``log`` and the RMSprop division finite.
    """

    def __init__(self, learning_rate = 0.01, n_iteration = 3000, beta = 0.95, epsilon = 1e-8):
        self.learning_rate = learning_rate
        self.n_iteration = n_iteration
        self.beta = beta
        self.epsilon = epsilon
        # Model parameters and optimizer state; allocated in ``fit``.
        self.weights = None
        self.bias = None
        self.losses = []
        self.v_dw = None
        self.v_db = None

    def softmax(self, z):
        """Return row-wise softmax probabilities for a 2-D score matrix ``z``."""
        # Subtracting the row maximum leaves the result unchanged but keeps
        # ``exp`` from overflowing on large scores.
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def compute_cost(self, y, y_hat):
        """Return the mean cross-entropy between one-hot ``y`` and ``y_hat``."""
        # epsilon guards against log(0) when a predicted probability underflows.
        loss = -np.sum(y * np.log(y_hat + self.epsilon)) / y.shape[0]
        return loss

    def update_params_rms_prop(self, dw, db):
        """Apply one RMSprop step to the weights and bias, in place.

        Parameters
        ----------
        dw : numpy.ndarray
            Gradient of the loss with respect to ``self.weights``.
        db : numpy.ndarray
            Gradient of the loss with respect to ``self.bias``.
        """
        # Exponential moving average of the squared gradients.
        self.v_dw = self.beta * self.v_dw + (1 - self.beta) * (dw ** 2)
        self.v_db = self.beta * self.v_db + (1 - self.beta) * (db ** 2)

        # Scale each step by the root of that running average.
        self.weights -= (self.learning_rate * dw) / (np.sqrt(self.v_dw + self.epsilon))
        self.bias -= (self.learning_rate * db) / (np.sqrt(self.v_db + self.epsilon))

    def fit(self, X, y):
        """Train the model on features ``X`` and one-hot targets ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples, n_classes)
            One-hot encoded targets; the column count sets the class count.

        Every call starts from zero-initialised parameters and an empty loss
        history. The per-iteration loss is recorded in ``self.losses``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        n_samples, n_features = X.shape
        self.n_classes = y.shape[1]

        self.weights = np.zeros((n_features, self.n_classes))
        self.bias = np.zeros((1, self.n_classes))

        self.v_dw = np.zeros((n_features, self.n_classes))
        # The shape must be a tuple: np.zeros(1, k) would treat k as the dtype
        # argument and raise a TypeError.
        self.v_db = np.zeros((1, self.n_classes))

        # Reset so a repeated fit does not append to an earlier run's history.
        self.losses = []

        for _ in range(self.n_iteration):
            f_wb = np.dot(X, self.weights) + self.bias
            y_hat = self.softmax(f_wb)

            # Gradients of the mean cross-entropy with respect to weights and bias.
            dw = (1 / n_samples) * np.dot(X.T, (y_hat - y))
            db = (1 / n_samples) * np.sum(y_hat - y, axis=0, keepdims=True)

            # Loss is recorded for the parameters used in this forward pass,
            # i.e. before the update below.
            loss = self.compute_cost(y, y_hat)
            self.losses.append(loss)

            self.update_params_rms_prop(dw, db)

    def predict_proba(self, X):
        """Return class probabilities of shape (n_samples, n_classes) for ``X``."""
        f_wb = np.dot(X, self.weights) + self.bias
        return self.softmax(f_wb)

    def predict(self, X):
        """Return the index of the most probable class for each row of ``X``."""
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)