In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('iris-data.csv')

In [3]:
df.head()

Unnamed: 0,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
df.describe()

Unnamed: 0,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm
count,100.0,100.0,100.0,95.0
mean,5.19394,3.095,2.862,0.816842
std,1.334795,0.475326,1.448565,0.56317
min,0.055,2.0,1.0,0.1
25%,4.975,2.8,1.5,0.2
50%,5.4,3.05,2.45,1.0
75%,5.8,3.4,4.325,1.3
max,7.0,4.4,5.1,1.8


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   sepal_length_cm  100 non-null    float64
 1   sepal_width_cm   100 non-null    float64
 2   petal_length_cm  100 non-null    float64
 3   petal_width_cm   95 non-null     float64
 4   class            100 non-null    object 
dtypes: float64(4), object(1)
memory usage: 4.0+ KB


In [6]:
#Removing all null values row
df = df.dropna()
df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 95 entries, 0 to 99
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   sepal_length_cm  95 non-null     float64
 1   sepal_width_cm   95 non-null     float64
 2   petal_length_cm  95 non-null     float64
 3   petal_width_cm   95 non-null     float64
 4   class            95 non-null     object 
dtypes: float64(4), object(1)
memory usage: 4.5+ KB


In [7]:
df['class'].replace(["Iris-setosa","Iris-versicolor"], [1,0], inplace=True)

In [8]:
df.head()

Unnamed: 0,sepal_length_cm,sepal_width_cm,petal_length_cm,petal_width_cm,class
0,5.1,3.5,1.4,0.2,1
1,4.9,3.0,1.4,0.2,1
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1


In [9]:
inp_df = df.drop(df.columns[[4]], axis=1)
out_df = df.drop(df.columns[[0,1,2,3]], axis=1)

#
scaler = StandardScaler()
inp_df = scaler.fit_transform(inp_df)

# Adding an extra column to be compatible with bias
new_c = (np.zeros(shape=(inp_df.shape[0],1)) + 1)
inp_df = np.concatenate((inp_df, new_c), axis=1)

#
X_train, X_test, y_train, y_test = train_test_split(inp_df, out_df, test_size=0.2, random_state=42)

In [10]:
X_train[:100]

array([[ 0.65540611, -0.59375803,  0.73869892,  0.86247727,  1.        ],
       [ 0.14025935,  0.65973115, -0.85464475, -1.10111477,  1.        ],
       [ 0.58181371, -0.59375803,  1.50073285,  1.39800238,  1.        ],
       [ 0.14025935, -0.17592831,  1.08507798,  1.21949401,  1.        ],
       [-0.15411022,  0.24190142, -1.20102381, -1.10111477,  1.        ],
       [-0.59566458, -0.17592831, -1.131748  , -1.10111477,  1.        ],
       [ 0.14025935,  1.70430547, -1.131748  , -0.74409804,  1.        ],
       [-0.15411022,  0.45081628, -1.06247219, -1.10111477,  1.        ],
       [ 0.94977568, -0.59375803,  1.15435379,  1.21949401,  1.        ],
       [ 0.21385175,  0.86864601, -1.131748  , -1.10111477,  1.        ],
       [ 0.80259089, -1.22050262,  1.36218123,  1.21949401,  1.        ],
       [ 0.43462893,  1.91322033, -1.20102381, -1.10111477,  1.        ],
       [ 0.36103653, -0.38484317,  0.87725055,  0.86247727,  1.        ],
       [ 0.14025935,  1.70430547, -0.8

In [11]:
y_train[:5]

Unnamed: 0,class
71,0
20,1
83,0
84,0
35,1


In [12]:
def model(X, W): # W is an array of weights
    # Logistic regression model implementation
    #y = np.zeros(shape=(X.shape[0], 1))
    y = 1/(1 + np.e ** -(np.sum(W*X)) )
    
    #y = np.expand_dims(y, axis=1)
    
    return y


In [13]:
def loss_bce(y_true, y_pred, W):
    # Binary cross entropy loss
    #loss = (-1)/y_true.shape[0] * ( np.sum( (y_true * np.log(y_pred)) + ((1-y_true) * np.log(1-y_pred))) )
    loss = (-1)/100 * ( np.sum( (y_true * np.log(y_pred)) + ((1-y_true) * np.log(1-y_pred))) )
    
    return loss

In [30]:
def fit(num_weights, epoch=200, lr = 1):
    # Initialization
    W = np.zeros(num_weights)
    batch_size = 32;
    size = W.shape[0]
    print(size)
    size = X_train.shape[0]
    print(size)
    batch_number = size // batch_size
    print(batch_number)
    # Epochs start
    for e in range(1, epoch+1, batch_number):
    #for e in range(1, epoch+1//batch_size):      
        y_pred = model(X_train, W)
        print(e, ". Loss: ", loss_bce(y_train, y_pred, W), ", W: ", W)
        
        for i in range(W.shape[0]):
            # Calculate derivative against parameters [Implement yourself]
            x_i = X_train[:, i]
            x_i = np.expand_dims(x_i, axis=1)
            
            dw_i = np.mean((y_pred - y_train) * x_i)
            
            # Update parameters [Implement yourself]
            #W[i] = 0
            W[i] = W[i] - lr * dw_i
        
        # Plot the lines
        # Y_hat = w * X + b
        # plt.plot(X, Y_hat, label="Epoch: "+str(i))
    
    # plt.scatter(X, Y, label="dataset")
    # plt.legend()
    # plt.show()
    
    return W

In [31]:
W = fit(5, lr = 0.001)

5
76
2
1 . Loss:  class    0.526792
dtype: float64 , W:  [0. 0. 0. 0. 0.]
3 . Loss:  class    0.526453
dtype: float64 , W:  [-8.75929383e-05  3.25380560e-04 -4.75498811e-04 -4.77571695e-04
 -5.26315789e-05]
5 . Loss:  class    0.526134
dtype: float64 , W:  [-0.00017508  0.00065056 -0.00095093 -0.00095509 -0.0001031 ]
7 . Loss:  class    0.525834
dtype: float64 , W:  [-0.00026246  0.00097554 -0.00142631 -0.00143255 -0.00015144]
9 . Loss:  class    0.525553
dtype: float64 , W:  [-0.00034974  0.00130032 -0.00190162 -0.00190995 -0.0001977 ]
11 . Loss:  class    0.52529
dtype: float64 , W:  [-0.00043693  0.00162492 -0.00237687 -0.0023873  -0.00024192]
13 . Loss:  class    0.525044
dtype: float64 , W:  [-0.00052401  0.00194933 -0.00285206 -0.0028646  -0.00028414]
15 . Loss:  class    0.524814
dtype: float64 , W:  [-0.000611    0.00227355 -0.00332719 -0.00334184 -0.0003244 ]
17 . Loss:  class    0.524599
dtype: float64 , W:  [-0.00069789  0.00259759 -0.00380227 -0.00381904 -0.00036273]
19 . L