In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv('data/iris-data.csv')
df = df.dropna()
df.info()

df['class'].replace(["Iris-setosa", "Iris-versicolor"], [1, 0], inplace=True)

inp_df = df.drop(df.columns[[4]], axis=1)  #here axis 1 for column
out_df = df.drop(df.columns[[0, 1, 2, 3]], axis=1)

scaler = StandardScaler()
inp_df = scaler.fit_transform(inp_df)

new_c = (np.zeros(shape=(inp_df.shape[0], 1)) + 1)
inp_df = np.concatenate((inp_df, new_c), axis=1)

X_train, X_test, y_train, y_test = train_test_split(inp_df, out_df, test_size=0.2, random_state=42)

def model(X, W):
    # Logistic regression model implementation
    z = np.dot(X, W)
    y = 1 / (1 + np.exp(-z))
    return y

def loss_bce(y_true, y_pred):
    # Binary cross entropy loss
    n_samples = y_true.shape[0]
    loss = -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)) / n_samples
    return loss

def fit(num_weights, epoch=10, lr=0.01, batch_size=32):
    # Initialization
    W = np.zeros(num_weights)

    # Epochs start
    for e in range(1, epoch + 1):
        total_loss = 0.0
        num_batches = X_train.shape[0] // batch_size

        for batch in range(num_batches):
            start = batch * batch_size
            end = start + batch_size

            X_batch = X_train[start:end]
            y_batch = y_train.iloc[start:end].values.flatten()  # Convert y_batch to a flattened array

            y_pred = model(X_batch, W)
            loss = loss_bce(y_batch, y_pred)
            total_loss += loss

            for j in range(W.shape[0]):
                # Calculate derivative against parameters
                dw_j = np.dot(X_batch[:, j], (y_pred - y_batch))
                # Update parameters
                W[j] = W[j] - lr * dw_j

        avg_loss = total_loss / num_batches
        print(e, ". Loss:", avg_loss, ", W:", W)

    return W

fit(5, epoch=10, lr=0.01, batch_size=32)


<class 'pandas.core.frame.DataFrame'>
Int64Index: 95 entries, 0 to 99
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   sepal_length_cm  95 non-null     float64
 1   sepal_width_cm   95 non-null     float64
 2   petal_length_cm  95 non-null     float64
 3   petal_width_cm   95 non-null     float64
 4   class            95 non-null     object 
dtypes: float64(4), object(1)
memory usage: 4.5+ KB
1 . Loss: 0.6141974112728121 , W: [-0.04773024  0.19680973 -0.27644736 -0.27860205 -0.01285647]
2 . Loss: 0.3812040356581473 , W: [-0.07567639  0.32674729 -0.46398295 -0.46774688 -0.02563349]
3 . Loss: 0.27025497793655195 , W: [-0.09318175  0.42005285 -0.60160829 -0.6066262  -0.03743004]
4 . Loss: 0.20838080853515162 , W: [-0.104927    0.49192295 -0.70939404 -0.71540785 -0.04799567]
5 . Loss: 0.1694546655085359 , W: [-0.1132375   0.55003559 -0.79774021 -0.80455956 -0.05742033]
6 . Loss: 0.14283499948695957 , W: [-0.119

array([-0.13283576,  0.73806516, -1.09210083, -1.10133365, -0.09254077])

In [2]:
print(y_train)

    class
71      0
20      1
83      0
84      0
35      1
..    ...
65      0
76      0
19      1
97      0
56      0

[76 rows x 1 columns]
