In [1]:
import pandas as pd
import numpy as np

In [67]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    The model learns a weight vector and a scalar bias so that
    ``sigmoid(X @ weights + bias)`` approximates the probability of the
    positive class. Targets are expected to be encoded as 0 / 1, and
    ``predict`` only ever returns those two labels.

    Parameters
    ----------
    learning_rate : float, default 0.001
        Step size applied to every gradient update.
    n_iters : int, default 1000
        Number of gradient descent iterations performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,), or None before ``fit``
    bias : float, or None before ``fit``
    """

    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Estimate ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric feature matrix.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Targets; flattened to one dimension before training.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, contains no rows, or its row
            count differs from the number of targets.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {X.shape}"
            )
        # A column-vector target would broadcast `y_predicted - y` to an
        # (n_samples, n_samples) matrix, so always work with a flat vector.
        y = np.asarray(y).ravel()

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )
        if y.min() < 0 or y.max() > 1:
            # Imported here so the class cell stays self-contained in a
            # notebook kernel. Only a warning: the updates below still run.
            import warnings

            warnings.warn(
                "LogisticRegression is a binary classifier: targets should be "
                "0 or 1, but values outside [0, 1] were found.",
                UserWarning,
                stacklevel=2,
            )

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # gradient descent
        for _ in range(self.n_iters):
            # predicted probability of the positive class for every sample
            y_predicted = self._sigmoid(np.dot(X, self.weights) + self.bias)
            error = y_predicted - y

            # gradients of the mean log-loss w.r.t. weights and bias
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """Return the predicted class label (0 or 1) for each row of ``X``.

        A sample is assigned class 1 when its predicted probability is
        strictly greater than 0.5, otherwise class 0.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        # Vectorised threshold instead of a Python-level loop.
        return (y_predicted > 0.5).astype(int)

    def _sigmoid(self, x):
        """Numerically stable logistic function ``1 / (1 + exp(-x))``.

        Only ``exp(-|x|)`` is evaluated, which lies in (0, 1], so large
        magnitude inputs cannot overflow.
        """
        z = np.asarray(x, dtype=float)
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

In [68]:
# Location of the Iris CSV. NOTE: this is an absolute path on the author's
# machine and has to be adjusted before the notebook is run anywhere else.
iris_csv_path = 'C:/Users/user/Desktop/IVY WORK BOOK/MACHINE LEARNING/Python Datasets/Classification Datasets/IRIS.csv'
df = pd.read_csv(iris_csv_path)

In [78]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [79]:
# Print each column's dtype and non-null count to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Sepal.Length  150 non-null    float64
 1   Sepal.Width   150 non-null    float64
 2   Petal.Length  150 non-null    float64
 3   Petal.Width   150 non-null    float64
 4   Species       150 non-null    int64  
dtypes: float64(4), int64(1)
memory usage: 7.0 KB


In [71]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how='any')

In [76]:
# Distinct values of the Species column, in order of first appearance.
df.loc[:, 'Species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [77]:
# Integer code assigned to each species label.
map_target = {'setosa': 0, 'versicolor': 1, 'virginica': 2}

# Encode the labels only while the column still holds the species names.
# Series.map turns every value that is not a key of the dict into NaN, so
# re-running this cell on the already-encoded column would wipe the target;
# the guard makes the cell safe to execute more than once.
if df['Species'].isin(list(map_target)).all():
    df['Species'] = df['Species'].map(map_target)

In [103]:
# Count the rows per encoded class to check how balanced the target is.
df['Species'].value_counts()

2    50
1    50
0    50
Name: Species, dtype: int64

In [72]:
# List the column names available in the frame.
df.columns

Index(['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',
       'Species'],
      dtype='object')

In [108]:
# Feature columns (the four flower measurements) and the label column.
predictors = ['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']
target = ['Species']

# Extract the selected columns as NumPy arrays. Because `target` is a list,
# y comes back two-dimensional with a single column.
X = df[predictors].to_numpy()
y = df[target].to_numpy()


In [109]:
# Show the dimensions of the feature matrix and of the target array.
np.shape(X), np.shape(y)

((150, 4), (150, 1))

In [110]:
# Keep X as a 2-D (n_samples, n_features) matrix and flatten y to 1-D.
# The sizes are derived from the arrays themselves instead of being
# hard-coded, so the cell keeps working if rows are dropped or the dataset
# changes; -1 lets NumPy infer the remaining dimension.
X = np.reshape(X, (X.shape[0], -1))
y = np.reshape(y, (-1,))

In [111]:
# Train the from-scratch logistic regression on the whole dataset, then
# predict on those same rows (there is no train/test split here).
# NOTE(review): the value_counts output shows three classes (0, 1, 2) in y,
# but this LogisticRegression is binary and predict() only returns 0 or 1 -
# consider a binary target or a one-vs-rest scheme; confirm the intent.
log=LogisticRegression(learning_rate=0.1)
log.fit(X,y)
pred=log.predict(X)

In [113]:
# Check that there is one prediction per input row.
pred.shape

(150,)