# Logistic Regression Classifier

## Import Libraries

In [1]:
import pandas as pd
import numpy as np

# Import model made from scratch
from Models.linear_model import LogisticRegression

## Import Dataset
Using the iris dataset, keeping only classes 0 and 1 (class 2 is removed) to form a binary classification problem.

In [2]:
# Load the iris dataset bundled with scikit-learn; the returned object's
# .data, .target and .feature_names attributes are used in the next cell
from sklearn.datasets import load_iris
iris = load_iris()

In [3]:
# Combine the feature matrix and the class labels into a single DataFrame
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Drop every class-2 row so only classes 0 and 1 remain (binary problem)
iris_df = iris_df.loc[iris_df['target'].ne(2)]
iris_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
95,5.7,3.0,4.2,1.2,1
96,5.7,2.9,4.2,1.3,1
97,6.2,2.9,4.3,1.3,1
98,5.1,2.5,3.0,1.1,1


## Evaluate on Iris Dataset

In [4]:
# Hold out 20% of the rows for testing, stratified on the label
from sklearn.model_selection import train_test_split

# The label column is the target; every other column is a feature
y = iris_df['target']
X = iris_df.drop(columns='target')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [5]:
# Standardize features; the scaler is fitted on the training split only
# and then applied unchanged to the test split
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
# Hyperparameters for the from-scratch model, named so they can be tuned in one place
LEARNING_RATE = 0.0001
EPOCH = 1000

# Instantiate the model
lr = LogisticRegression(learning_rate=LEARNING_RATE, epoch=EPOCH)

# Fit on the standardized training split
lr.fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = lr.predict(X_test)

In [7]:
# Summarize precision, recall and F1 of the from-scratch model on the test split
from sklearn.metrics import classification_report

report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



In [8]:
# Baseline: train scikit-learn's LogisticRegression on the same split for comparison
from sklearn.linear_model import LogisticRegression as skLogisticRegression

sk_lr = skLogisticRegression(max_iter=1000).fit(X_train, y_train)
sk_y_pred = sk_lr.predict(X_test)

sk_report = classification_report(y_test, sk_y_pred)
print(sk_report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20



# External Dataset Evaluation
`UNCOMMENT TO TRY YOUR OWN DATA`

In [9]:
# Template for running the same workflow on your own CSV file: load the data,
# split 80/20 (stratified on the target), standardize, fit the from-scratch
# model, and print a classification report.
# To use it, uncomment every line below and fill in DATA and TARGET.
# It relies on train_test_split, StandardScaler, classification_report and
# LogisticRegression having been imported by the earlier cells.

# DATA = "ENTER PATH TO DATA HERE"
# TARGET = "ENTER TARGET COLUMN NAME HERE"
# LEARNING_RATE = 0.0001
# EPOCH = 1000

# df = pd.read_csv(DATA)
# X = df.drop(TARGET, axis=1)
# y = df[TARGET]

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)

# lr = LogisticRegression(learning_rate=LEARNING_RATE, epoch=EPOCH)
# lr.fit(X_train, y_train)
# y_pred = lr.predict(X_test)

# print(classification_report(y_test, y_pred))