<a href="https://colab.research.google.com/github/Maruf346/AI-ML-with-python/blob/main/Logistic_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Iris Dataset Classification**
The Iris dataset is a popular dataset in machine learning, consisting of 150 samples of iris flowers, each
with four features (sepal length, sepal width, petal length, and petal width) and a target variable specifying
the type of iris (Setosa, Versicolour, or Virginica).

In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the bundled Iris data (features in .data, class labels in .target)
iris = load_iris()

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=15
)

# Build a classifier with default settings and fit it on the training split
log_reg = LogisticRegression().fit(X_train, y_train)

# Predict labels for the held-out samples, then score them against the true labels
y_pred = log_reg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("Predicted target values:", y_pred)
print("Actual target values:", y_test)
print("Accuracy:", accuracy)

Predicted target values: [0 1 1 0 0 1 2 1 1 2 2 1 1 1 2 0 1 2 0 2 1 0 1 1 0 0 2 2 2 1 0 2 1 2 0 0 2
 0 0 1 2 0 0 1 2]
Actual target values: [0 1 1 0 0 1 2 1 1 2 2 1 1 1 2 0 1 2 0 2 1 0 1 1 0 0 2 2 2 1 0 2 1 2 0 0 2
 0 0 1 2 0 0 1 2]
Accuracy: 1.0


# **Logistic Regression on the Iris dataset using only Sepal Length and Petal Width.**

In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load the iris dataset and keep two feature columns only
iris = load_iris()
X = iris.data[:, [0, 3]]  # Only Sepal Length and Petal Width
y = iris.target

# Split the data: 70% is held out for testing, 30% is used for training
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.7, random_state=15
)

# Scaling - the scaler is fitted on the training split only and then
# re-used on the test split, so no test statistics leak into training
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train logistic regression model
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# Predict
y_pred = log_reg.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, y_pred)

# Output
print("Predicted target values:", y_pred)
print("\nActual target values:", y_test)
print("\nAccuracy:", accuracy)


Predicted target values: [0 1 1 0 0 1 2 0 1 2 2 1 1 1 2 0 1 2 0 2 1 0 1 0 0 0 2 2 2 1 0 2 1 2 0 0 2
 0 0 1 2 0 0 1 2 2 0 1 0 2 2 2 2 1 1 1 0 1 2 2 2 2 0 1 1 2 2 0 1 0 0 2 1 1
 1 1 0 2 2 2 0 2 0 0 2 2 1 1 2 0 0 0 1 1 1 0 2 0 1 0 0 0 2 2 2]

Actual target values: [0 1 1 0 0 1 2 1 1 2 2 1 1 1 2 0 1 2 0 2 1 0 1 1 0 0 2 2 2 1 0 2 1 2 0 0 2
 0 0 1 2 0 0 1 2 2 0 1 0 2 2 2 2 1 1 1 0 1 2 2 2 2 0 2 1 2 1 0 1 0 0 2 1 1
 1 1 0 2 2 2 0 1 0 0 2 2 1 1 2 1 0 0 1 1 1 0 2 0 1 0 1 0 2 2 2]

Accuracy: 0.9333333333333333


# **Implement logistic regression from scratch (i.e., without using any machine learning library) and compare its performance with that of scikit-learn.**

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the complete Iris data: every feature column plus the class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 70% of the samples for testing; the remaining 30% is the training split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.7, random_state=50
)

# Standardise the features with statistics learned from the training split only,
# then apply the same fitted scaler to both splits
scaler = StandardScaler().fit(X_train)
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

# ========== Logistic Regression from Scratch ==========

def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating, so the largest
    exponent evaluated is exp(0); the shift cancels out in the ratio.

    Args:
        z: 2-D array; the reductions run along ``axis=1``.

    Returns:
        Array of the same shape as ``z`` in which every row sums to 1.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    unnormalized = np.exp(z - row_max)
    normalizer = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / normalizer

def train_softmax(X, y, lr=0.1, n_iter=4000):
    """Fit a multiclass (softmax) logistic regression with batch gradient descent.

    Args:
        X: 2-D feature array, one row per sample (no bias column).
        y: integer class labels, one per row of ``X``. Labels are used
            directly as row indices into an identity matrix for one-hot
            encoding.
        lr: step size of each gradient update.
        n_iter: number of full-batch gradient steps.

    Returns:
        Weight matrix ``W`` with one row per class and one column per
        feature plus a leading bias column (column 0 multiplies the
        constant 1 that is prepended to ``X``).
    """
    # Add bias (intercept) column
    Xb = np.hstack([np.ones((X.shape[0], 1)), X])

    n_samples, n_features = Xb.shape

    # The one-hot rows below are selected by label *value*, so the class count
    # must reach max(label) + 1. Counting unique labels alone raises IndexError
    # when a class is absent from the training split (e.g. labels {0, 2}).
    y = np.asarray(y)
    n_classes = max(np.unique(y).size, int(y.max()) + 1)

    W = np.zeros((n_classes, n_features))  # weights start at zero
    y_onehot = np.eye(n_classes)[y]        # one-hot encoding of the labels

    # Gradient descent on the full batch
    for _ in range(n_iter):
        scores = Xb @ W.T
        probs = softmax(scores)
        # (probs - y_onehot) is the per-sample error; average it over samples
        gradient = (probs - y_onehot).T @ Xb / n_samples
        W -= lr * gradient
    return W

def predict_softmax(X, W):
    """Return the most probable class index for every row of ``X``.

    Args:
        X: 2-D feature array without a bias column.
        W: weight matrix whose first column holds the bias weights, laid out
            as produced by ``train_softmax``.

    Returns:
        1-D array with the arg-max class index of each row.
    """
    bias_column = np.ones((X.shape[0], 1))
    augmented = np.hstack([bias_column, X])
    class_probs = softmax(augmented @ W.T)
    return class_probs.argmax(axis=1)

# ---------- From-scratch softmax model ----------
W = train_softmax(X_train_std, y_train)
y_pred_scratch = predict_softmax(X_test_std, W)
acc_scratch = accuracy_score(y_test, y_pred_scratch)

# ---------- scikit-learn reference model ----------
# NOTE(review): scikit-learn's LogisticRegression applies L2 regularisation by
# default while the scratch model is unregularised, so the two accuracies are
# not expected to match exactly.
model = LogisticRegression().fit(X_train_std, y_train)
y_pred_sklearn = model.predict(X_test_std)
acc_sklearn = accuracy_score(y_test, y_pred_sklearn)

# ---------- Side-by-side accuracy on the held-out split ----------
print(f"Scratch model accuracy      : {acc_scratch:.6f}")
print(f"scikit-learn model accuracy : {acc_sklearn:.6f}")


Scratch model accuracy      : 0.952381
scikit-learn model accuracy : 0.961905


# **Another approach: implement logistic regression from scratch (i.e., without using any machine learning library) and compare its performance with that of scikit-learn.**

In [None]:
import numpy as np

def sigmoid(z):
    """Numerically stable logistic function ``1 / (1 + exp(-z))``.

    The naive formula evaluates ``exp(-z)``, which overflows for large
    negative ``z``. Here only ``exp(-|z|)`` is evaluated, which always lies
    in (0, 1], and the algebraically equivalent form is chosen per element:

        z >= 0:  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))

    Args:
        z: scalar or numpy array of real values.

    Returns:
        Value(s) in [0, 1] with the same shape as ``z``.
    """
    decay = np.exp(-np.abs(z))  # never overflows: the exponent is <= 0
    numerator = np.where(np.asarray(z) >= 0, 1.0, decay)
    return numerator / (1.0 + decay)

def predict(X, weights):
    """Return the sigmoid of the linear scores ``np.dot(X, weights)``.

    Args:
        X: feature array; a bias column, if wanted, must already be present.
        weights: weight array compatible with ``np.dot(X, weights)``.

    Returns:
        The sigmoid-transformed scores (values between 0 and 1).
    """
    return sigmoid(np.dot(X, weights))

def compute_cost(X, y, weights):
    """Mean binary cross-entropy of the model's predictions.

    Args:
        X: feature array passed through to ``predict``.
        y: 0/1 targets, shaped to broadcast against the predictions.
        weights: current model weights.

    Returns:
        The cross-entropy summed over all entries and divided by ``len(y)``.
    """
    m = len(y)
    # Keep probabilities strictly inside (0, 1): a saturated prediction of
    # exactly 0 or 1 would otherwise yield log(0) = -inf and a NaN/inf cost.
    eps = 1e-15
    h = np.clip(predict(X, weights), eps, 1 - eps)
    cost = (-1/m) * np.sum(y*np.log(h) + (1-y)*np.log(1 - h))
    return cost

def gradient_descent(X, y, weights, lr, iterations):
    """Run batch gradient descent for logistic regression.

    Args:
        X: feature array (bias column already included by the caller).
        y: targets, shaped like the output of ``predict(X, weights)``.
        weights: initial weights; this array is left untouched.
        lr: step size of each update.
        iterations: number of full-batch update steps.

    Returns:
        A new float array holding the weights after the last update.
    """
    m = len(y)
    # Work on a float copy: the in-place update below would otherwise mutate
    # the caller's array (making re-runs start from already-trained weights)
    # and would fail outright on integer-dtype initial weights.
    weights = np.array(weights, dtype=float)
    for _ in range(iterations):
        h = predict(X, weights)
        # Prediction error projected back onto the features, averaged over samples
        gradient = np.dot(X.T, (h - y)) / m
        weights -= lr * gradient
    return weights

# Load Iris
from sklearn.datasets import load_iris
iris = load_iris()

# Binary sub-problem: keep classes 0 and 1 and two feature columns
# (column 0 = sepal length, column 3 = petal width)
y = iris.target
mask = y < 2
X = iris.data[:, [0, 3]][mask]
y = y[mask]

# Standardise every column to zero mean and unit variance
X = (X - X.mean(axis=0)) / X.std(axis=0)

# Prepend a column of ones (bias term) and turn the labels into a column vector
ones_column = np.ones((X.shape[0], 1))
X = np.hstack((ones_column, X))
y = y.reshape(-1, 1)

# Start from all-zero weights and run gradient descent
weights = np.zeros((X.shape[1], 1))
weights = gradient_descent(X, y, weights, lr=0.1, iterations=1000)

# Threshold the predicted probabilities at 0.5 and score on the same data
preds = (predict(X, weights) >= 0.5).astype(int)
acc = (preds == y).mean()
print("Scratch model accuracy:", acc)


from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Same two-class, two-feature subset as the scratch model, taken from the raw
# (unnormalised) Iris arrays
X_sklearn = iris.data[mask][:, [0, 3]]
y_sklearn = iris.target[mask]

# Fit scikit-learn's classifier and score it on the data it was trained on
model = LogisticRegression().fit(X_sklearn, y_sklearn)
preds = model.predict(X_sklearn)
print("Scikit-learn model accuracy:", accuracy_score(y_sklearn, preds))


Scratch model accuracy: 1.0
Scikit-learn model accuracy: 1.0
