Logistic Regression

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Column layout of the header-less iris.data file: four measurements, then the class name.
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
feature_names = column_names[:-1]

# Binary task: versicolor -> 0, virginica -> 1 (every other species is dropped).
species_to_label = {'Iris-versicolor': 0, 'Iris-virginica': 1}

df = pd.read_csv('iris.data', header=None, names=column_names).dropna()

# .copy() gives an independent frame, so adding the 'label' column below does
# not write into a slice of the original (avoids SettingWithCopyWarning).
df = df[df['species'].isin(list(species_to_label))].copy()
df['label'] = df['species'].map(species_to_label)

# Separate features and labels; select features by name rather than position
# so the selection does not depend on how many helper columns were appended.
X_raw = df[feature_names].to_numpy(dtype=float)
y = df['label'].to_numpy()

# Train-test split on the raw features first ...
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42
)

# ... then standardize with statistics estimated from the training rows only,
# so nothing about the held-out rows leaks into the fitted model.
feature_mean = X_train_raw.mean(axis=0)
feature_std = X_train_raw.std(axis=0)
feature_std = np.where(feature_std == 0, 1.0, feature_std)  # constant column: avoid division by zero

X_train = (X_train_raw - feature_mean) / feature_std
X_test = (X_test_raw - feature_mean) / feature_std

# Full standardized feature matrix, scaled with the same training statistics.
X = (X_raw - feature_mean) / feature_std

# Sigmoid function
def sigmoid(z):
    """Numerically stable logistic function 1 / (1 + exp(-z)).

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    Floating-point value(s) in [0, 1] with the same shape as ``z``
    (a scalar input yields a scalar).
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) is always in (0, 1], so it cannot overflow no matter how
    # large |z| is; the naive exp(-z) overflows for very negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same value.
    result = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # [()] turns a 0-d result back into a scalar and leaves arrays as arrays.
    return result[()]

# Cost function
def compute_cost(X, y, weights):
    """Mean binary cross-entropy of a logistic model.

    Parameters
    ----------
    X : array of shape (m, n)
        Design matrix (include a column of ones if a bias is wanted).
    y : array of shape (m,)
        Targets in {0, 1}.
    weights : array of shape (n,)
        Model parameters.

    Returns
    -------
    float
        -(1/m) * sum(y*log(h) + (1-y)*log(1-h)) with h = sigmoid(X @ weights).
    """
    m = len(y)
    z = np.dot(X, weights)
    # With h = sigmoid(z):  -[y*log(h) + (1-y)*log(1-h)] == log(1 + e^z) - y*z.
    # logaddexp(0, z) evaluates log(1 + e^z) without overflow and without ever
    # taking log(0), so saturated predictions give a finite cost instead of
    # inf/nan.
    cost = (1 / m) * np.sum(np.logaddexp(0.0, z) - y * z)
    return cost

# Gradient descent
def gradient_descent(X, y, weights, lr, epochs):
    """Fit logistic-regression weights with full-batch gradient descent.

    Parameters
    ----------
    X : array of shape (m, n)
        Design matrix.
    y : array of shape (m,)
        Targets in {0, 1}.
    weights : array-like of shape (n,)
        Starting parameters. The input is not modified.
    lr : float
        Step size applied to the gradient each epoch.
    epochs : int
        Number of update steps.

    Returns
    -------
    (weights, cost_history)
        The updated weight vector and a list holding the cost measured
        after each update.
    """
    m = len(y)
    # Work on a private float copy: the in-place update below would otherwise
    # overwrite the caller's array, and would fail outright on an integer
    # dtype because the float gradient cannot be cast back into it.
    weights = np.array(weights, dtype=float)
    cost_history = []

    for _ in range(epochs):
        h = sigmoid(np.dot(X, weights))
        # Gradient of the mean cross-entropy with respect to the weights.
        gradient = np.dot(X.T, (h - y)) / m
        weights -= lr * gradient
        # Cost is recorded after the step, so entry i reflects i + 1 updates.
        cost_history.append(compute_cost(X, y, weights))

    return weights, cost_history

# Prepend a column of ones to each design matrix so that the first weight
# plays the role of the intercept.
X_train_ = np.column_stack((np.ones(X_train.shape[0]), X_train))
X_test_ = np.column_stack((np.ones(X_test.shape[0]), X_test))

# One parameter per column (bias included), all starting at zero.
weights = np.zeros(X_train_.shape[1])

# Fit the parameters on the training split.
weights, cost_history = gradient_descent(
    X_train_,
    y_train,
    weights,
    lr=0.1,
    epochs=1000,
)

# Predict
def predict(X, weights):
    """Return boolean class predictions for the rows of ``X``.

    An entry is True where the sigmoid of the linear score ``X . weights``
    is at least 0.5, and False otherwise.
    """
    scores = np.dot(X, weights)
    probabilities = sigmoid(scores)
    return probabilities >= 0.5

# Score the fitted model on the held-out split: accuracy is the fraction of
# predictions that agree with the true labels.
y_pred = predict(X_test_, weights)
accuracy = (y_pred == y_test).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 80.00%
