In [4]:
import numpy as np
from sklearn.datasets import load_iris  # Sample dataset
from sklearn.model_selection import train_test_split

def load_and_preprocess_data():
    iris = load_iris()
    X = iris.data
    y = iris.target

    # Assuming binary classification for this example
    y = (y == 0).astype(int)  # Target class 0 vs. others

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42
    )

    return X_train, X_test, y_train, y_test

class LogisticRegression:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # Initialize parameters
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Gradient descent with optimizations
        for _ in range(self.num_iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(linear_model)

            # Gradients (vectorized for efficiency)
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]  # Threshold
        return y_predicted_cls

def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

def precision(y_true, y_pred):
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    if tp + fp == 0:
        return 0  # or return None or np.nan if you prefer
    return tp / (tp + fp)

def recall(y_true, y_pred):
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    if tp + fn == 0:
        return 0  # or return None or np.nan if you prefer
    return tp / (tp + fn)




In [5]:
X_train, X_test, y_train, y_test = load_and_preprocess_data()

model = LogisticRegression(learning_rate=0.1, num_iterations=1500)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy(y_test, y_pred))
print("Precision:", precision(y_test, y_pred))
print("Recall:", recall(y_test, y_pred))


Accuracy: 1.0
Precision: 0
Recall: 0


In [3]:
import numpy as np

# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Cost function for logistic regression
def compute_cost(X, y, weights):
    m = len(y)
    h = sigmoid(X @ weights)
    epsilon = 1e-5  # To prevent log(0)
    cost = (1/m) * ((-y).T @ np.log(h + epsilon) - (1 - y).T @ np.log(1 - h + epsilon))
    return cost

# Gradient descent to minimize the logistic regression cost function
def gradient_descent(X, y, weights, learning_rate, iterations):
    m = len(y)
    cost_history = np.zeros(iterations)

    for i in range(iterations):
        weights = weights - (learning_rate/m) * (X.T @ (sigmoid(X @ weights) - y))
        cost_history[i] = compute_cost(X, y, weights)

    return weights, cost_history

# Prediction function
def predict(X, weights):
    return sigmoid(X @ weights) >= 0.5

# Accuracy calculation
def accuracy(y_true, y_pred):
    correct = np.sum(y_true == y_pred)
    return correct / len(y_true)

# Data preparation (example with a simplified dataset)
def prepare_data():
    # Sample dataset: 10 samples with 2 features and a binary target
    X = np.array([
        [5.1, 3.5],
        [4.9, 3.0],
        [7.0, 3.2],
        [6.4, 3.2],
        [5.9, 3.0],
        [5.4, 3.9],
        [6.6, 2.9],
        [5.6, 3.0],
        [6.7, 3.1],
        [5.6, 3.0]
    ])
    y = np.array([0, 0, 1, 1, 1, 0, 1, 0, 1, 0])

    # Feature scaling
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

    # Adding intercept term
    m = X.shape[0]
    X = np.hstack((np.ones((m, 1)), X))

    # Splitting dataset into training (80%) and test (20%) sets
    np.random.seed(42)  # For reproducibility
    shuffle_indices = np.random.permutation(m)
    test_size = int(m * 0.2)
    test_indices = shuffle_indices[:test_size]
    train_indices = shuffle_indices[test_size:]

    X_train, X_test = X[train_indices], X[test_indices]
    y_train, y_test = y[train_indices], y[test_indices]

    return X_train, X_test, y_train, y_test

# Main function to run the logistic regression model
def logistic_regression(learning_rate=0.01, iterations=1000):
    X_train, X_test, y_train, y_test = prepare_data()

    # Initial weights (all zeros)
    weights = np.zeros(X_train.shape[1])

    # Running gradient descent to optimize weights
    weights, cost_history = gradient_descent(X_train, y_train, weights, learning_rate, iterations)

    # Predictions
    y_pred_test = predict(X_test, weights)
    test_accuracy = accuracy(y_test, y_pred_test)

    print(f"Test Accuracy: {test_accuracy*100:.2f}%")

    return weights, cost_history

# Run logistic regression
weights, cost_history = logistic_regression()

# Output weights and cost history for review
print("Optimized weights:", weights)
print("Cost history:", cost_history)


Test Accuracy: 100.00%
Optimized weights: [-0.03967765  1.78988091 -0.5725503 ]
Cost history: [0.69108522 0.68905647 0.68704086 0.68503828 0.68304864 0.68107186
 0.67910784 0.67715649 0.67521771 0.67329143 0.67137754 0.66947597
 0.66758661 0.66570939 0.6638442  0.66199098 0.66014962 0.65832004
 0.65650215 0.65469587 0.65290112 0.6511178  0.64934583 0.64758513
 0.64583561 0.64409719 0.64236979 0.64065333 0.63894771 0.63725287
 0.63556871 0.63389517 0.63223215 0.63057958 0.62893739 0.62730548
 0.62568379 0.62407223 0.62247073 0.62087921 0.6192976  0.61772581
 0.61616379 0.61461144 0.6130687  0.61153549 0.61001174 0.60849737
 0.60699232 0.60549651 0.60400988 0.60253234 0.60106383 0.59960429
 0.59815364 0.59671181 0.59527874 0.59385435 0.59243858 0.59103137
 0.58963265 0.58824234 0.5868604  0.58548674 0.58412132 0.58276406
 0.5814149  0.58007377 0.57874063 0.57741539 0.57609801 0.57478843
 0.57348657 0.57219239 0.57090582 0.5696268  0.56835528 0.5670912
 0.56583449 0.56458511 0.56334299 0.