In [27]:
# 1. Load a classification dataset using scikit-learn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import numpy as np

In [28]:
# Load Iris, then bind its `data` attribute to X and its `target` attribute to y
data = load_iris()
X = data.data
y = data.target

In [29]:
# 2. Hold out 20% of the samples as a test set (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
# 3. Build a scikit-learn logistic regression (capped at 200 solver iterations)
#    and fit it on the training split
log_reg = LogisticRegression(max_iter=200)
log_reg.fit(X=X_train, y=y_train)


In [31]:
# 4. Use the fitted model to predict a label for every test sample
y_pred = log_reg.predict(X=X_test)

In [32]:
# Show the predicted labels, header first and the array on the next line
print("Predicted class labels for the test set:", y_pred, sep="\n")

Predicted class labels for the test set:
[1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0]


In [33]:
# 5. Score the predictions against the held-out labels and report the accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy of the logistic regression model: {accuracy:.2f}")


Accuracy of the logistic regression model: 1.00


In [34]:
# Side-by-side listing of the true label and the predicted label per test sample
print("\nActual labels vs. Predicted labels:")
label_pairs = zip(y_test, y_pred)
for actual, predicted in label_pairs:
    print(f"Actual: {actual}, Predicted: {predicted}")


Actual labels vs. Predicted labels:
Actual: 1, Predicted: 1
Actual: 0, Predicted: 0
Actual: 2, Predicted: 2
Actual: 1, Predicted: 1
Actual: 1, Predicted: 1
Actual: 0, Predicted: 0
Actual: 1, Predicted: 1
Actual: 2, Predicted: 2
Actual: 1, Predicted: 1
Actual: 1, Predicted: 1
Actual: 2, Predicted: 2
Actual: 0, Predicted: 0
Actual: 0, Predicted: 0
Actual: 0, Predicted: 0
Actual: 0, Predicted: 0
Actual: 1, Predicted: 1
Actual: 2, Predicted: 2
Actual: 1, Predicted: 1
Actual: 1, Predicted: 1
Actual: 2, Predicted: 2
Actual: 0, Predicted: 0
Actual: 2, Predicted: 2
Actual: 0, Predicted: 0
Actual: 2, Predicted: 2
Actual: 2, Predicted: 2
Actual: 2, Predicted: 2
Actual: 2, Predicted: 2
Actual: 2, Predicted: 2
Actual: 0, Predicted: 0
Actual: 0, Predicted: 0


In [35]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The value is computed in a numerically stable way: ``exp`` is only ever
    evaluated on non-positive arguments, so inputs of large magnitude saturate
    to 0.0 or 1.0 instead of overflowing and emitting a NumPy RuntimeWarning.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); converted to a float64 array internally.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1]. A scalar input gives a scalar result, an array
        input gives an array of the same shape.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) always lies in [0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).   z < 0: e^z / (1 + e^z), the same quantity
    # rewritten so that only exp of a negative number is needed.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return result[()]

In [36]:
def cost_function(h, y, eps=1e-15):
    """Mean binary cross-entropy between predicted probabilities and labels.

    Computes ``mean(-y * log(h) - (1 - y) * log(1 - h))``.

    Parameters
    ----------
    h : numpy.ndarray
        Predicted probabilities.
    y : numpy.ndarray
        Labels, combined element-wise with ``h``.
    eps : float, optional
        ``h`` is clipped to ``[eps, 1 - eps]`` before taking logarithms.

    Returns
    -------
    float
        The mean loss; always finite for ``eps > 0``.
    """
    # A probability of exactly 0 or 1 would make log() return -inf, and the
    # product 0 * -inf is NaN, so keep h strictly inside the open interval.
    h = np.clip(h, eps, 1 - eps)
    return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

In [37]:
# Gradient term used by the weight update in logistic-regression training
def gradient_descent(X, h, y):
    """Return ``X.T @ (h - y)`` divided by the number of entries in ``y``.

    Despite its name, this function performs no descent step; it only
    evaluates the averaged gradient expression from the design matrix ``X``,
    the predictions ``h`` and the labels ``y``.
    """
    n_samples = y.shape[0]
    residual = h - y
    return X.T.dot(residual) / n_samples

In [38]:
# Function to train the logistic regression model
def train_logistic_regression(X, y, num_steps, learning_rate, log_every=1000):
    """Fit binary logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of inputs, one row per sample. A column of ones is
        prepended internally to act as the intercept.
    y : numpy.ndarray
        Labels, one per row of ``X``.
    num_steps : int
        Number of gradient-descent updates to perform.
    learning_rate : float
        Step size multiplying the gradient in each update.
    log_every : int or None, optional
        Print the cost whenever ``step % log_every == 0``. Pass ``0`` or
        ``None`` to disable progress output. The default of 1000 reproduces
        the previously hard-coded interval.

    Returns
    -------
    numpy.ndarray
        Learned weights; index 0 is the intercept, followed by one weight per
        column of ``X``.
    """
    # Add intercept term to X
    intercept = np.ones((X.shape[0], 1))
    X = np.concatenate((intercept, X), axis=1)

    # Start from all-zero weights (one per column, intercept included)
    weights = np.zeros(X.shape[1])

    for step in range(num_steps):
        h = sigmoid(np.dot(X, weights))
        weights -= learning_rate * gradient_descent(X, h, y)

        # The reported cost uses the predictions made *before* this step's
        # update, so step 0 shows the cost of the initial zero weights.
        if log_every and step % log_every == 0:
            print(f"Cost at step {step}: {cost_function(h, y)}")

    return weights

In [39]:
# Function to predict labels using logistic regression
def predict_logistic_regression(X, weights, threshold=0.5):
    """Predict binary labels (0 or 1) from learned logistic-regression weights.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of inputs, one row per sample, without an intercept column
        (a column of ones is prepended here).
    weights : numpy.ndarray
        Weight vector whose first entry is the intercept.
    threshold : float, optional
        A sample is labelled 1 when its predicted probability is greater than
        or equal to this value. Defaults to 0.5.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 labels, one per row of ``X``.
    """
    # Add intercept term to X
    intercept = np.ones((X.shape[0], 1))
    X = np.concatenate((intercept, X), axis=1)

    # Predict probabilities
    probabilities = sigmoid(np.dot(X, weights))
    # Vectorised thresholding: boolean mask -> 0/1 integers. This avoids a
    # Python-level loop and keeps an integer dtype even when X has no rows.
    return (probabilities >= threshold).astype(int)

In [41]:
# Reduce the task to binary classification: keep only the samples whose
# label is 0 or 1 (i.e. drop every sample with label 2)
binary_filter = y < 2
X = X[binary_filter]
y = y[binary_filter]

In [42]:
# Re-split the (now two-class) data with the same 20% test share and seed as before
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [43]:
# 2. Fit the hand-written logistic regression on the training split
weights = train_logistic_regression(
    X=X_train,
    y=y_train,
    num_steps=30000,
    learning_rate=0.1,
)

Cost at step 0: 0.6931471805599453
Cost at step 1000: 0.008180869223862807
Cost at step 2000: 0.004342363918026025
Cost at step 3000: 0.0030039139463461
Cost at step 4000: 0.002314223425170706
Cost at step 5000: 0.0018907803246513415
Cost at step 6000: 0.0016031581590757755
Cost at step 7000: 0.0013944476402037108
Cost at step 8000: 0.0012357633407562168
Cost at step 9000: 0.0011108479741958797
Cost at step 10000: 0.0010098346129687058
Cost at step 11000: 0.0009263793634768574
Cost at step 12000: 0.0008562132926715649
Cost at step 13000: 0.0007963556074412762
Cost at step 14000: 0.0007446603055388856
Cost at step 15000: 0.0006995420812116412
Cost at step 16000: 0.0006598037970570556
Cost at step 17000: 0.0006245241062817682
Cost at step 18000: 0.0005929820670522065
Cost at step 19000: 0.0005646052598111964
Cost at step 20000: 0.0005389332676487477
Cost at step 21000: 0.0005155914534008451
Cost at step 22000: 0.0004942717928622528
Cost at step 23000: 0.00047471864032302264
Cost at step 

In [44]:
# 3. Label the test samples with the custom model's learned weights
y_pred = predict_logistic_regression(X=X_test, weights=weights)
# 4. Compare against the held-out labels and report the custom model's accuracy
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy of the custom logistic regression model: {accuracy:.2f}")

Accuracy of the custom logistic regression model: 1.00
