In [None]:
import numpy as np
import h5py
import matplotlib.pyplot as plt

# Open the HDF5 archives that hold the cat / non-cat data (read-only).
# NOTE(review): neither handle is closed in this section, and test_dataset
# is not read here -- confirm whether later code still needs them.
train_dataset = h5py.File('/content/sample_data/train_catvnoncat.h5', "r")
test_dataset = h5py.File('/content/sample_data/test_catvnoncat.h5', "r")

# --- Features -------------------------------------------------------------
# Copy the stored training inputs into an in-memory array.
train_set_x_orig = np.array(train_dataset["train_set_x"][:])

# Collapse each example into a single vector; after the transpose the layout
# is (features, examples).
train_set_x_flatten = train_set_x_orig.reshape(len(train_set_x_orig), -1).T

# Divide by 255.0 (presumably mapping 8-bit pixel values into [0, 1]) and
# transpose back so that every row is one example.
train_set_x = (train_set_x_flatten / 255.0).T

# --- Labels ---------------------------------------------------------------
# Copy the stored training labels into an in-memory array.
train_set_y_orig = np.array(train_dataset["train_set_y"][:])

# Keep the labels as a single row: one column per example.
train_set_y_orig = train_set_y_orig.reshape((1, len(train_set_y_orig)))

# Column form of the labels, lining up row-for-row with train_set_x.
train_set_y = train_set_y_orig.T

# Size of the new training split: 160 plus the last digit of the student ID.
last_digit = 7  # Last digit of my student ID
num_train = 160 + last_digit  # New training set size

# The first num_train rows are used for training; every remaining row of the
# original training data is held out as the new test split.
new_train_set_x, new_test_set_x = train_set_x[:num_train], train_set_x[num_train:]
new_train_set_y, new_test_set_y = train_set_y[:num_train], train_set_y[num_train:]

# Sigmoid activation function
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    The exponential is only ever taken of a non-positive number, so large
    negative inputs do not overflow. The direct formula evaluates
    ``exp(-z)``, which overflows (and emits a RuntimeWarning) once ``-z``
    exceeds the largest exponent the float type can represent.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        The sigmoid of ``z`` with the same shape as ``z``; a NumPy scalar
        when ``z`` is a scalar.
    """
    z = np.asarray(z)
    # exp(-|z|) lies in [0, 1]: it may underflow to 0 but can never overflow.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)).
    # Both expressions are the same function, written so exp stays bounded.
    out = np.where(z >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays with one or more dimensions as they are.
    return out[()]

# Function to compute the cost
def compute_cost(A, Y):
    """Return the mean binary cross-entropy between predictions and labels.

    Predicted probabilities are clipped to ``[eps, 1 - eps]`` before the
    logarithms are taken. Without the clip, a prediction of exactly 0 or 1
    produces ``log(0) = -inf`` and the cost becomes ``inf`` or ``nan``
    (``0 * -inf``), even when the prediction is correct.

    Args:
        A: Predicted probabilities, same shape as ``Y``.
        Y: Binary labels (0 or 1) with one sample per row; the number of
            rows is used as the sample count.

    Returns:
        The average cross-entropy cost as a float.
    """
    A = np.asarray(A, dtype=float)
    Y = np.asarray(Y)
    m = Y.shape[0]

    # Keep both log arguments strictly positive.
    eps = 1e-15
    A = np.clip(A, eps, 1 - eps)

    cost = -(1/m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    return cost

# Gradient descent function
def gradient_descent(X, Y, num_iterations, learning_rate):
    """Fit logistic-regression parameters with batch gradient descent.

    Weights start at zero and the bias at 0.0. Every iteration runs a
    forward pass over all samples and takes one gradient step. The cost is
    printed every 100 iterations; it is only computed on those iterations
    because the parameter update itself never uses it.

    Args:
        X: Feature matrix with one sample per row, shape (m, n_features).
        Y: Binary labels, one per sample. Any layout holding exactly m
            values is accepted (for example (m, 1), (m,) or (1, m)); it is
            viewed as an (m, 1) column internally.
        num_iterations: Number of gradient steps to take.
        learning_rate: Step size applied to each gradient.

    Returns:
        Tuple ``(w, b)``: the weight column of shape (n_features, 1) and the
        scalar bias.

    Raises:
        ValueError: If the number of labels differs from the number of rows
            in ``X``.
    """
    m = X.shape[0]
    n_features = X.shape[1]

    # Labels must be an (m, 1) column: against the (m, 1) activations any
    # other layout would broadcast ``A - Y`` to an (m, m) matrix.
    Y = np.asarray(Y).reshape(-1, 1)
    if Y.shape[0] != m:
        raise ValueError(
            f"Y holds {Y.shape[0]} labels but X has {m} samples"
        )

    w = np.zeros((n_features, 1))
    b = 0.0

    for i in range(num_iterations):
        # Forward propagation
        A = sigmoid(np.dot(X, w) + b)

        # Print cost every 100 iterations (measured before this step's update)
        if i % 100 == 0:
            cost = compute_cost(A, Y)
            print(f"Iteration {i}, cost: {cost:.4f}")

        # Backward propagation: the residual is shared by both gradients
        error = A - Y
        dw = (1/m) * np.dot(X.T, error)
        db = (1/m) * np.sum(error)

        # Update parameters
        w -= learning_rate * dw
        b -= learning_rate * db

    return w, b

# Prediction function
def predict(X, w, b):
    """Return hard 0/1 class predictions for every row of ``X``.

    Args:
        X: Feature matrix with one sample per row.
        w: Weight column matching the number of features in ``X``.
        b: Scalar bias.

    Returns:
        Integer array holding 1 where the sigmoid of ``X @ w + b`` is
        strictly greater than 0.5 and 0 everywhere else.
    """
    probabilities = sigmoid(np.dot(X, w) + b)
    is_positive = probabilities > 0.5
    return is_positive.astype(int)

# Hyperparameters for batch gradient descent
num_iterations = 2000
learning_rate = 0.005

# Fit the weights and bias on the training split
w, b = gradient_descent(
    new_train_set_x,
    new_train_set_y,
    num_iterations,
    learning_rate,
)

# Hard 0/1 predictions for the training split and the held-out split
train_predictions = predict(new_train_set_x, w, b)
test_predictions = predict(new_test_set_x, w, b)

# Accuracy: percentage of predictions that equal their label
train_accuracy = (train_predictions == new_train_set_y).mean() * 100
test_accuracy = (test_predictions == new_test_set_y).mean() * 100

print(f"Training Set Accuracy: {train_accuracy:.2f}%")
print(f"Test Set Accuracy: {test_accuracy:.2f}%")

# Row positions, relative to the held-out split, where the prediction and
# the label disagree
incorrect_indices = np.where(test_predictions != new_test_set_y)[0]
print(f"Incorrectly classified indices: {incorrect_indices}")

# Show at most the first four misclassified images
for index in incorrect_indices[:4]:
    # The held-out split begins at row num_train of the original image
    # array, so shift the split-relative index by that offset.
    plt.imshow(train_set_x_orig[num_train + index])
    plt.title(f"Prediction: {test_predictions[index, 0]}, Actual: {new_test_set_y[index, 0]}")
    plt.show()
