In [1]:
import autograd.numpy as np  
from autograd import grad, elementwise_grad
from sklearn.linear_model import LogisticRegression, Ridge, LinearRegression, Lasso, SGDRegressor
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
import seaborn as sns
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import os
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from PIL import Image
import random
from sklearn import datasets
from sklearn.datasets import make_blobs, load_breast_cancer, make_classification
from sklearn.neural_network import MLPClassifier

In [17]:
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    Evaluated as exp(-logaddexp(0, -x)), which is algebraically the same
    expression but never exponentiates a large positive number.  The direct
    form overflows inside exp(-x) for strongly negative inputs and emits a
    RuntimeWarning even though the limit (0) is well defined.

    Parameters
    ----------
    x : scalar or ndarray
        Pre-activation value(s).

    Returns
    -------
    Value(s) in [0, 1] with the same shape as `x`.
    """
    # logaddexp(0, -x) == log(1 + exp(-x)), computed without overflow.
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_der(x):
    """Derivative of the logistic sigmoid with respect to its input.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)), evaluated
    element-wise on `x`.
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

def derivate(func):
    """Return the element-wise gradient of `func`.

    Thin wrapper around autograd's `elementwise_grad`; the returned object
    is whatever `elementwise_grad(func)` produces.
    """
    gradient_func = elementwise_grad(func)
    return gradient_func

def relu(x):
    """Rectified linear unit: element-wise max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU: 1 where x is strictly positive, 0 elsewhere.

    The value at exactly x == 0 is taken to be 0.
    """
    is_active = x > 0
    return np.where(is_active, 1, 0)
    
def binary_cross_entropy(predictions, targets):
    """Mean binary cross-entropy between `predictions` and `targets`.

    Predictions are clipped to [1e-9, 1 - 1e-9] first so that neither
    logarithm is evaluated at zero.  The mean is taken over every element.
    """
    eps = 1e-9
    clipped = np.clip(predictions, eps, 1 - eps)
    log_likelihood = targets * np.log(clipped) + (1 - targets) * np.log(1 - clipped)
    return -np.mean(log_likelihood)

def binary_cross_entropy_der(predictions, targets):
    """Element-wise derivative of binary cross-entropy w.r.t. the predictions.

    Returns (p - t) / (p * (1 - p)) with p clipped to [1e-9, 1 - 1e-9] so the
    denominator cannot be zero.  No averaging over elements is applied here.
    """
    eps = 1e-9
    clipped = np.clip(predictions, eps, 1 - eps)
    numerator = clipped - targets
    denominator = clipped * (1 - clipped)
    return numerator / denominator

def r2_score(predictions, targets):
    """Coefficient of determination R^2 = 1 - SS_res / SS_tot.

    NOTE: this definition shadows `r2_score` imported from `sklearn.metrics`
    at the top of the file, and its argument order is (predictions, targets),
    i.e. the reverse of sklearn's (y_true, y_pred).

    Parameters
    ----------
    predictions : ndarray
        Model outputs.
    targets : ndarray
        Ground-truth values.

    Returns
    -------
    float
        R^2 score.  When the targets are constant (SS_tot == 0) the ratio is
        undefined; in that case 1.0 is returned for a perfect fit and 0.0
        otherwise, instead of nan / -inf.
    """
    ss_total = np.sum((targets - np.mean(targets)) ** 2)
    ss_residual = np.sum((targets - predictions) ** 2)
    # Constant targets: avoid 0/0 or x/0.  Empty input is left untouched so it
    # still yields nan as before.
    if np.size(targets) > 0 and ss_total == 0:
        return 1.0 if ss_residual == 0 else 0.0
    return 1 - (ss_residual / ss_total)

def create_layers_batch(network_input_size, layer_output_sizes):
    """Build the (W, b) parameter pairs of a fully connected network.

    Each weight matrix has shape (fan_in, fan_out) and is drawn from a normal
    distribution scaled by sqrt(2 / (fan_in + fan_out)) (Glorot/Xavier normal
    initialisation); each bias is a zero vector of length fan_out.  Random
    numbers come from the global `np.random` state.

    Returns a list with one (W, b) tuple per entry of `layer_output_sizes`.
    """
    widths = [network_input_size, *layer_output_sizes]
    return [
        (
            np.random.randn(fan_in, fan_out) * np.sqrt(2 / (fan_in + fan_out)),
            np.zeros(fan_out),
        )
        for fan_in, fan_out in zip(widths[:-1], widths[1:])
    ]

def feed_forward_batch(inputs, layers, activation_funcs):
    """Propagate a batch through the network and return the final activation.

    For every (W, b) pair in `layers`, paired with the activation at the same
    position in `activation_funcs`, computes activation(a @ W + b).
    """
    activation = inputs
    for (weights, bias), activate in zip(layers, activation_funcs):
        activation = activate(activation @ weights + bias)
    return activation

def feed_forward_saver_batch(inputs, layers, activation_funcs):
    """Forward pass that also records the intermediates needed for backprop.

    Returns
    -------
    (layer_inputs, zs, a)
        layer_inputs[i] is the input fed to layer i, zs[i] is that layer's
        pre-activation (input @ W + b), and `a` is the network output.
    """
    saved_inputs, pre_activations = [], []
    current = inputs
    for (weights, bias), activate in zip(layers, activation_funcs):
        saved_inputs.append(current)
        pre_activation = current @ weights + bias
        pre_activations.append(pre_activation)
        current = activate(pre_activation)
    return saved_inputs, pre_activations, current

def backpropagation_batch(input_batch, layers, activation_funcs, targets, activation_ders, cost_der):
    """Compute per-layer parameter gradients for one mini-batch.

    Runs a forward pass that stores every layer's input and pre-activation,
    then walks the layers from last to first applying the chain rule.

    Parameters
    ----------
    input_batch : ndarray
        Batch of inputs; its first dimension is used as the batch size.
    layers : list of (W, b)
        Current network parameters.
    activation_funcs, activation_ders : list of callables
        Activation and its derivative for each layer, in layer order.
    targets : ndarray
        Targets passed to `cost_der` together with the network output.
    cost_der : callable
        cost_der(predictions, targets) -> dC/da for the output layer.

    Returns
    -------
    list of (dC_dW, dC_db)
        One gradient pair per layer.  dC_dW is divided by the batch size and
        dC_db is the mean over the batch axis.
    """
    n_examples = input_batch.shape[0]
    saved_inputs, pre_activations, output = feed_forward_saver_batch(input_batch, layers, activation_funcs)
    n_layers = len(layers)
    gradients = [()] * n_layers

    delta = None  # dC/dz of the layer handled in the previous iteration
    for idx in range(n_layers - 1, -1, -1):
        layer_input = saved_inputs[idx]
        pre_activation = pre_activations[idx]
        activation_der = activation_ders[idx]

        if idx == n_layers - 1:
            # Output layer: the gradient comes straight from the cost.
            upstream = cost_der(output, targets)
        else:
            # Hidden layer: pull the gradient back through the next layer's weights.
            next_weights = layers[idx + 1][0]
            upstream = delta @ next_weights.T

        delta = upstream * activation_der(pre_activation)
        weight_grad = layer_input.T @ delta / n_examples
        bias_grad = np.mean(delta, axis=0)
        gradients[idx] = (weight_grad, bias_grad)

    return gradients

def _evaluate_classification(eval_inputs, eval_targets, layers, activation_funcs):
    """Return (loss, accuracy) of the network on one data set."""
    predictions = feed_forward_batch(eval_inputs, layers, activation_funcs)
    loss = binary_cross_entropy(predictions, eval_targets)
    # Outputs are thresholded at 0.5 element-wise before scoring.
    accuracy = accuracy_score(eval_targets, (predictions > 0.5).astype(int))
    return loss, accuracy


def train_nn_classification(inputs, targets, val_inputs, val_targets,test_inputs, test_targets, layers, activation_funcs, activation_ders, learning_rate=0.001, epochs=100, batch_size=32):
    """Train the network with mini-batch gradient descent on binary cross-entropy.

    Each epoch the training data is reshuffled (global `np.random` state),
    split into mini-batches, and every layer is updated with
    `param -= learning_rate * gradient`.  After each epoch the loss and the
    thresholded (> 0.5) accuracy are recorded on the training, validation and
    test sets; every 10th epoch the training accuracy is printed.

    Parameters
    ----------
    inputs, targets : ndarray
        Training data and targets.
    val_inputs, val_targets : ndarray
        Validation data and targets (metrics only).
    test_inputs, test_targets : ndarray
        Test data and targets (metrics only).
    layers : list of (W, b)
        Network parameters.  NOTE: this list is updated in place and is also
        the object that is returned.
    activation_funcs, activation_ders : list of callables
        Per-layer activations and their derivatives.
    learning_rate : float
        Gradient-descent step size.
    epochs : int
        Number of passes over the training data.
    batch_size : int
        Number of samples per mini-batch (the last batch may be smaller).

    Returns
    -------
    (layers, training_metrics, validation_metrics, test_metrics)
        Each metrics dict has 'loss' and 'accuracy' lists with one entry per
        epoch.
    """
    n_samples = len(inputs)
    training_metrics = {'loss': [], 'accuracy': []}
    validation_metrics = {'loss': [], 'accuracy': []}
    test_metrics = {'loss': [], 'accuracy': []}

    for epoch in range(epochs):
        # Shuffle the data
        indices = np.random.permutation(n_samples)
        inputs_shuffled = inputs[indices]
        targets_shuffled = targets[indices]

        # Mini-batch training
        for start in range(0, n_samples, batch_size):
            batch_inputs = inputs_shuffled[start:start + batch_size]
            batch_targets = targets_shuffled[start:start + batch_size]
            layer_grads = backpropagation_batch(batch_inputs, layers, activation_funcs, batch_targets, activation_ders, binary_cross_entropy_der)
            # Gradient-descent step, written back into the caller's list.
            for j, ((W, b), (dW, db)) in enumerate(zip(layers, layer_grads)):
                layers[j] = (W - learning_rate * dW, b - learning_rate * db)

        # Training metrics: a single forward pass serves both the periodic
        # progress print and the recorded history.
        loss_train, accuracy_train = _evaluate_classification(inputs, targets, layers, activation_funcs)
        if epoch % 10 == 0:
            print(f'Epoch {epoch}, Training Accuracy: {accuracy_train:.4f}')
        training_metrics['loss'].append(loss_train)
        training_metrics['accuracy'].append(accuracy_train)

        # Validation and test metrics
        for metrics, eval_inputs, eval_targets in (
            (validation_metrics, val_inputs, val_targets),
            (test_metrics, test_inputs, test_targets),
        ):
            loss, accuracy = _evaluate_classification(eval_inputs, eval_targets, layers, activation_funcs)
            metrics['loss'].append(loss)
            metrics['accuracy'].append(accuracy)

    return layers, training_metrics, validation_metrics, test_metrics

In [None]:
np.random.seed(32)
# Load and prepare data
data = load_breast_cancer()
X = data.data
y = data.target

# Split the data into train, validation, and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Scale the data: fit on the training split only, then apply the same
# transform to the validation and test splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Convert the integer labels of every split to one-hot encoding
y_train_onehot = np.eye(2)[y_train]
y_val_onehot = np.eye(2)[y_val]
y_test_onehot = np.eye(2)[y_test]

# Network configuration
input_size = X_train.shape[1]
layer_output_sizes = [50, 2]  # Output layer has 2 neurons for binary classification
activation_funcs = [relu, sigmoid]
activation_ders = [relu_derivative, sigmoid_der]

# Create layers
layers = create_layers_batch(input_size, layer_output_sizes)

# Get initial predictions and accuracy
initial_predictions = feed_forward_batch(X_val, layers, activation_funcs)
initial_pred_classes = np.argmax(initial_predictions, axis=1)
initial_acc = accuracy_score(y_val, initial_pred_classes)
print(f'Initial accuracy before training: {initial_acc:.4f}')

# Confusion matrix before training
initial_conf_matrix = confusion_matrix(y_val, initial_pred_classes)
print("\nConfusion Matrix before training on validation:")
print(initial_conf_matrix)

# Train the network.  train_nn_classification requires the test split as
# well and returns four values (layers + train/val/test metrics).
layers, training_metrics, validation_metrics, test_metrics = train_nn_classification(X_train, y_train_onehot, X_val, y_val_onehot, X_test, y_test_onehot, layers, activation_funcs, activation_ders, learning_rate=0.01, epochs=100, batch_size=32)

# Get final predictions and accuracy
final_predictions = feed_forward_batch(X_val, layers, activation_funcs)
final_pred_classes = np.argmax(final_predictions, axis=1)
final_acc = accuracy_score(y_val, final_pred_classes)
print(f'\nFinal accuracy after training on validation: {final_acc:.4f}')

# Confusion matrix after training
final_conf_matrix = confusion_matrix(y_val, final_pred_classes)
print("\nConfusion Matrix after training on validation:")
print(final_conf_matrix)

In [19]:
# Re-import needed: `from sklearn import datasets` in the import cell rebinds
# the name `datasets`, so torchvision's module has to be bound again here.
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize the images
    transforms.ToTensor(),  # Convert to tensor
])

# Loading the dataset using ImageFolder.
# The path defaults to the original machine-specific location but can be
# overridden through the DATASET_PATH environment variable.
dataset_path = os.environ.get('DATASET_PATH', '/home/laura/Desktop/OsloUni/ML/Project3/archive/')
dataset = datasets.ImageFolder(dataset_path, transform=transform)
# The ImageFolder class assumes the dataset is organized into subfolders representing class labels.
# The `transform` argument ensures that all images undergo the specified transformations.

# Split data
train_size = int(0.7 * len(dataset))  # 70% of the dataset for training
val_size = int(0.15 * len(dataset))   # 15% of the dataset for validation
test_size = len(dataset) - train_size - val_size  # Remaining 15% for testing

# A seeded generator makes the train/val/test partition identical on every run.
split_generator = torch.Generator().manual_seed(42)
train_set, val_set, test_set = random_split(dataset, [train_size, val_size, test_size], generator=split_generator)

# Creating DataLoaders for each set
trainloader = DataLoader(train_set, batch_size=32, shuffle=True)  # Shuffle the training data for randomness
valloader = DataLoader(val_set, batch_size=32, shuffle=False)     # No shuffling for validation
testloader = DataLoader(test_set, batch_size=32, shuffle=False)   # No shuffling for testing

# Converts a DataLoader into NumPy arrays, which are more suitable for
# array-based models such as logistic regression.
def dataloader_to_numpy(dataloader):
    """Materialise every batch of `dataloader` as two NumPy arrays.

    Each image batch is flattened to (batch_size, -1), i.e. one row per
    sample with all remaining dimensions collapsed, e.g.
    (batch_size, 3, 128, 128) -> (batch_size, 3 * 128 * 128).

    Returns
    -------
    (features, labels)
        Row-stacked flattened images and the concatenated labels, in the
        order the loader yields them.
    """
    flat_batches, label_batches = [], []
    for batch_images, batch_labels in dataloader:
        n_in_batch = batch_images.size(0)
        flat_batches.append(batch_images.view(n_in_batch, -1).numpy())
        label_batches.append(batch_labels.numpy())
    features = np.vstack(flat_batches)
    labels = np.hstack(label_batches)
    return features, labels

# Materialise the three loaders as (features, labels) NumPy arrays
X_train, y_train = dataloader_to_numpy(trainloader)
X_val, y_val = dataloader_to_numpy(valloader)
X_test, y_test = dataloader_to_numpy(testloader)

# Normalization: statistics are estimated on the training split only and
# then applied unchanged to all three splits.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = (scaler.transform(split) for split in (X_train, X_val, X_test))

In [20]:
# One-hot encode the integer labels of every split (two classes)
y_train_onehot, y_val_onehot, y_test_onehot = (
    np.eye(2)[labels] for labels in (y_train, y_val, y_test)
)

# Network configuration: three ReLU hidden layers and a 2-unit sigmoid output
input_size = X_train.shape[1]
layer_output_sizes = [16, 32, 64, 2]
activation_funcs = [relu] * 3 + [sigmoid]
activation_ders = [relu_derivative] * 3 + [sigmoid_der]

# Freshly initialised parameters
layers = create_layers_batch(input_size, layer_output_sizes)

# Baseline: validation accuracy of the untrained network
initial_predictions = feed_forward_batch(X_val, layers, activation_funcs)
initial_pred_classes = initial_predictions.argmax(axis=1)
initial_acc = accuracy_score(y_val, initial_pred_classes)
print(f'Initial accuracy before training: {initial_acc:.4f}')

initial_conf_matrix = confusion_matrix(y_val, initial_pred_classes)
print("\nConfusion Matrix before training on validation:", initial_conf_matrix, sep="\n")

# Train the network
layers, training_metrics, validation_metrics, test_metrics = train_nn_classification(
    X_train, y_train_onehot,
    X_val, y_val_onehot,
    X_test, y_test_onehot,
    layers, activation_funcs, activation_ders,
    learning_rate=0.001, epochs=30, batch_size=32,
)

# Validation performance of the trained network
final_predictions = feed_forward_batch(X_val, layers, activation_funcs)
final_pred_classes = final_predictions.argmax(axis=1)
final_acc = accuracy_score(y_val, final_pred_classes)
print(f'\nFinal accuracy after training on validation: {final_acc:.4f}')

final_conf_matrix = confusion_matrix(y_val, final_pred_classes)
print("\nConfusion Matrix after training on validation:", final_conf_matrix, sep="\n")

# Test performance of the trained network.
# The final_* names are reused, so after this point they hold the test results.
final_predictions = feed_forward_batch(X_test, layers, activation_funcs)
final_pred_classes = final_predictions.argmax(axis=1)
final_acc = accuracy_score(y_test, final_pred_classes)
print(f'\nFinal accuracy after training on test: {final_acc:.4f}')

final_conf_matrix = confusion_matrix(y_test, final_pred_classes)
print("\nConfusion Matrix after training on test:", final_conf_matrix, sep="\n")

Initial accuracy before training: 0.3846

Confusion Matrix before training on validation:
[[ 44 234]
 [ 70 146]]
Epoch 0, Training Accuracy: 0.7256
Epoch 10, Training Accuracy: 0.8101
Epoch 20, Training Accuracy: 0.7720

Final accuracy after training on validation: 0.7895

Confusion Matrix after training on validation:
[[218  60]
 [ 44 172]]

Final accuracy after training on test: 0.8145

Confusion Matrix after training on test:
[[226  53]
 [ 39 178]]
