In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim


In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### The Mandelbrot set
The Mandelbrot set is a two-dimensional set defined in the complex plane: it consists of the complex numbers $c$ for which the function $f_c(z) = z^2 + c$ does not diverge to infinity when iterated starting at $z = 0$.

Interesting properties:
- A point $c$ belongs to the Mandelbrot set iff $|z_n| \leq 2$ for all $n \geq 0$, where $z_0 = 0$ and $z_{n+1} = z_n^2 + c$.


### Creating a dataset

In [None]:
def mandelbrot_grid_dataset(nx=750, ny=750, xlim=(-2.0, 1.0), ylim=(-1.5, 1.5), max_iter=1000):
    """
    Label a regular grid of complex numbers by Mandelbrot-set membership.

    Parameters
    ----------
    nx, ny : int
        Number of grid points along the real and imaginary axes.
    xlim, ylim : tuple of float
        (min, max) bounds of the real and imaginary axes.
    max_iter : int
        Number of times z -> z*z + c is applied to each point.

    Returns
    -------
    X, Y : ndarray of shape (ny, nx)
        Real and imaginary coordinates of every grid point ("xy" meshgrid).
    in_set : ndarray of bool, shape (ny, nx)
        True where |z| stayed <= 2 during all ``max_iter`` iterations.
    """
    real_axis = np.linspace(xlim[0], xlim[1], nx)
    imag_axis = np.linspace(ylim[0], ylim[1], ny)
    X, Y = np.meshgrid(real_axis, imag_axis, indexing="xy")

    c_grid = X + 1j * Y
    z_grid = np.zeros_like(c_grid)
    # Points whose orbit has not left the disc of radius 2 so far.
    bounded = np.ones(c_grid.shape, dtype=bool)

    for _step in range(max_iter):
        # Advance only the points that are still bounded.
        z_active = z_grid[bounded]
        z_active = z_active * z_active + c_grid[bounded]
        z_grid[bounded] = z_active
        # Drop the points that have just escaped.
        bounded[bounded] = np.abs(z_active) <= 2.0

    # Whatever is still bounded after max_iter steps counts as inside the set.
    return X, Y, bounded

### Neural Network

In [8]:
class NeuralNet(nn.Module):
    """
    Fully connected binary classifier for 2-feature inputs.

    Layout: ``Linear(2, 32) -> ReLU``, then ``num_hidden_layers`` repetitions
    of ``Linear(32, 32) -> ReLU``, then ``Linear(32, 1)`` followed by a sigmoid,
    so the forward pass returns one value in (0, 1) per input row.
    """

    def __init__(self, num_hidden_layers=3):
        super().__init__()

        # Input projection
        stack = [nn.Linear(2, 32), nn.ReLU()]

        # Hidden blocks
        for _ in range(num_hidden_layers):
            stack += [nn.Linear(32, 32), nn.ReLU()]

        # Output
        stack.append(nn.Linear(32, 1))

        self.sigmoid_layer = nn.Sigmoid()
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the linear stack and squash the result with a sigmoid."""
        logits = self.network(x)
        return self.sigmoid_layer(logits)

### Visualization Function

In [None]:
def plot_decision_boundary(model, epoch, real_res=400, imag_res=400, outdir="images",
                           xlim=(-2.0, 1.0), ylim=(-1.5, 1.5)):
    """
    Generates and saves an image of the model's decision boundary.

    The model is evaluated on a regular ``real_res x imag_res`` grid and its
    outputs are rendered as a heat map.

    Parameters
    ----------
    model : torch.nn.Module
        Model mapping a ``(N, 2)`` float tensor of (real, imaginary) points to
        one value per point.
    epoch : int
        Zero-based epoch index; ``epoch + 1`` is used in the title and file name.
    real_res, imag_res : int
        Number of grid points along the real and imaginary axes.
    outdir : str
        Directory the PNG is written to; it is created if it does not exist.
    xlim, ylim : tuple of float
        (min, max) bounds of the real and imaginary axes.
    """
    # Define the range for the real and imaginary parts
    real_range = np.linspace(xlim[0], xlim[1], real_res)
    imag_range = np.linspace(ylim[0], ylim[1], imag_res)

    # Build the grid in one shot. "ij" indexing makes the real part the slow
    # axis, i.e. row k of `grid` is (real_range[k // imag_res], imag_range[k % imag_res]),
    # which is the order the reshape below relies on.
    real_grid, imag_grid = np.meshgrid(real_range, imag_range, indexing="ij")
    grid = np.stack([real_grid.ravel(), imag_grid.ravel()], axis=1)
    grid_tensor = torch.tensor(grid, dtype=torch.float32)

    model.to(device)
    # Evaluate in eval mode, then put the model back in whatever mode it was in
    was_training = model.training
    model.eval()
    with torch.no_grad():
        predictions = model(grid_tensor.to(device))
    model.train(was_training)

    # Reshape the predictions back into a 2D image format
    image_data = predictions.cpu().numpy().reshape((real_res, imag_res))

    plt.figure(figsize=(6, 6))
    # Transpose so the real axis runs horizontally and the imaginary axis vertically
    plt.imshow(image_data.T, extent=[xlim[0], xlim[1], ylim[0], ylim[1]], origin='lower', cmap='hot')
    plt.title(f'Decision Boundary at Epoch {epoch+1}')
    plt.xlabel('Real')
    plt.ylabel('Imaginary')

    # Save the figure to a file inside `outdir`, creating the directory on demand
    if outdir:
        os.makedirs(outdir, exist_ok=True)
    filename = os.path.join(outdir, f"mandelbrot_epoch_{epoch+1}.png")
    plt.savefig(filename)
    # The figure is deliberately left open so notebook backends can still display it.
    print(f"Saved image: {filename}")


### Training Loop

In [None]:
from torch.utils.data import TensorDataset, DataLoader

def _make_loader(features, targets, batch_size, shuffle):
    """Wrap ``features.values`` / ``targets.values`` as float32 tensors in a DataLoader."""
    X_tensor = torch.tensor(features.values, dtype=torch.float32)
    # Add a trailing dimension so the targets have one column per sample,
    # matching a model that emits one value per sample for BCELoss.
    y_tensor = torch.tensor(targets.values, dtype=torch.float32).unsqueeze(1)
    return DataLoader(TensorDataset(X_tensor, y_tensor), batch_size=batch_size, shuffle=shuffle)


def train(model, X_train, y_train, X_val, y_val, num_epoch=20, batch_size=16, lr=0.0001, criterion=nn.BCELoss(), visualize=False):
    """
    Train ``model`` with Adam, tracking training and validation loss per epoch.

    Parameters
    ----------
    model : torch.nn.Module
        Model to optimise; it is moved to ``device`` and trained in place.
    X_train, y_train, X_val, y_val
        Objects exposing a ``.values`` array (e.g. pandas DataFrame / Series)
        holding the features and targets of the training and validation splits.
    num_epoch : int
        Number of passes over the training data.
    batch_size : int
        Mini-batch size used for both loaders.
    lr : float
        Learning rate passed to Adam.
    criterion : callable
        Loss applied as ``criterion(outputs, targets)``. The default ``BCELoss``
        expects the model to output probabilities.
    visualize : bool
        When True, ``plot_decision_boundary`` is called after every 10th epoch.

    Returns
    -------
    torch.nn.Module
        The same ``model`` object after training.
    """
    # Move the model to the target device BEFORE creating the optimizer, so the
    # optimizer is built from the parameters that will actually be updated.
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training batches are reshuffled every epoch; validation order is irrelevant.
    train_loader = _make_loader(X_train, y_train, batch_size, shuffle=True)
    val_loader = _make_loader(X_val, y_val, batch_size, shuffle=False)

    train_losses = []
    val_losses = []

    # Training loop
    for epoch in range(num_epoch):
        model.train()  # Set the model to training mode
        train_loss = 0
        for batch_X, batch_y in train_loader:
            # Move the batch to the same device as the model
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)

            # Forward pass
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)

            # Backward pass and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch average is per sample
            train_loss += loss.item() * batch_X.size(0)

        model.eval()  # Set the model to evaluation mode
        val_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                # Move the batch to the same device as the model
                batch_X = batch_X.to(device)
                batch_y = batch_y.to(device)

                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)

                val_loss += loss.item() * batch_X.size(0)

        avg_train_loss = train_loss / len(train_loader.dataset)
        avg_val_loss = val_loss / len(val_loader.dataset)

        # Store losses
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        print(f"Epoch: {epoch+1}, Train Loss: {avg_train_loss}, Val Loss: {avg_val_loss}")

        # Plotting the set every 10 epochs
        if visualize and (epoch + 1) % 10 == 0:
            plot_decision_boundary(model, epoch)

    # Plot the learning curves
    plt.figure(figsize=(6, 4))
    plt.plot(train_losses, label="Training loss")
    plt.plot(val_losses, label="Validation loss")
    plt.title("Learning Curves")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

    return model

### Testing Loop

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, roc_auc_score
from torch.utils.data import TensorDataset, DataLoader

def test(model, X_test, y_test, threshold=0.5):
    """
    Evaluate a binary classifier and print/return standard metrics.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose output is interpreted as the probability of class 1.
    X_test, y_test
        Objects exposing a ``.values`` array (e.g. pandas DataFrame / Series)
        with the features and the 0/1 targets.
    threshold : float
        Outputs strictly greater than this value are predicted as class 1.

    Returns
    -------
    dict
        Keys: ``accuracy``, ``f1_score_positive``, ``auc``, ``true_positives``,
        ``false_positives``, ``true_negatives``, ``false_negatives``.
        ``auc`` is NaN when ``y_test`` contains a single class.
    """
    # Convert data to PyTorch tensors
    X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

    model.to(device)
    # Evaluation mode
    model.eval()

    with torch.no_grad():
        # Move the inputs to the same device as the model
        y_prob_tensor = model(X_test_tensor.to(device))

    # Convert probabilities to binary predictions (0 or 1)
    y_pred = (y_prob_tensor > threshold).float()

    # Flatten everything to 1-D arrays, the layout the sklearn metrics expect
    y_true_np = y_test_tensor.cpu().numpy().ravel()
    y_pred_np = y_pred.cpu().numpy().ravel()
    y_prob_np = y_prob_tensor.cpu().numpy().ravel()

    # Calculate metrics. ROC AUC is undefined when only one class is present,
    # so report NaN instead of failing the whole evaluation.
    if np.unique(y_true_np).size < 2:
        auc = float("nan")
        print("Test AUC: undefined (only one class present in y_true)\n")
    else:
        auc = roc_auc_score(y_true_np, y_prob_np)
        print(f"Test AUC: {auc:.4f}\n")

    accuracy = accuracy_score(y_true_np, y_pred_np)
    print(f"Test Accuracy: {accuracy:.4f}\n")

    print("Classification Report:")
    print(classification_report(y_true_np, y_pred_np))

    # Pin the label set so the matrix is always 2x2, even when a class is
    # missing from both the targets and the predictions.
    cm = confusion_matrix(y_true_np, y_pred_np, labels=[0.0, 1.0])
    print("Confusion Matrix:")
    print(cm)

    f1 = f1_score(y_true_np, y_pred_np, pos_label=1)
    tn, fp, fn, tp = cm.ravel()

    report = {
        "accuracy": accuracy,
        "f1_score_positive": f1,
        "auc": auc,
        "true_positives": tp,
        "false_positives": fp,
        "true_negatives": tn,
        "false_negatives": fn
    }

    return report

### Choosing Model

We will use the validation set to test the different models and choose which one we want to use.

In [None]:
# Candidate architectures: one NeuralNet per hidden-layer count, keyed by a
# readable name ("1_hidden_layer", "3_hidden_layers", ...).
hidden_layer_counts = (1, 3, 5, 7, 9)
model_configs = {
    f"{depth}_hidden_layer{'' if depth == 1 else 's'}": NeuralNet(num_hidden_layers=depth)
    for depth in hidden_layer_counts
}

# Metrics returned by test() on the validation split, keyed by model name.
results = {}

for name, model in model_configs.items():
    print(f"Training model: {name}")

    # Fit the candidate for 25 epochs, then score it on the validation data
    trained_model = train(model, X_train, y_train, X_val, y_val, num_epoch=25)
    results[name] = test(trained_model, X_val, y_val)

In [None]:
# One row per candidate model, one column per metric returned by test().
results_df = pd.DataFrame(results).transpose()
results_df

### Test the chosen architecture

Train the chosen architecture and evaluate it on the test set.

In [None]:
# Fresh 5-hidden-layer network, placed on the selected device.
model = NeuralNet(num_hidden_layers=5)
model = model.to(device)

# Train with train()'s default settings, then score on the test split.
model = train(model, X_train, y_train, X_val, y_val)
report = test(model, X_test, y_test)

### Visualization

In [None]:
# Train a fresh 5-hidden-layer network for 50 epochs with visualize=True, so
# train() calls plot_decision_boundary after every 10th epoch.
vis_model = NeuralNet(num_hidden_layers=5)
trained_vis_model = train(vis_model, X_train, y_train, X_val, y_val, num_epoch=50, visualize=True)

print("\nVisualization complete!")