# Notebook for Model Training
Requires python 3.9-3.12.

In [70]:
# imports
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision.transforms as transforms

class AuroraDataset(Dataset):
    def __init__(self, X, Y, transform=None):
        """
        Args:
            X (numpy.ndarray): Feature matrix
            Y (numpy.ndarray): Target vector
            transform (callable, optional): Optional transform to be applied on features
        """
        # Convert numpy arrays to torch tensors
        self.X = torch.tensor(X, dtype=torch.float32)
        self.Y = torch.tensor(Y, dtype=torch.float32)
        
        self.transform = transform
    
    def __len__(self):
        """
        Return the total number of samples in the dataset
        """
        return len(self.Y)
    
    def __getitem__(self, idx):
        """
        Generate one sample of data
        """
        features = self.X[idx]
        target = self.Y[idx]
        
        # Optional transform
        if self.transform:
            features = self.transform(features)
        
        return features, target

In [71]:
def create_aurora_dataloader(X, Y, batch_size=32, shuffle=True, transform=None):
    """
    Create a DataLoader for Aurora dataset
    
    Args:
        X (numpy.ndarray): Feature matrix
        Y (numpy.ndarray): Target vector
        batch_size (int): Number of samples per batch
        shuffle (bool): Whether to shuffle the data
        transform (callable, optional): Optional transform to be applied on features
    
    Returns:
        torch.utils.data.DataLoader: DataLoader for the Aurora dataset
    """
    dataset = AuroraDataset(X, Y, transform)
    
    dataloader = DataLoader(
        dataset, 
        batch_size=batch_size, 
        shuffle=shuffle,
        num_workers=0  # Set to 0 or adjust based on your system
    )
    
    return dataloader

In [72]:
class AuroraNN(torch.nn.Module):
    def __init__(self, input_size):
        """
        Simple neural network for aurora visibility classification
        
        Args:
            input_size (int): Number of input features
        """
        super(AuroraNN, self).__init__()
        
        # Define network layers
        self.layers = torch.nn.Sequential(
            torch.nn.Linear(input_size, 64),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(64, 32),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.3),
            torch.nn.Linear(32, 1),
            torch.nn.Sigmoid()  # Binary classification
        )
    
    def forward(self, x):
        """
        Forward pass
        """
        return self.layers(x)

In [73]:
def train_aurora_model(X_train, Y_train, X_test, Y_test, epochs=50, learning_rate=0.001):
    """
    Train the aurora visibility classification model
    
    Args:
        X_train (numpy.ndarray): Training feature matrix
        Y_train (numpy.ndarray): Training target vector
        X_test (numpy.ndarray): Test feature matrix
        Y_test (numpy.ndarray): Test target vector
        epochs (int): Number of training epochs
        learning_rate (float): Learning rate for optimizer
    
    Returns:
        torch.nn.Module: Trained model
    """
    # Create dataloaders
    train_loader = create_aurora_dataloader(X_train, Y_train, batch_size=32, shuffle=True)
    test_loader = create_aurora_dataloader(X_test, Y_test, batch_size=32, shuffle=False)
    
    # Initialize model
    model = AuroraNN(input_size=X_train.shape[1])
    
    # Loss and optimizer
    criterion = torch.nn.BCELoss()  # Binary Cross Entropy for binary classification
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    # Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        
        for batch_features, batch_targets in train_loader:
            # Zero the parameter gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(batch_features)
            loss = criterion(outputs.squeeze(), batch_targets)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        # Validation/Test evaluation
        model.eval()
        test_loss = 0
        correct = 0
        total = 0
        
        with torch.no_grad():
            for batch_features, batch_targets in test_loader:
                outputs = model(batch_features)
                test_batch_loss = criterion(outputs.squeeze(), batch_targets)
                test_loss += test_batch_loss.item()
                
                # Calculate accuracy
                predicted = (outputs.squeeze() > 0.5).float()
                correct += (predicted == batch_targets).sum().item()
                total += batch_targets.size(0)
        
        # Print epoch statistics
        print(f'Epoch [{epoch+1}/{epochs}]')
        print(f'Training Loss: {total_loss/len(train_loader):.4f}')
        print(f'Test Loss: {test_loss/len(test_loader):.4f}')
        print(f'Test Accuracy: {100 * correct / total:.2f}%')
    
    return model

In [74]:
# import data
dataset = np.load("data_processed/aurorae_dataset.npy", allow_pickle=True)

# every column but the last ('aurora_visible')
X = dataset[:, :-1]
# last column ('aurora_visible')
Y = dataset[:, -1]

X_train = X[:500]
Y_train = Y[:500]
X_test = X[-167:]
Y_test = Y[-167:]

In [75]:
train_aurora_model(X_train, Y_train, X_test, Y_test)

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.