In [1]:
#Importing all the different python modules needed

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader, random_split
from ffModel import FeedForward
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
import optuna

In [2]:
def objective(trial):
    # Setup Hyperparameters to be tested
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    dropout_rate = trial.suggest_float("dropout_rate", 1e-1, 1.0)
    batch_size = trial.suggest_int("batch_size", 20, 40)
    epochs = trial.suggest_int("epochs", 100, 200)
    betaA = trial.suggest_float("betaA", 1e-1, 1, log=False)
    betaB = trial.suggest_float("betaB", 1e-1, 1, log=False)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-1, log=True)
    patient = trial.suggest_int('patient', 1, 101, step = 10)
    scale_factor = trial.suggest_float('scale_factor', 1e-6, 1e-1, log=True)
    
    # Create the model, initialize the weights, setup loss, and optimizer
    model = FeedForward(dropout_rate).to('cuda')
    model.init_weights()
    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, betas= (betaA,betaB), weight_decay=weight_decay)
    
    #importing data into pandas data frame.
    train_data = pd.read_csv('train.csv')
    test_data = pd.read_csv('test.csv')
    
    #Dropping non-data column from testing.
    test_data.drop(['Id'], axis=1, inplace=True)
    
    # Separate target variable from training data and drop it as well as ID variable from training inputs.
    Y_train = train_data['SalePrice']
    X_train = train_data.drop(['SalePrice', 'Id'], axis=1)
    
    # Combine training and test data for consistent encoding(I was getting different sized inputs when using dummies separately).
    combined_data = pd.concat([X_train, test_data], axis=0)
    
    # Handle categorical variables by applying one hot encoding with pandas
    combined_data = pd.get_dummies(combined_data)
    
    # Fill missing values with column mean with pandas
    combined_data = combined_data.fillna(combined_data.mean())
    
    # Separate the combined data back into training and test sets
    X_train = combined_data.iloc[:len(X_train), :]
    X_test = combined_data.iloc[len(X_train):, :]
    
    # Standardize the data using scikit-learn(This is also to help the problem of exploding gradients w/this dataset).
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # Convert to tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    Y_train = torch.tensor(Y_train.values, dtype=torch.float32).view(-1, 1)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    
    #create actual testing dataset which we will split into a testing dataset and validation data set
    dataset = TensorDataset(X_train, Y_train)
    train_size = int(.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
    
    #create final training data loader.
    trainValuesFinal = DataLoader(train_dataset,batch_size= batch_size,  shuffle=True)
    
    #set up plateau scheduler(I found this scheduler to work the best consistently via trial and error)
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=patient, factor=scale_factor)
    
    # Setup average loss variable for scheduler and then set model to training mode
    average_loss = 0
    model.train()
    
    #Actual training loop, I have a dedicated GPU for ML training, so I use cuda for all of my mats.
    for epoch in range(epochs):
        for inputs, targets in trainValuesFinal:
            inputs = inputs.to('cuda')
            targets = targets.to('cuda')
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()
            average_loss += loss.item()
        scheduler.step(average_loss/len(trainValuesFinal))
        
    # Set model to eval mode then use the val dataset to evaluate, use torch.no_grade so more efficient as not calculating gradients.
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for inputs, targets in val_dataset:
            inputs = inputs.to('cuda')
            targets = targets.to('cuda')
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()
    # Actual value which optuna then tries to minimize or maximize based on your choices.
    return val_loss / len(val_dataset) 

In [3]:
#create the study and optimize will 300 trials
study = optuna.create_study(direction='minimize', study_name='HousingPrice')
study.optimize(objective, n_trials=300)

[I 2024-08-01 18:22:19,735] A new study created in memory with name: HousingPrice
[W 2024-08-01 18:22:27,590] Trial 0 failed with parameters: {'learning_rate': 0.07317406651190207, 'dropout_rate': 0.5532144617861772, 'batch_size': 32, 'epochs': 175, 'betaA': 0.9429396737083805, 'betaB': 0.6372050438272204, 'weight_decay': 0.054673428377584425, 'patient': 71, 'scale_factor': 3.4559533154700992e-06} because of the following error: The value nan is not acceptable.
[W 2024-08-01 18:22:27,590] Trial 0 failed with value nan.
[I 2024-08-01 18:22:34,694] Trial 1 finished with value: 26004294668.328766 and parameters: {'learning_rate': 0.06940715432116638, 'dropout_rate': 0.7618598269490245, 'batch_size': 38, 'epochs': 199, 'betaA': 0.52953211512859, 'betaB': 0.36043321988689425, 'weight_decay': 3.522444707791381e-05, 'patient': 51, 'scale_factor': 0.00040523825869060267}. Best is trial 1 with value: 26004294668.328766.
[I 2024-08-01 18:22:43,828] Trial 2 finished with value: 38226083916.712326

KeyboardInterrupt: 

In [None]:
print(f"Best parameters found: {study.best_params}")
print(f"Best validation loss: {study.best_value}")