In [1]:
import logging
from src.dataset import create_dataloaders
from src.utils import load_and_preprocess_data, split_and_scale_data
from src.train import train_and_evaluate_model

import optuna

In [2]:
# Send INFO-and-above records from the root logger to training_logs.log,
# each line formatted as "<timestamp> - <level> - <message>".
#
# basicConfig() silently does nothing when the root logger already has
# handlers, which is common inside notebook kernels. force=True (Python 3.8+)
# removes and closes any existing root handlers first, so this configuration
# always takes effect, including on a re-run of the cell.
logging.basicConfig(
    filename='training_logs.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

In [3]:
# Input CSV locations, relative to the notebook's working directory.
geo_csv_path = "dataframes/threshold_df.csv"
curated_csv_path = "dataframes/molab_df_curated.csv"

# Name of the target column, and the columns that are not model features.
label_col = 'label-1RN-0Normal'
exclude_columns = [
    'label-1RN-0Normal',
    'Patient ID',
    'id',
    'BASELINE_TIME_POINT',
    "CROSSING_TIME_POINT",
    "BASELINE_VOLUME",
]

# The loader returns the frame together with a (possibly updated) exclusion
# list; the returned list replaces the one defined above.
geo_df, exclude_columns = load_and_preprocess_data(
    geo_csv_path, curated_csv_path, label_col, exclude_columns
)

# Every column of the loaded frame that is not in the returned exclusion list.
non_excluded_columns = [col for col in geo_df.columns if col not in exclude_columns]

geo_df_train, geo_df_test = split_and_scale_data(geo_df, label_col, non_excluded_columns)

In [5]:
# Hyperparameters for this run, kept together so they are easy to tune.
batch_size = 32
epochs = 50
hidden_size = 64
num_layers = 3
learning_rate = 0.001

# Build the data loaders from the training split; the call also returns the
# feature column list that is handed to the training routine below.
dataloaders, feature_columns = create_dataloaders(
    geo_df_train,
    label_col,
    exclude_columns,
    batch_size,
)

# Train and evaluate with the settings above; the test split is passed in too.
train_and_evaluate_model(
    dataloaders,
    feature_columns,
    geo_df_test,
    exclude_columns,
    num_epochs=epochs,
    hidden_size=hidden_size,
    num_layers=num_layers,
    batch_size=batch_size,
    learning_rate=learning_rate,
)