In [3]:
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
import logging
import torch

# Resolve paths relative to the directory the kernel was started in.
# NOTE(review): this assumes the working directory sits two levels below the
# project root -- confirm if the notebook is ever moved.
notebook_path = os.getcwd()
PROJECT_ROOT = os.path.dirname(os.path.dirname(notebook_path))

# Add project root to Python path so the project-local imports resolve.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
    print(f"Added {PROJECT_ROOT} to Python path")

# Output directories live next to the notebook and are created if absent.
MODEL_DIR = os.path.join(notebook_path, 'model')
RESULTS_DIR = os.path.join(notebook_path, 'results')
for _out_dir in (MODEL_DIR, RESULTS_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# Later cells call logger.info / logger.error, but no logger was ever created.
# basicConfig installs a root handler when none exists; the explicit setLevel
# guarantees INFO records are emitted even if a handler was already configured.
logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Now import our modules
from preprocessing.data_loader import DataLoader
from models.deep_surv_model import DeepSurvModel
from utils.evaluation import cindex_score
from utils.visualization import plot_survival_curves


Added /Users/jonasschernich/Library/Mobile Documents/com~apple~CloudDocs/Uni/Master/9. Semester/Consulting/Organization/PCaPrognostics to Python path


In [5]:
# Experiment settings -- the single place to tune this notebook.
CONFIG = dict(
    # --- Data selection ---
    # USE_COHORTS switches the validation strategy: hold out one whole cohort
    # (True) or draw a random row split (False).
    USE_COHORTS=True,
    # The next three options are not read by any cell visible here;
    # presumably they are meant for the data-loading step -- TODO confirm.
    USE_PCA=False,
    GENE_TYPE='intersection',
    USE_IMPUTED=True,

    # --- Network / optimiser settings forwarded to fit_model() ---
    HIDDEN_LAYERS=[64, 32],
    DROPOUT=0.4,
    BATCH_SIZE=64,
    LEARNING_RATE=0.01,
    N_EPOCHS=10,

    # --- Training control ---
    USE_EARLY_STOPPING=True,
    # NOTE(review): PATIENCE equals N_EPOCHS, so early stopping may never
    # trigger -- depends on how fit_model() counts epochs; confirm.
    PATIENCE=10,
    # Fraction of rows held out; only used when USE_COHORTS is False.
    VALIDATION_SPLIT=0.2,
)

In [6]:
# Create the model wrapper with its default constructor arguments; the
# hyperparameters in CONFIG are passed later through fit_model().
deep_surv = DeepSurvModel()

In [7]:
# Split data for validation.
#
# NOTE(review): no visible cell creates X, y or groups (the saved output of
# this cell is a NameError). A data-loading cell -- presumably built on the
# imported DataLoader -- has to run first; TODO add it above this cell.
_required = ('X', 'y', 'groups') if CONFIG['USE_COHORTS'] else ('X', 'y')
_missing = [name for name in _required if name not in globals()]
if _missing:
    raise NameError(
        f"{', '.join(_missing)} not defined: load the data before splitting"
    )

# A seeded generator makes the split reproducible under Restart & Run All.
# RANDOM_SEED is optional in CONFIG; 42 is used when it is absent.
split_rng = np.random.default_rng(CONFIG.get('RANDOM_SEED', 42))

if CONFIG['USE_COHORTS']:
    # Use one cohort as validation
    unique_cohorts = np.unique(groups)
    if len(unique_cohorts) < 2:
        # Holding out the only cohort would leave nothing to train on.
        raise ValueError(
            "Cohort-based validation needs at least two cohorts; "
            f"found {len(unique_cohorts)}"
        )
    val_cohort = split_rng.choice(unique_cohorts)
    # Plain boolean array so the same mask indexes X (by position) and y.
    val_mask = np.asarray(groups == val_cohort)
    train_mask = ~val_mask

    # X is treated as a DataFrame here, as the random-split branch and the
    # plotting cell already do via .iloc.
    X_train, X_val = X.iloc[train_mask], X.iloc[val_mask]
    y_train, y_val = y[train_mask], y[val_mask]
else:
    # Random split
    indices = split_rng.permutation(len(X))
    split = int(len(X) * (1 - CONFIG['VALIDATION_SPLIT']))
    train_idx, val_idx = indices[:split], indices[split:]

    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y[train_idx], y[val_idx]

# Same structure for both strategies, so build it once.
validation_data = (X_val, y_val)


NameError: name 'groups' is not defined

In [None]:
# Fit the model on the training split and score it on the held-out split.
# Any failure is logged and then re-raised, so the cell still stops with the
# original exception.
try:
    logger.info("Starting model training...")

    # Collect the keyword arguments first: data splits plus the
    # hyperparameters read from CONFIG.
    fit_kwargs = dict(
        X=X_train,
        y=y_train,
        validation_data=validation_data,
        hidden_layers=CONFIG['HIDDEN_LAYERS'],
        batch_size=CONFIG['BATCH_SIZE'],
        learning_rate=CONFIG['LEARNING_RATE'],
        n_epochs=CONFIG['N_EPOCHS'],
        early_stopping=CONFIG['USE_EARLY_STOPPING'],
        patience=CONFIG['PATIENCE'],
        dropout=CONFIG['DROPOUT'],
    )
    deep_surv.fit_model(**fit_kwargs)
    logger.info("Model training completed successfully.")

    # Score the predictions for the validation rows with cindex_score.
    val_pred = deep_surv.predict(X_val)
    val_score = cindex_score(y_val, val_pred)
    logger.info(f"\nValidation C-index: {val_score:.3f}")
except Exception as train_error:
    logger.error(f"\nError during model training: {str(train_error)}")
    raise

In [None]:
# Plot survival curves for the first five validation samples, but only when
# the model wrapper exposes a `model` attribute.
preview_rows = slice(None, 5)
if hasattr(deep_surv, 'model'):
    plot_survival_curves(
        deep_surv,
        X_val.iloc[preview_rows],
        y_val[preview_rows],
    )

logger.info("\nTraining completed!")