In [1]:
import torch
from torch import nn
import torch.utils.data as data
from dataset import SNPmarkersDataset
import torch.nn.functional as F
import wandb
import numpy as np
from scipy.stats import pearsonr
from utils import format_batch
from utils import train_DL_model

In [2]:
class MLP(torch.nn.Module):
    """Multi-layer perceptron with a single scalar output.

    The network is a stack of ``nlayers - 1`` hidden ``LinearBlock`` modules
    (linear -> ReLU -> dropout) followed by a final linear layer with one
    output feature.

    Args:
        nlayers: Total number of linear layers, output layer included.
            Must be at least 1.
        hidden_nodes: Width of every hidden layer, in order. Its length must
            be exactly ``nlayers - 1``. ``None`` (the default) means no
            hidden layer. The given list is never modified.
        dropout: Dropout probability, must satisfy ``0 <= dropout < 1``.
        input_size: Number of input features. Defaults to 36304, the value
            that used to be hard-coded.

    Raises:
        AttributeError: If ``dropout`` is out of range, ``nlayers`` is
            smaller than 1, or ``hidden_nodes`` has the wrong length.
    """

    def __init__(self, nlayers: int = 1, hidden_nodes: "list[int] | None" = None, dropout: float = 0, input_size: int = 36304):
        super(MLP, self).__init__()

        # Avoid a shared mutable default argument.
        if hidden_nodes is None:
            hidden_nodes = []

        if dropout < 0 or dropout >= 1:
            raise AttributeError("The dropout must be between 0 and 1")

        if nlayers < 1:
            raise AttributeError("The number of layers must be greater or equal than one !")

        if len(hidden_nodes) != nlayers - 1:
            raise AttributeError(f"Not enough hidden_nodes given, expected a list of length {nlayers - 1} but got one of {len(hidden_nodes)}")

        # Build a new list so the caller's hyperparameter list is left
        # untouched for future runs.
        layer_sizes = [input_size, *hidden_nodes, 1]

        # One block per hidden layer: consecutive pairs of sizes, excluding
        # the last pair, which belongs to the output layer.
        self.model = nn.Sequential(*[
            LinearBlock(n_in, n_out, dropout=dropout)
            for n_in, n_out in zip(layer_sizes[:-2], layer_sizes[1:-1])
        ])
        self.dropout = nn.Dropout(dropout)
        self.output_layer = nn.Linear(layer_sizes[-2], layer_sizes[-1])

    def forward(self, x):
        """Return the prediction of the network for the batch ``x``."""
        if len(self.model) == 0:
            # No hidden block: dropout is applied directly to the inputs,
            # which is what the model has always done for nlayers == 1.
            return self.output_layer(self.dropout(x))
        # Every hidden block already applies dropout to its own output, so
        # it must not be applied a second time before the output layer.
        return self.output_layer(self.model(x))

class LinearBlock(torch.nn.Module):
    """Hidden block: linear layer, ReLU activation, then dropout.

    Args:
        input_size: Number of input features of the linear layer.
        output_size: Number of output features of the linear layer.
        dropout: Dropout probability applied to the activated output.
    """

    def __init__(self, input_size, output_size, dropout = 0):
        super(LinearBlock, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self,x):
        """Apply linear -> ReLU -> dropout to ``x``."""
        # The dropout module used to be created but never applied, so no
        # regularisation happened between hidden layers.
        return self.dropout(F.relu(self.fc(x)))

In [3]:
# Seed the random number generators so that weight initialisation, dropout
# masks and data shuffling are repeatable after "Restart & Run All".
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Optimisation hyperparameters.
BATCH_SIZE = 4              # batch size given to both DataLoaders
LEARNING_RATE = 1e-3        # initial learning rate of the Adam optimizer
N_EPOCHS = 5                # number of epochs handed to train_DL_model

# Model hyperparameters.
DROPOUT = 0.25              # dropout probability of the MLP
N_LAYERS = 2                # linear layers in the MLP, output layer included
HIDDEN_NODES = [1024]       # widths of the N_LAYERS - 1 hidden layers

# StepLR scheduler: multiply the learning rate by SCHEDULER_REDUCE_RATIO
# every SCHEDULER_STEP_SIZE scheduler steps.
# NOTE(review): with N_EPOCHS = 5 the decay only triggers if train_DL_model
# steps the scheduler more than once per epoch - confirm.
SCHEDULER_STEP_SIZE = 20
SCHEDULER_REDUCE_RATIO = 0.5

In [4]:
# Phenotypes requested from both datasets.
# NOTE(review): four phenotypes are selected while the MLP has a single output
# unit, and the recorded run of the training cell ends with
# "AttributeError: 'dict' object has no attribute 'to'". Presumably the dataset
# yields a dict when several phenotypes are selected - confirm against
# SNPmarkersDataset and train_DL_model.
selected_phenotypes = ["ep_res", "de_res", "FESSEp_res", "FESSEa_res"]

# Data: training and validation splits.
train_dataset = SNPmarkersDataset(mode="local_train", skip_check=True)
validation_dataset = SNPmarkersDataset(mode="validation", skip_check=True)

# NOTE(review): `set_phenotypes` is assigned, not called. If it is a method
# and not a property, this replaces it instead of invoking it - confirm
# in dataset.py.
train_dataset.set_phenotypes = selected_phenotypes
train_dataloader = data.DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

validation_dataset.set_phenotypes = selected_phenotypes
validation_dataloader = data.DataLoader(
    dataset=validation_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
)

# Model, optimizer, learning-rate schedule and loss (mean absolute error).
model = MLP(nlayers=N_LAYERS, hidden_nodes=HIDDEN_NODES, dropout=DROPOUT)
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer=optimizer,
    step_size=SCHEDULER_STEP_SIZE,
    gamma=SCHEDULER_REDUCE_RATIO,
)
criterion = nn.L1Loss()

In [5]:
# Launch training with Weights & Biases logging disabled.
# NOTE(review): the recorded run of this cell stops with
# "AttributeError: 'dict' object has no attribute 'to'"; the cause is not
# visible from this notebook (presumably the batch format for several
# phenotypes) - confirm in utils.train_DL_model.
training_options = dict(
    scheduler=scheduler,
    phenotype=selected_phenotypes,
    log_wandb=False,
)
train_DL_model(
    model, optimizer,
    train_dataloader, validation_dataloader,
    N_EPOCHS, criterion,
    **training_options,
)

Model architecture : 
 MLP(
  (model): Sequential(
    (0): LinearBlock(
      (dropout): Dropout(p=0.25, inplace=False)
      (fc): Linear(in_features=36304, out_features=1024, bias=True)
    )
  )
  (dropout): Dropout(p=0.25, inplace=False)
  (output_layer): Linear(in_features=1024, out_features=1, bias=True)
)
Numbers of parameters: 37177345


AttributeError: 'dict' object has no attribute 'to'