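# Modality Prediction Baseline with a Neural Network
Brings AI to the modality prediction task: a fully connected network trained to predict ADT from GEX.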

In [10]:
# Imports are grouped stdlib -> third-party -> project so every dependency
# of the notebook is visible in one place.
import logging

import pytorch_lightning as pl
import torch

from lab_scripts.data import dataloader
from lab_scripts.models.baselines import neuralnet
from lab_scripts.metrics.mp import mp_metrics
from lab_scripts.utils import utils

# NOTE(review): judging by its name this switches the working directory to the
# repository root so relative dataset paths resolve — confirm in lab_scripts.utils.
utils.change_directory_to_repo()

# Seed the Python, NumPy and torch RNGs up front so that a
# Restart & Run All is repeatable (weight init, batch shuffling, dropout).
# Kept away from the last line of the cell so its return value is not echoed.
SEED = 42
pl.seed_everything(SEED)

# Surface INFO-level log records in the notebook output.
logging.basicConfig(level=logging.INFO)

In [11]:
# Load the official GEX -> ADT modality-prediction dataset and pull out the
# four splits stored under its 'train_*' / 'test_*' keys.
data = dataloader.load_data('mp/official/gex_to_adt')
train_mod1, train_mod2 = data['train_mod1'], data['train_mod2']
test_mod1, test_mod2 = data['test_mod1'], data['test_mod2']

In [12]:
# Ask the helper which modality each training split contains, then report both.
mod1, mod2 = utils.get_mod(train_mod1), utils.get_mod(train_mod2)
print(
    f'Modality of train_mod1 is {mod1}',
    f'Modality of train_mod2 is {mod2}',
    sep='\n',
)

Modality of train_mod1 is gex
Modality of train_mod2 is adt


In [13]:
# Derive the task identifier from the two modality names and show it.
# NOTE(review): task_type is only printed here; no later cell in this notebook reads it.
task_type = utils.get_task_type(mod1, mod2)
print(f'Current data type is {task_type}')

Current data type is gex_to_adt


In [14]:
# Preprocess the training splits.
# No scaler is passed in, so preprocess_dataset fits a fresh StandardScaler
# for each modality and returns it alongside the transformed data.
train_mod1_X, scaler_mod1 = neuralnet.preprocess_dataset(train_mod1)
train_mod2_X, scaler_mod2 = neuralnet.preprocess_dataset(train_mod2)

# Training loader over both scaled modalities; batches are shuffled.
train_dataloader = neuralnet.get_dataloader(
    train_mod1_X,
    train_mod2_X,
    shuffle=True,
    batch_size=128,
)

In [15]:
# Preprocess the test splits with the scalers fitted on the training data;
# the scaler returned by each call is discarded.
# NOTE(review): presumably a supplied scaler is applied without refitting —
# confirm in neuralnet.preprocess_dataset.
test_mod1_X, _ = neuralnet.preprocess_dataset(test_mod1, scaler_mod1)
test_mod2_X, _ = neuralnet.preprocess_dataset(test_mod2, scaler_mod2)

# Evaluation loader: order is kept fixed (no shuffling).
test_dataloader = neuralnet.get_dataloader(
    test_mod1_X,
    test_mod2_X,
    shuffle=False,
    batch_size=128,
)

In [16]:
# Hyperparameters handed to neuralnet.BaselineModel.
config = {
    'input_features': train_mod1_X.shape[1],   # size of the input layer
    'output_features': train_mod2_X.shape[1],  # size of the output layer
    'lr': 0.01,                                # learning rate
    # NOTE(review): presumably these toggle dropout and set its probability —
    # confirm against neuralnet.BaselineModel.
    'use_dropout': True,
    'dropout': 0.5,
}

In [17]:
# Create model
# The baseline network is built entirely from the `config` dict defined above.
model = neuralnet.BaselineModel(config)

In [18]:
# Train on one GPU when CUDA is available; otherwise fall back to CPU
# (gpus=0) instead of failing on machines without a GPU.
n_gpus = int(torch.cuda.is_available())
trainer = pl.Trainer(gpus=n_gpus, max_epochs=50)

# The third positional argument of fit() is the validation dataloader, so the
# val_loss shown in the progress bar is computed on the test split.
trainer.fit(model, train_dataloader, test_dataloader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type    | Params
-------------------------------------
0 | linear_1 | Linear  | 7.0 M 
1 | linear_2 | Linear  | 150 K 
2 | linear_3 | Linear  | 40.3 K
3 | dropout  | Dropout | 0     
-------------------------------------
7.2 M     Trainable params
0         Non-trainable params
7.2 M     Total params
28.671    Total estimated model params size (MB)


                                                              

  rank_zero_warn(
  rank_zero_warn(


Epoch 15:  35%|███▍      | 82/236 [03:57<07:21,  2.87s/it, loss=0.592, v_num=0, val_loss=0.906]
Epoch 49: 100%|██████████| 236/236 [00:05<00:00, 44.37it/s, loss=0.549, v_num=1, val_loss=0.865]


In [19]:
# Run inference over the test loader; predict() yields one tensor per batch.
batch_predictions = trainer.predict(model, test_dataloader)

# Stitch the batches together along the first axis and move the result to
# host memory as a single np.ndarray.
predictions = torch.cat(batch_predictions, dim=0).cpu().numpy()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(


Predicting: 228it [00:00, ?it/s]


In [20]:
# Unscale predictions back to modality 2
# (undoes the scaling of scaler_mod2, the scaler fitted on train_mod2).
# NOTE(review): this rebinds `predictions`, so the cell is not idempotent —
# re-running it without first re-running the prediction cell applies the
# inverse transform a second time.
predictions = scaler_mod2.inverse_transform(predictions)

In [21]:
# Calculate target metric
# The unscaled predictions are compared against test_mod2 as loaded
# (not the scaled test_mod2_X); the value is displayed as the cell output.
mp_metrics.calculate_target(predictions, test_mod2)

0.46105143