## Use stacked images (3D) and Densenet121 3D model

Acknowledgements:

- https://www.kaggle.com/rluethy/efficientnet3d-with-one-mri-type
- https://www.kaggle.com/davidbroberts/determining-dicom-image-order
- https://www.kaggle.com/ihelon/brain-tumor-eda-with-animations-and-modeling
- https://www.kaggle.com/furcifer/torch-efficientnet3d-for-mri-no-train
- https://github.com/shijianjian/EfficientNet-PyTorch-3D

This notebook is based on the implementation of Densenet121 3D available here:
https://www.kaggle.com/mikecho/monai-v060-deep-learning-in-healthcare-imaging

It builds 4 models with only one MRI type, then ensembles all of them computing average probabilities


In [1]:
!pip install torchio



In [2]:
import os
import sys 
import json
import glob
import random
import re
import collections
import time

import numpy as np
import pandas as pd
import pydicom
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torch import nn
from torch.utils import data as torch_data
from sklearn import model_selection as sk_model_selection
from torch.nn import functional as torch_functional

import torchio as tio

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

import xgboost as xgb

In [3]:
if os.path.exists("/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification"):
    data_directory = "/kaggle/input/rsna-miccai-brain-tumor-radiogenomic-classification"
    input_monaipath = "/kaggle/input/monai-v060-deep-learning-in-healthcare-imaging/"
    landmarks_directory = "/kaggle/input/rsna-landmarks"
else:
    data_directory = "rsna-miccai-brain-tumor-radiogenomic-classification"
    input_monaipath = "monai-v060-deep-learning-in-healthcare-imaging"
    landmarks_directory = "rsna-landmarks"

model_paths = "models"

In [4]:
mri_types = ['FLAIR', 'T1w', 'T1wCE', 'T2w']
SIZE = 256
NUM_IMAGES = 64
BATCH_SIZE = 2
N_EPOCHS = 6
NUM_FOLDS = 5
SEED = 42
MIN_LR = 1e-8
LR = 1e-4

sys.path.append(input_monaipath)

from monai.networks.nets.densenet import DenseNet121

In [5]:
TOTAL_EPOCHS = N_EPOCHS * len(mri_types) * NUM_FOLDS
TOTAL_EPOCHS

120

## Functions to load images

In [6]:
# def load_dicom_image(path, img_size=SIZE):
#     dicom = pydicom.read_file(path)
#     data = dicom.pixel_array
#     if np.min(data)==np.max(data):
#         data = np.zeros((img_size,img_size))
#         return data
#     
#     data = cv2.resize(data, (img_size, img_size))
#     return data
# 
# 
# def natural_sort(l): 
#     convert = lambda text: int(text) if text.isdigit() else text.lower()
#     alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
#     return sorted(l, key=alphanum_key)
# 
# 
# def load_dicom_images_3d(scan_id, num_imgs=NUM_IMAGES, img_size=SIZE, mri_type="FLAIR", split="train"):
#     files = natural_sort(glob.glob(f"{data_directory}/{split}/{scan_id}/{mri_type}/*.dcm"))
#     every_nth = len(files) / num_imgs
#     indexes = [min(int(round(i*every_nth)), len(files)-1) for i in range(0,num_imgs)]
#     
#     files_to_load = [files[i] for i in indexes]
#     
#     img3d = np.stack([load_dicom_image(f) for f in files_to_load]).T 
#     
#     img3d = img3d - np.min(img3d)
#     if np.max(img3d) != 0:
#         img3d = img3d / np.max(img3d)
#     
#     return np.expand_dims(img3d,0)
# 
# 
# load_dicom_images_3d("00000", mri_type=mri_types[0]).shape

In [7]:
def set_seed(seed):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

set_seed(SEED)

## remove samples as described in https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification/discussion/262046

In [8]:
# samples_to_exclude = [109, 123, 709]
# 
df = pd.read_csv(f"{data_directory}/train_labels.csv")
print("original shape", df.shape)
# df = df[~df.BraTS21ID.isin(samples_to_exclude)]
# print("new shape", df.shape)
# display(df)

original shape (585, 2)


In [9]:
# df = df.iloc[:16]

## Model and training classes

In [10]:
# landmarks = torch.load(os.path.join(landmarks_directory, (f"{mri_type}_landmarks.npy")))
# landmarks_dict = {
#     "default_image_name": landmarks,
# }

In [36]:
transforms = [
    # tio.HistogramStandardization(landmarks_dict),
    tio.ToCanonical(),
    tio.Resample(1, image_interpolation='bspline'),
    tio.Resample('T1w', image_interpolation='nearest'),
    tio.RandomAnisotropy(p=0.25),              # make images look anisotropic 25% of times
    # TODO: crop only sometimes! this happens all the time which is risky
    # tio.CropOrPad((256, 256, 64)),            # tight crop around brain
    # TODO: calculate landmarks:
    # https://colab.research.google.com/github/fepegar/torchio-notebooks/blob/main/notebooks/Data_preprocessing_and_augmentation_using_TorchIO_a_tutorial.ipynb#scrollTo=85COw2H63PfH
    # too strong? (removes part of the scan!)
    # tio.ZNormalization(
    #     masking_method=get_foreground),        # zero mean, unit variance of foreground
    #   works!
    tio.RandomBlur(p=0.25),                    # blur 25% of times
    tio.RandomNoise(p=0.25),                   # Gaussian noise 25% of times
    tio.OneOf({                                # either
        tio.RandomAffine(): 0.8,               # random affine
        tio.RandomElasticDeformation(): 0.2,   # or random elastic deformation
    }, p=0.8),                                 # applied to 80% of images
    tio.RandomBiasField(p=0.3),                # magnetic field inhomogeneity 30% of times
    tio.OneOf({                                # either
        tio.RandomMotion(): 1,                 # random motion artifact
        tio.RandomSpike(): 2,                  # or spikes
        tio.RandomGhosting(): 2,               # or ghosts
    }, p=0.5),                                 # applied to 50% of images
]

In [37]:
preprocess = tio.Compose(transforms)

In [25]:
from typing import List
from pathlib import Path

class RSNAMICCAI(tio.data.dataset.SubjectsDataset):
    """RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge dataset.

    This is a helper class for the dataset used in the
    `RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge`_ hosted on
    `kaggle <https://www.kaggle.com/>`_. The dataset must be downloaded before
    instantiating this class (as oposed to, e.g., :class:`torchio.datasets.IXI`).

    This `kaggle kernel <https://www.kaggle.com/fepegar/preprocessing-mri-with-torchio/>`_
    includes a usage example including preprocessing of all the scans.

    If you reference or use the dataset in any form, include the following
    citation:

    U.Baid, et al., "The RSNA-ASNR-MICCAI BraTS 2021 Benchmark on Brain Tumor
    Segmentation and Radiogenomic Classification", arXiv:2107.02314, 2021.

    Args:
        root_dir: Directory containing the dataset (``train`` directory,
            ``test`` directory, etc.).
        train: If ``True``, the ``train`` set will be used. Otherwise the
            ``test`` set will be used.
        ignore_empty: If ``True``, the three subjects flagged as "presenting
            issues" (empty images) by the challenge organizers will be ignored.
            The subject IDs are ``00109``, ``00123`` and ``00709``.

    Example:
        >>> import torchio as tio
        >>> from subprocess import call
        >>> call('kaggle competitions download -c rsna-miccai-brain-tumor-radiogenomic-classification'.split())
        >>> root_dir = 'rsna-miccai-brain-tumor-radiogenomic-classification'
        >>> train_set = tio.datasets.RSNAMICCAI(root_dir, train=True)
        >>> test_set = tio.datasets.RSNAMICCAI(root_dir, train=False)
        >>> len(train_set), len(test_set)
        (582, 87)


    .. _RSNA-MICCAI Brain Tumor Radiogenomic Classification challenge: https://www.kaggle.com/c/rsna-miccai-brain-tumor-radiogenomic-classification
    """  # noqa: E501
    id_key = 'BraTS21ID'
    label_key = 'MGMT_value'
    modalities = 'T1w', 'T1wCE', 'T2w', 'FLAIR'
    bad_subjects = '00109', '00123', '00709'

    def __init__(
            self,
            root_dir,
            df,
            train: bool = True,
            ignore_empty: bool = True,
            **kwargs,
            ):
        self.root_dir = Path(root_dir).expanduser().resolve()
        self.df = df
        subjects = self._get_subjects(self.root_dir, train, ignore_empty)
        super().__init__(subjects, **kwargs)
        self.train = train

    def _get_subjects(
            self,
            root_dir: Path,
            train: bool,
            ignore_empty: bool,
            ) -> List[tio.data.Subject]:
        subjects = []
        if train:
            labels_dict = {
                brats_id: mgmt_value
                for brats_id, mgmt_value in zip(
                    np.array([str(x).zfill(5) for x in self.df.BraTS21ID.values]),
                    self.df.MGMT_value.values
                )
            }
            subjects_dir = root_dir / 'train'
        else:
            subjects_dir = root_dir / 'test'

        for subject_id in sorted(labels_dict):
            if ignore_empty and subject_id in self.bad_subjects:
                continue
            try:
                int(subject_id)
            except ValueError:
                continue
            images_dict = {self.id_key: subject_id}
            if train:
                images_dict[self.label_key] = labels_dict[subject_id]
            for modality in self.modalities:
                image_dir = f'{subjects_dir}/{subject_id}/{modality}'
                filepaths = os.listdir(image_dir)
                num_files = len(filepaths)
                path = filepaths[0] if num_files == 1 else image_dir
                images_dict[modality] = tio.data.ScalarImage(path)
            subject = tio.data.Subject(images_dict)
            subjects.append(subject)
        return subjects

In [26]:
# import csv
# csv_path = f'{data_directory}/train_labels.csv'
# with open(csv_path) as csvfile:
#     reader = csv.DictReader(csvfile)
#     labels_dict = {
#         row['BraTS21ID']: int(row['MGMT_value'])
#         for row in reader
#     }
# labels_dict

In [27]:
def build_model():
    model = DenseNet121(spatial_dims=3, in_channels=1, out_channels=1)
    return model    

In [45]:
class Trainer:
    def __init__(
        self, 
        model, 
        device, 
        optimizer, 
        criterion
    ):
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.lr_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=MIN_LR, max_lr=LR, cycle_momentum=False)
        # self.lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=LR_DECAY)
        self.criterion = criterion

        self.best_valid_score = .0
        self.n_patience = 0
        self.lastmodel = None
        
        self.val_losses = []
        self.train_losses = []
        self.val_auc = []
        
    def fit(self, epochs, train_loader, valid_loader, save_path, patience):      
        for n_epoch in range(1, epochs + 1):
            self.info_message("EPOCH: {}", n_epoch)
            
            train_loss, train_time = self.train_epoch(train_loader)
            valid_loss, valid_auc, valid_time = self.valid_epoch(valid_loader)
            
            self.train_losses.append(train_loss)
            self.val_losses.append(valid_loss)
            self.val_auc.append(valid_auc)
            
            self.info_message(
                "[Epoch Train: {}] loss: {:.4f}, time: {:.2f} s",
                n_epoch, train_loss, train_time
            )
            
            self.info_message(
                "[Epoch Valid: {}] loss: {:.4f}, auc: {:.4f}, time: {:.2f} s",
                n_epoch, valid_loss, valid_auc, valid_time
            )

            if self.best_valid_score < valid_auc: 
                self.save_model(n_epoch, save_path, valid_loss, valid_auc)
                self.info_message(
                     "auc improved from {:.4f} to {:.4f}. Saved model to '{}'", 
                    self.best_valid_score, valid_auc, self.lastmodel
                )
                self.best_valid_score = valid_auc
                self.n_patience = 0
            else:
                self.n_patience += 1
            
            if self.n_patience >= patience:
                self.info_message("\nValid auc didn't improve last {} epochs.", patience)
                break
            
    def train_epoch(self, train_loader):
        self.model.train()
        t = time.time()
        sum_loss = 0

        for step, batch in enumerate(train_loader, 1):
            samples = []
            for sample in batch["X"]:
                
            batch["X"] = cv2.resize(batch["X"], (SIZE, SIZE))
            X = batch["X"].clone().detach().float().to(self.device)
            targets = batch["y"].to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(X).squeeze(1)
            loss = self.criterion(outputs, targets)
                
            loss.backward()

            sum_loss += loss.detach().item()
            
            self.optimizer.step()
            # for param_group in self.optimizer.param_groups:
            #     print(param_group['lr'])
            
            message = 'Train Step {}/{}, train_loss: {:.4f}'
            self.info_message(message, step, len(train_loader), sum_loss/step, end="\r")
            
        self.lr_scheduler.step()
        
        return sum_loss/len(train_loader), int(time.time() - t)
    
    def valid_epoch(self, valid_loader):
        self.model.eval()
        t = time.time()
        sum_loss = 0
        y_all = []
        outputs_all = []

        for step, batch in enumerate(valid_loader, 1):
            with torch.no_grad():
                targets = batch["y"].to(self.device)

                output = torch.sigmoid(self.model(batch["X"].clone().detach().float().to(self.device)).squeeze(1))
                loss = self.criterion(output, targets)
                sum_loss += loss.detach().item()

                y_all.extend(batch["y"].tolist())
                outputs_all.extend(output.tolist())

            message = 'Valid Step {}/{}, valid_loss: {:.4f}'
            self.info_message(message, step, len(valid_loader), sum_loss/step, end="\r")
            
        y_all = [1 if x > 0.5 else 0 for x in y_all]
        auc = roc_auc_score(y_all, outputs_all)
        
        return sum_loss/len(valid_loader), auc, int(time.time() - t)
    
    def save_model(self, n_epoch, save_path, loss, auc):
        self.lastmodel = f"{save_path}-e{n_epoch}-loss{loss:.3f}-auc{auc:.3f}.pth"
        torch.save(
            {
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.optimizer.state_dict(),
                "best_valid_score": self.best_valid_score,
                "n_epoch": n_epoch,
            },
            self.lastmodel,
        )
        
    def display_plots(self, mri_type):
        plt.figure(figsize=(10,5))
        plt.title("{}: Training and Validation Loss")
        plt.plot(self.val_losses,label="val")
        plt.plot(self.train_losses,label="train")
        plt.xlabel("iterations")
        plt.ylabel("Loss")
        plt.legend()
        plt.show()
        plt.close()
        
        plt.figure(figsize=(10,5))
        plt.title("{}: Validation AUC-ROC")
        plt.plot(self.val_auc,label="val")
        plt.xlabel("iterations")
        plt.ylabel("AUC")
        plt.legend()
        plt.show()
        plt.close()
    
    @staticmethod
    def info_message(message, *args, end="\n"):
        print(message.format(*args), end=end)

# Prediction

In [46]:
def predict(model, df, dataset, mri_type, split):
    print("Predict:", mri_type)
    try:
        dataset = RSNAMICCAI(data_directory, df, train=True, transform=preprocess)
    except:
        dataset = RSNAMICCAI(data_directory, df, train=False, transform=preprocess)

    data_loader = torch_data.DataLoader(
        dataset,
        num_workers=mp.cpu_count(),
        collate_fn=lambda x: x[0],
    )
   
    model.eval()
    
    y_pred = []
    ids = []

    for e, batch in enumerate(data_loader,1):
        print(f"{e}/{len(data_loader)}", end="\r")
        with torch.no_grad():
            tmp_pred = torch.sigmoid(model(batch["X"].clone().detach().float().to(device)).squeeze(1)).cpu().numpy().squeeze()
            if tmp_pred.size == 1:
                y_pred.append(tmp_pred)
            else:
                y_pred.extend(tmp_pred.tolist())
            ids.extend(batch["id"].numpy().tolist())
            
    return y_pred

## train loop

In [47]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_mri_type(df, df_test, mri_type, skf):

    oof_train = np.zeros((len(df)))
    oof_test = np.zeros((len(df_test)))
    oof_test_skf = np.empty((5, len(df_test)))

    lastmodels = []
    for i, (train_index, val_index) in enumerate(skf.split(df, df["MGMT_value"], df["MGMT_value"])):
        df_train = df.iloc[train_index]
        df_valid = df.iloc[val_index]
        
        train = df_train.copy()
        valid = df_valid.copy()
        train.loc[:,"MRI_Type"] = mri_type
        valid.loc[:,"MRI_Type"] = mri_type

        print(train.shape, valid.shape)
        display(valid.head())
        print(len(train))
        display(valid.head())
        print(len(valid))

        # landmarks = torch.load(os.path.join(landmarks_directory, (f"{mri_type}_landmarks.npy")))
        # landmarks_dict = {
        #     "default_image_name": landmarks,
        # }

        train_dataset = RSNAMICCAI(data_directory, df_train, train=True, transform=preprocess)
        valid_dataset = RSNAMICCAI(data_directory, df_valid, train=True, transform=preprocess)

        train_loader = torch_data.DataLoader(
            train_dataset,
            batch_size=BATCH_SIZE,
            shuffle=True,
            num_workers=8,
        )

        valid_loader = torch_data.DataLoader(
            valid_dataset, 
            batch_size=BATCH_SIZE,
            shuffle=False,
            num_workers=8,
        )

        model = build_model()
        model.to(device)

        optimizer = torch.optim.Adam(model.parameters(), lr=LR)

        criterion = torch_functional.binary_cross_entropy_with_logits

        trainer = Trainer(
            model, 
            device, 
            optimizer, 
            criterion
        )

        os.makedirs(f"models/fold_{i}", exist_ok=True)
        history = trainer.fit(
            N_EPOCHS, 
            train_loader, 
            valid_loader, 
            f"{model_paths}/fold_{i}/{mri_type}", 
            N_EPOCHS,
        )

        trainer.display_plots(mri_type)

        oof_train[val_index] = predict(model, df_valid, mri_type, "train")
        oof_test_skf[i, :] = predict(model, df_test, mri_type, "test")

        lastmodels.append(trainer.lastmodel)

    oof_test = oof_test_skf.mean(axis=0)

    return oof_train, oof_test, lastmodels

In [48]:
df_test = pd.read_csv(f"{data_directory}/sample_submission.csv")
df_test["MGMT_value"] = 0

In [49]:
rkf = StratifiedKFold(n_splits=5, shuffle=True, random_state=21)

In [50]:
%%time

oof_train = []
oof_test = []
modelfiles = []

for mri_type in mri_types:
    trn, tst, modelfile = train_mri_type(df, df_test, mri_type, rkf)
    oof_train.append(trn)
    oof_test.append(tst)
    modelfiles.append(modelfile)

(468, 3) (117, 3)


Unnamed: 0,BraTS21ID,MGMT_value,MRI_Type
2,3,0,FLAIR
9,14,1,FLAIR
11,18,0,FLAIR
18,26,1,FLAIR
24,35,1,FLAIR


468


Unnamed: 0,BraTS21ID,MGMT_value,MRI_Type
2,3,0,FLAIR
9,14,1,FLAIR
11,18,0,FLAIR
18,26,1,FLAIR
24,35,1,FLAIR


117
EPOCH: 1


ImageSeriesReader (0x5650f8816830): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.00101728

ImageSeriesReader (0x5650f8816830): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000760964

ImageSeriesReader (0x5650f8816830): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000298474

ImageSeriesReader (0x5650f8816830): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000848286

ImageSeriesReader (0x5650f8816830): Non uniform sampling or missing slices detected,  maximum nonuniformity:1.97312

ImageSeriesReader (0x5650f8816830): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000298474

ImageSeriesReader (0x5650f8816830): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000760964

ImageSeriesReader (0x5650f8816830): Non uniform sampling or missing slices detected,  maximum nonuniformity:0.000848286

ImageSeriesReader (0x5650f8816830): N

KeyboardInterrupt: 

In [51]:
np.save("oof_train.npy", oof_train)
np.save("oof_test.npy", oof_test)

In [52]:
x_train = np.swapaxes(np.array(oof_train), 0, 1)
x_test = np.swapaxes(np.array(oof_test), 0, 1)

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f53038aa280>
Traceback (most recent call last):
  File "/home/christian/miniconda/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/home/christian/miniconda/envs/pytorch/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1301, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/home/christian/miniconda/envs/pytorch/lib/python3.8/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/home/christian/miniconda/envs/pytorch/lib/python3.8/multiprocessing/popen_fork.py", line 44, in wait
    if not wait([self.sentinel], timeout):
  File "/home/christian/miniconda/envs/pytorch/lib/python3.8/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/home/christian/miniconda/envs/pytorch/lib/python3.8/selectors.py", lin

AxisError: axis2: axis 1 is out of bounds for array of dimension 1

In [None]:
len(df), len(x_train), len(x_test)

In [None]:
df_test = pd.read_csv(f"{data_directory}/sample_submission.csv")

In [None]:
for i, mri_type in enumerate(mri_types):
    df[f"level0_{mri_type}_preds"] = x_train[:, i]
    df_test[f"level0_{mri_type}_preds"] = x_test[:, i]

In [None]:
y = df["MGMT_value"].values
X = df.drop(["MGMT_value"], axis=1)

In [None]:
params = {
    'learning_rate': [0.01, 0.005, 0.001],
    'n_estimators': [5000, 10000], # 1000, 20000
    'min_child_weight': [1, 5, 10, 20],
    'gamma': [2, 3, 5],
    'subsample': [1.0], # 0.6, 0.8, 
    'colsample_bytree': [0.6, 0.8, 1.0],
    'max_depth': [3, 4, 5]
}

In [None]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

In [None]:
gbc = xgb.XGBClassifier(
    objective='binary:logistic',
)

In [None]:
folds = 5
param_comb = 20

skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=1001)

random_search = RandomizedSearchCV(
    gbc,
    param_distributions=params,
    n_iter=param_comb,
    scoring='roc_auc',
    n_jobs=8,
    cv=skf.split(X, y),
    verbose=10,
    random_state=1001,
)

random_search.fit(X, y)

In [None]:
print('\n All results:')
print(random_search.cv_results_)
print('\n Best estimator:')
print(random_search.best_estimator_)
print('\n Best normalized gini score for %d-fold search with %d parameter combinations:' % (folds, param_comb))
print(random_search.best_score_ * 2 - 1)
print('\n Best hyperparameters:')
print(random_search.best_params_)
results = pd.DataFrame(random_search.cv_results_)
results.to_csv('xgb-random-grid-search-results-01.csv', index=False)

In [None]:
results[results["rank_test_score"] < 8]

In [None]:
results[results["rank_test_score"] < 8]

In [None]:
x_test = df_test.copy()
x_test = x_test.drop(["MGMT_value"], axis=1)

In [None]:
test_preds = random_search.predict_proba(x_test)

In [None]:
test_preds = test_preds[:,1]
test_preds[:4]

In [None]:
# auc = roc_auc_score(y_valid, val_preds)
# print(f"Validation ensemble AUC: {auc:.4f}")
sns.displot(test_preds)

**Submission**

In [None]:
submission = pd.read_csv(f"{data_directory}/sample_submission.csv")
submission["MGMT_value"] = test_preds
submission.to_csv("submission.csv", index=False)