In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import glob
import os
import sys

# Put the parent directory of the notebook's working directory on sys.path
# (presumably so the project-local `owkin` package imported below resolves — confirm repo layout).
module_path = os.path.abspath(os.pardir)
# Only append once so re-running the cell does not grow sys.path.
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import copy
import pandas as pd
import numpy as np
import torch

from sklearn.metrics import roc_auc_score

os.environ["WANDB_SILENT"] = "true"

from owkin.training import train, RocLoss
from owkin.dataset import build_dataset
from owkin.models.mono_models import MLP, SVM, LogReg, RandomForest
from owkin.models.aggregators import SmoothMaxAggregator, MaxAggregator

from mil.models import APR, AttentionDeepPoolingMil, MILES

In [3]:
# importing dataset
from mil.data.datasets import musk1
# importing bag_representation
from mil.bag_representation import MILESMapping
# importing validation strategy
from mil.validators import LeaveOneOut
# importing final model, which in this case is the SVC classifier from sklearn
from mil.models import SVC
# importing trainer
from mil.trainer import Trainer
# importing preprocessing 
from mil.preprocessing import StandarizerBagsList
# importing metrics, which in this case are from tf keras metrics
from mil.metrics import AUC

from mil.dimensionality_reduction import PCA

In [4]:
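# loading dataset (the musk1 loader below is left disabled; the data used in this notebook comes from build_dataset in the next cell)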
#(bags_train, y_train), (bags_test, y_test) = musk1.load()

In [5]:
# Dataset configuration forwarded verbatim to build_dataset.
# NOTE(review): val_center presumably names the centre held out as the validation split —
# confirm against owkin.dataset.build_dataset.
val_center = "C_5"
normalizer_type = "expo_lambda"

# build_dataset returns nine objects; they are unpacked here as
# train / val / test groups (features, mean features, labels or test frame).
(
    X_train, X_train_mean, y_train,
    X_val, X_val_mean, y_val,
    X_test, X_test_mean, df_test,
) = build_dataset(
    normalizer_type=normalizer_type,
    val_center=val_center,
)

In [6]:
# Number of leading entries kept in the reduced subsets used for quick runs.
N_SMALL = 10

# First N_SMALL training entries and their labels.
X_train_small = X_train[:N_SMALL]
y_train_small = y_train[:N_SMALL]

# Matching slice of the validation data.
# NOTE(review): X_val_small / y_val_small are not referenced by any cell visible in this
# notebook — confirm whether the small-subset runs were meant to evaluate on them.
X_val_small = X_val[:N_SMALL]
y_val_small = y_val[:N_SMALL]

## Train

In [7]:
# Metrics handed to Trainer.prepare in every experiment cell below:
# the 'acc' string alias plus the AUC metric imported from mil.metrics.
metrics = ['acc', AUC]

### MILES

Ran for 715 min without producing any result.

In [8]:
# MILES on the full training data with sample_weights=None.
trainer = Trainer()
model = MILES()
trainer.prepare(model, metrics=metrics)

history = trainer.fit(
    X_train,
    y_train,
    sample_weights=None,
    verbose=1,
)

# Validation metrics stored in the training history.
# NOTE(review): no validation strategy is passed to fit() (LeaveOneOut is imported but unused
# in the visible cells) — confirm whether this list is expected to be populated.
print(history['metrics_val'])

# Score the fitted model on the validation split (X_val / y_val), not the test set;
# being the last expression, the returned metrics are displayed as the cell output.
trainer.predict_metrics(X_val, y_val)

In [None]:
# MILES on the full training data with sample_weights='balanced'.
trainer = Trainer()
model = MILES()
trainer.prepare(model, metrics=metrics)

history = trainer.fit(
    X_train,
    y_train,
    sample_weights='balanced',
    verbose=1,
)

# Validation metrics stored in the training history.
# NOTE(review): no validation strategy is passed to fit() (LeaveOneOut is imported but unused
# in the visible cells) — confirm whether this list is expected to be populated.
print(history['metrics_val'])

# Score the fitted model on the validation split (X_val / y_val), not the test set;
# being the last expression, the returned metrics are displayed as the cell output.
trainer.predict_metrics(X_val, y_val)

### ADP (AttentionDeepPoolingMil)

In [12]:
# AttentionDeepPoolingMil on the full training data with sample_weights=None.
trainer = Trainer()
model = AttentionDeepPoolingMil()
trainer.prepare(model, metrics=metrics)

history = trainer.fit(
    X_train,
    y_train,
    sample_weights=None,
    verbose=1,
)

# Validation metrics stored in the training history (the recorded output of this cell is `[]`).
# NOTE(review): no validation strategy is passed to fit() — confirm whether an empty list is intended.
print(history['metrics_val'])

# Score the fitted model on the validation split (X_val / y_val), not the test set;
# being the last expression, the returned metrics are displayed as the cell output.
trainer.predict_metrics(X_val, y_val)

[]


{'accuracy': 0.0, 'auc': 0.5983564}

In [16]:
# AttentionDeepPoolingMil on the full training data with sample_weights='balanced'.
trainer = Trainer()
model = AttentionDeepPoolingMil()
trainer.prepare(model, metrics=metrics)

history = trainer.fit(
    X_train,
    y_train,
    sample_weights='balanced',
    verbose=1,
)

# Validation metrics stored in the training history (the recorded output of this cell is `[]`).
# NOTE(review): no validation strategy is passed to fit() — confirm whether an empty list is intended.
print(history['metrics_val'])

# Score the fitted model on the validation split (X_val / y_val), not the test set;
# being the last expression, the returned metrics are displayed as the cell output.
trainer.predict_metrics(X_val, y_val)

[]


{'accuracy': 0.0, 'auc': 0.48193735}

### APR

Runtime: 341 min.

In [None]:
# APR on the full training data with sample_weights='balanced'.
trainer = Trainer()
model = APR()
trainer.prepare(model, metrics=metrics)

history = trainer.fit(
    X_train,
    y_train,
    sample_weights='balanced',
    verbose=1,
)

# Validation metrics stored in the training history (the recorded output of this cell is `[]`).
# NOTE(review): no validation strategy is passed to fit() — confirm whether an empty list is intended.
print(history['metrics_val'])

# Score the fitted model on the validation split (X_val / y_val), not the test set;
# being the last expression, the returned metrics are displayed as the cell output.
trainer.predict_metrics(X_val, y_val)

Iteration: 1, APR size: 415.97, Discriminative features: 2048
Iteration: 2, APR size: 305.34, Discriminative features: 992
Iteration: 3, APR size: 300.43, Discriminative features: 967
---Grow and discriminate has converged---

[]


{'accuracy': 0.35869566, 'auc': 0.5}

## Test (quick runs on the first 10 training samples)

In [None]:
# Quick APR run: fitted on the reduced training subset (X_train_small / y_train_small)
# with sample_weights='balanced'.
trainer = Trainer()
model = APR()
trainer.prepare(model, metrics=metrics)

history = trainer.fit(
    X_train_small,
    y_train_small,
    sample_weights='balanced',
    verbose=1,
)

# Validation metrics stored in the training history (the recorded output of this cell is `[]`).
# NOTE(review): no validation strategy is passed to fit() — confirm whether an empty list is intended.
print(history['metrics_val'])

# Score on the full validation split (X_val / y_val), not the test set.
# NOTE(review): X_val_small / y_val_small exist but are not used here — confirm this is intended.
trainer.predict_metrics(X_val, y_val)

Iteration: 1, APR size: 131.71, Discriminative features: 2048
Iteration: 2, APR size: 7.15, Discriminative features: 53
Iteration: 3, APR size: 5.54, Discriminative features: 44
Iteration: 4, APR size: 8.08, Discriminative features: 36
Iteration: 5, APR size: 5.66, Discriminative features: 30
Iteration: 6, APR size: 5.8, Discriminative features: 28
Iteration: 7, APR size: 4.78, Discriminative features: 25
Iteration: 8, APR size: 1.88, Discriminative features: 24
---Grow and discriminate has converged---

[]


{'accuracy': 0.35869566, 'auc': 0.5}

In [9]:
# Quick AttentionDeepPoolingMil run: fitted on the reduced training subset
# (X_train_small / y_train_small) with sample_weights='balanced'.
trainer = Trainer()
model = AttentionDeepPoolingMil()
trainer.prepare(model, metrics=metrics)

history = trainer.fit(
    X_train_small,
    y_train_small,
    sample_weights='balanced',
    verbose=1,
)

# Validation metrics stored in the training history (the recorded output of this cell is `[]`).
# NOTE(review): no validation strategy is passed to fit() — confirm whether an empty list is intended.
print(history['metrics_val'])

# Score on the full validation split (X_val / y_val), not the test set.
# NOTE(review): X_val_small / y_val_small exist but are not used here — confirm this is intended.
trainer.predict_metrics(X_val, y_val)

[]


{'accuracy': 0.0, 'auc': 0.498716}