In [2]:
# Prepare the VIMELightning Module
from ts3l.pl_modules import VIMELightning
from ts3l.utils.vime_utils import VIMEDataset
from ts3l.utils import TS3LDataModule, get_category_cardinality
from ts3l.utils.vime_utils import VIMEConfig
from ts3l.utils.embedding_utils import IdentityEmbeddingConfig
from ts3l.utils.backbone_utils import MLPBackboneConfig
from pytorch_lightning import Trainer

import pandas as pd

In [7]:
# Read the labelled train/test splits and the unlabelled pool from the data directory.
train_data = pd.read_csv('../data/train_data.csv')
test_data = pd.read_csv('../data/test_data.csv')
unlabelled_data = pd.read_csv('../data/unlabelled_data.csv')

# Quick sanity check: (rows, columns) of each frame.
print(f'Train data shape: {train_data.shape}')
print(f'Test data shape: {test_data.shape}')
print(f'Unlabelled data shape: {unlabelled_data.shape}')

Train data shape: (465, 33)
Test data shape: (117, 33)
Unlabelled data shape: (1168, 30)


In [9]:
# Preview the first five rows of the test split.
test_data.head(n=5)

Unnamed: 0,ESR1,PGR,ERBB2,MKI67,PLAU,ELAVL1,EGFR,BTRC,FBXO6,SHMT2,...,Radio Therapy,Chemotherapy,Hormone Therapy,Neoplasm Histologic Grade,Cellularity,Surgery-breast conserving,Surgery-mastectomy,Label,DssTime,Event
0,11.23975,5.954311,9.739996,6.046045,10.040187,5.905724,5.881255,6.538235,7.260572,10.774752,...,1,0,1,3,0.5,0,1,1,7.8,1
1,10.927313,7.002502,10.033753,5.568993,8.306619,6.547491,5.733367,6.128118,7.917904,9.514045,...,1,0,1,2,0.5,0,1,0,132.033333,1
2,6.312633,5.305683,9.068778,5.919384,8.210977,5.896152,5.634379,5.625037,7.684047,11.422518,...,1,1,0,3,1.0,0,1,1,28.5,1
3,9.1852,5.480888,9.580607,5.655789,7.756504,6.026981,6.008594,6.269051,7.428641,9.478211,...,1,0,1,3,1.0,0,1,1,39.166667,1
4,7.249462,5.164281,10.233184,5.721403,8.918334,6.392132,5.58845,6.062906,7.968933,9.578638,...,1,1,0,2,1.0,0,1,1,31.3,1


In [12]:
# Separate features from the target: 'Label' is what gets predicted, and
# 'DssTime' / 'Event' are removed from the feature matrix together with it.
full_X_train = train_data.drop(['Label', 'DssTime', 'Event'], axis=1)
full_y_train = train_data.loc[:, 'Label']

# Same feature/target separation for the held-out test split.
X_test = test_data.drop(['Label', 'DssTime', 'Event'], axis=1)
y_test = test_data.loc[:, 'Label']

In [15]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled training rows for validation.
# Stratifying on the label keeps the class proportions the same in both parts,
# and the fixed random_state makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    full_X_train, full_y_train,
    test_size=0.2, random_state=42, stratify=full_y_train,
)

print(f'Training data shape: {X_train.shape}')
print(f'Validation data shape: {X_val.shape}')

Training data shape: (372, 30)
Validation data shape: (93, 30)


In [16]:
# Settings that are forwarded to the embedding config and to VIMEConfig further down.
metric = "accuracy_score"
input_dim = X_train.shape[1]  # number of feature columns in the training split
predictor_dim, output_dim = 1024, 2
alpha1, alpha2, beta = 2.0, 2.0, 1.0
K, p_m = 3, 0.2

In [17]:
# Mini-batch size (passed to TS3LDataModule) and the epoch budget.
# NOTE(review): max_epochs is not consumed by any cell visible here; presumably it is
# meant for the Trainer — confirm.
batch_size, max_epochs = 128, 20

In [18]:
# The embedding config is sized to the number of feature columns; the MLP backbone's
# input size is taken from the embedding's reported output_dim so the two stay in sync.
embedding_config = IdentityEmbeddingConfig(input_dim=input_dim)
backbone_config = MLPBackboneConfig(input_dim=embedding_config.output_dim)

In [21]:
# Column roles shared by the VIME configuration and by the datasets built from it.
# No categorical cardinalities are supplied to VIMEConfig, so every feature column is
# treated as continuous here.
# NOTE(review): if some columns (e.g. the 0/1 therapy flags) should instead be modelled
# as categorical, move them into `category_cols` and additionally pass
# `cat_cardinality=get_category_cardinality(full_X_train, category_cols)` below.
continuous_cols = list(X_train.columns)
category_cols = []

config = VIMEConfig(
    task="classification",
    loss_fn="CrossEntropyLoss",
    metric=metric,
    metric_hparams={},
    embedding_config=embedding_config,
    backbone_config=backbone_config,
    predictor_dim=predictor_dim,
    output_dim=output_dim,
    alpha1=alpha1,
    alpha2=alpha2,
    beta=beta,
    K=K,
    p_m=p_m,
    # Derived from the column list instead of a hard-coded number, so it cannot drift
    # away from the actual feature matrix.
    num_continuous=len(continuous_cols),
)


In [None]:
# Build the Lightning module for VIME from the configuration object.
# (The recorded output of this cell shows the seed being set to 42.)
pl_vime = VIMELightning(config)

Seed set to 42


In [None]:
### First Phase Learning
# No labels are passed to either dataset in this phase: the training features plus the
# unlabelled pool go into the training dataset, the validation features into the other.
# `continuous_cols` / `category_cols` must be defined before this cell runs and must be
# the same column lists that `config` was sized with.
# NOTE(review): assumes `unlabelled_data` carries the same feature columns as `X_train`
# (both have 30 columns) — confirm the column names match.
train_ds = VIMEDataset(
    X=X_train,
    unlabeled_data=unlabelled_data,
    config=config,
    continuous_cols=continuous_cols,
    category_cols=category_cols,
)
valid_ds = VIMEDataset(
    X=X_val,
    config=config,
    continuous_cols=continuous_cols,
    category_cols=category_cols,
)

# Wrap both datasets for Lightning; training batches are drawn with the 'random' sampler.
datamodule = TS3LDataModule(train_ds, valid_ds, batch_size, train_sampler='random')