# Version Notifications:

date : 2020/12/04

key modifications :
1. Train nonscored targets first.
2. MultilabelStratifiedKFold on drug_id.

## packages, functions, utils 

In [8]:
import numpy as np
import pandas as pd 
import random
import os
import gc

from tqdm.notebook import tqdm
from scipy.special import erfinv

from sklearn.metrics import log_loss

import seaborn as sns
from matplotlib import pyplot as plt

import torch
from torch import nn
from torch.nn import functional as F
from torch.nn import Module
from torch import optim
from torch.utils.data import Dataset, DataLoader

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device : {device}')

import warnings 
warnings.filterwarnings('ignore')

Device : cpu


In [9]:
def cate2dummy(train, test, cate_features):
    """One-hot encode categorical columns consistently across train and test.

    The categorical columns of both frames are pooled so that train and test
    receive the same set of dummy columns, then the dummies are merged back
    onto each frame by ``sig_id``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames containing a ``sig_id`` column and every column in ``cate_features``.
    cate_features : list of str
        Names of the categorical columns to encode.

    Returns
    -------
    (train, test, dumm_features)
        New merged frames (the original categorical columns are kept) and the
        list of dummy column names that were added.
    """
    cate_features = list(cate_features)
    key_and_cates = ['sig_id'] + cate_features

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
    # to stack the two frames.
    df_cate = pd.concat([train[key_and_cates], test[key_and_cates]], ignore_index=True)

    # Cast to str so numeric categories (e.g. cp_time) are encoded as labels.
    df_cate[cate_features] = df_cate[cate_features].astype(str)

    # Pin the dtype: pandas >= 2.0 defaults to bool, older versions to uint8.
    df_dumm = pd.get_dummies(df_cate, columns=cate_features, dtype='uint8')

    dumm_features = [c for c in df_dumm.columns if c != 'sig_id']
    train = train.merge(df_dumm, on='sig_id', how='left')
    test = test.merge(df_dumm, on='sig_id', how='left')

    return train, test, dumm_features

In [10]:
class GaussRankScaler():
    """Rank-based scaler: rank -> linear rescale into (-1, 1) -> inverse error function."""

    def __init__(self):
        eps = 0.001
        self.epsilon = eps
        # Keep the rescaled ranks strictly inside (-1, 1) so erfinv stays finite.
        self.lower = -1 + eps
        self.upper = 1 - eps
        self.range = self.upper - self.lower

    def fit_transform(self, X):
        """Return the transformed values of ``X`` (ranking is done along axis 0).

        The rank of every element is obtained with a double argsort, rescaled
        linearly so the smallest rank maps to ``-upper`` and the largest to
        ``upper``, and finally passed through ``erfinv``.
        """
        order = np.argsort(X, axis=0)
        ranks = np.argsort(order, axis=0)

        top_rank = len(ranks) - 1
        assert (ranks.min() == 0)
        assert (ranks.max() == top_rank)

        # Number of rank units per unit of the target interval.
        self.divider = top_rank / self.range

        return erfinv(ranks / self.divider - self.upper)
    
def gauss_rank_features(train, test, features):
    """Gauss-rank transform each feature column, ranking train and test rows together.

    Both frames are modified in place (their ``features`` columns are
    overwritten) and also returned.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames that both contain every column in ``features``.
    features : iterable of str
        Names of the numeric columns to transform.

    Returns
    -------
    (train, test)
        The same frame objects that were passed in.
    """
    n_train = len(train)

    for f in tqdm(features):
        # Pool the values positionally. Series.append was removed in pandas 2.0,
        # and working on plain arrays also avoids index alignment on write-back:
        # assigning a Series aligns on index labels and silently produces NaN
        # whenever the frames' indices do not line up with the pooled Series.
        pooled = np.concatenate([train[f].to_numpy(), test[f].to_numpy()])
        transformed = np.asarray(GaussRankScaler().fit_transform(pooled))

        train[f] = transformed[:n_train]
        test[f] = transformed[n_train:]

    return train, test

In [11]:
def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN kernels."""
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The seeding calls are independent of each other, so one loop covers them all.
    for set_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        set_seed(seed)

    torch.backends.cudnn.deterministic = True

## Data Preparation

In [14]:
ON_KAGGLE = False

# Pick the data / artifact locations for the current environment.
input_dir = '/kaggle/input/lish-moa' if ON_KAGGLE else '../_data/lish-moa'
output_dir = 'nn_04' if ON_KAGGLE else 'output/nn_04'

# Creates the 'output' parent as well when running locally; no-op if it already exists.
os.makedirs(output_dir, exist_ok=True)

In [15]:
# Quick sanity check: show which files are present in the input directory.
os.listdir(input_dir)

['zip files',
 'train_targets_nonscored.csv',
 'train_drug.csv',
 '.DS_Store',
 'test_features.csv',
 'train_targets_scored.csv',
 'train_features.csv',
 'sample_submission.csv']

In [33]:
def _read_input_csv(file_name):
    """Read one CSV file from ``input_dir`` into a DataFrame."""
    return pd.read_csv(os.path.join(input_dir, file_name))


# Feature tables
train = _read_input_csv('train_features.csv')
test = _read_input_csv('test_features.csv')

# Target tables (both keyed by sig_id)
targets_scored = _read_input_csv('train_targets_scored.csv')
targets_nonscored = _read_input_csv('train_targets_nonscored.csv')

# Drug table for the training rows
train_drug = _read_input_csv('train_drug.csv')

# Submission template
submission = _read_input_csv('sample_submission.csv')

In [34]:
def _columns_with_prefix(frame, prefix):
    """Return the column names of ``frame`` that start with ``prefix``, in order."""
    return [name for name in frame.columns if name.startswith(prefix)]


# Feature groups are identified by their column-name prefix.
gene_features = _columns_with_prefix(train, 'g-')
cell_features = _columns_with_prefix(train, 'c-')
cate_features = ['cp_type', 'cp_time', 'cp_dose']

# Every column of the target tables except the sig_id key is a target.
target_columns_scored = [name for name in targets_scored.columns if name != 'sig_id']
target_columns_nonscored = [name for name in targets_nonscored.columns if name != 'sig_id']

In [35]:
# Scored targets first, followed by the non-scored ones.
target_columns_all = [*target_columns_scored, *target_columns_nonscored]

In [36]:
# Attach both target tables to the training features (left joins on sig_id).
train = (
    train
    .merge(targets_scored, on='sig_id', how='left')
    .merge(targets_nonscored, on='sig_id', how='left')
)

In [37]:
# Drop the control-vehicle rows from both frames.
# .copy() makes the filtered frames independent objects, so the column
# assignments in later cells do not operate on a slice of the original frame.
train = train.loc[train['cp_type'] != 'ctl_vehicle'].copy()
test = test.loc[test['cp_type'] != 'ctl_vehicle'].copy()

# cp_type is no longer a categorical feature after the filter. Rebuilding the
# list (instead of list.remove) keeps this cell safe to re-run: a second
# .remove('cp_type') would raise ValueError.
cate_features = [c for c in cate_features if c != 'cp_type']

In [38]:
# Encode the treatment durations 24 / 48 / 72 as ordinal codes 0 / 1 / 2.
cp_time_map = {hours: code for code, hours in enumerate((24, 48, 72))}

for frame in (train, test):
    frame['cp_time'] = frame['cp_time'].map(cp_time_map)

In [39]:
# One-hot encode the remaining categorical columns; the dummy columns are merged onto both frames.
train, test, dumm_features = cate2dummy(train, test, cate_features)

In [40]:
# Gauss-rank transform every gene and cell feature, ranking train and test rows together.
train, test = gauss_rank_features(train, test, gene_features+cell_features)

HBox(children=(FloatProgress(value=0.0, max=872.0), HTML(value='')))




### Model Construction

In [47]:
class Model(nn.Module):
    """Five-layer MLP: BatchNorm (+ Dropout from the second block on) before each Linear.

    The hidden blocks use leaky-ReLU activations; the output layer is a
    weight-normalised Linear that returns raw logits.
    """

    def __init__(self, num_features, num_targets):
        super().__init__()
        self.hidden_size = [1500, 1250, 1000, 750]
        self.dropout_value = [0.5, 0.35, 0.2, 0.25]

        h1, h2, h3, h4 = self.hidden_size
        p2, p3, p4, p5 = self.dropout_value

        # Block 1: input -> h1 (no dropout on the raw input)
        self.bn1 = nn.BatchNorm1d(num_features)
        self.ds1 = nn.Linear(num_features, h1)

        # Block 2: h1 -> h2
        self.bn2 = nn.BatchNorm1d(h1)
        self.do2 = nn.Dropout(p2)
        self.ds2 = nn.Linear(h1, h2)

        # Block 3: h2 -> h3
        self.bn3 = nn.BatchNorm1d(h2)
        self.do3 = nn.Dropout(p3)
        self.ds3 = nn.Linear(h2, h3)

        # Block 4: h3 -> h4
        self.bn4 = nn.BatchNorm1d(h3)
        self.do4 = nn.Dropout(p4)
        self.ds4 = nn.Linear(h3, h4)

        # Block 5 (head): h4 -> targets
        self.bn5 = nn.BatchNorm1d(h4)
        self.do5 = nn.Dropout(p5)
        self.ds5 = nn.utils.weight_norm(nn.Linear(h4, num_targets))

    def forward(self, x):
        """Return logits for a batch ``x`` with ``num_features`` columns."""
        x = F.leaky_relu(self.ds1(self.bn1(x)))
        x = F.leaky_relu(self.ds2(self.do2(self.bn2(x))))
        x = F.leaky_relu(self.ds3(self.do3(self.bn3(x))))
        x = F.leaky_relu(self.ds4(self.do4(self.bn4(x))))
        return self.ds5(self.do5(self.bn5(x)))
    

class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed one-hot target distribution.

    Parameters
    ----------
    classes : int
        Number of classes (size of the class dimension of ``pred``).
    smoothing : float
        Probability mass spread evenly over the ``classes - 1`` wrong classes;
        the true class receives ``1 - smoothing``.
    dim : int
        Class dimension of ``pred``.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy.

        ``pred`` holds raw logits; ``target`` holds integer class indices with
        the class dimension removed.
        """
        pred = pred.log_softmax(dim=self.dim)

        with torch.no_grad():
            true_dist = torch.zeros_like(pred)
            true_dist.fill_(self.smoothing / (self.cls - 1))
            # Was `unsqueese` (AttributeError). Scatter along the configured class
            # dimension rather than a hard-coded 1 so `dim` is honoured throughout.
            true_dist.scatter_(self.dim, target.unsqueeze(self.dim), self.confidence)

        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))

In [45]:
class FineTuneScheduler:
    """Helper for transfer learning: clone a trained model, freeze its body, replace its head.

    Attributes
    ----------
    epochs : int
        Total number of fine-tuning epochs.
    epochs_per_step : int
        ``epochs`` divided by the number of frozen layer groups (set by
        ``copy_without_top``).
    frozen_layers : list of str
        Layer indices (the trailing digit of the module name, e.g. ``'1'`` for
        ``bn1``/``ds1``) whose parameters were frozen by the last copy.
    """

    def __init__(self, epochs):
        self.epochs = epochs
        # Was misspelled `epochs_pre_step`, leaving the real attribute undefined
        # until copy_without_top() had been called.
        self.epochs_per_step = 1
        self.frozen_layers = []

    def copy_without_top(self, model, num_features, num_targets, num_targets_new):
        """Return a copy of ``model`` with frozen body layers and a fresh output head.

        Parameters
        ----------
        model : nn.Module
            Trained source model. Its class must accept ``(num_features,
            num_targets)`` and expose ``hidden_size``, ``dropout_value`` and the
            head modules ``bn5`` / ``do5`` / ``ds5`` (as ``Model`` does).
        num_features, num_targets : int
            Constructor arguments the source model was built with.
        num_targets_new : int
            Output size of the replacement head.

        Returns
        -------
        nn.Module
            The new model, placed on the same device as ``model``.
        """
        self.frozen_layers = []

        # Build the copy from the source model's own class so the state dict fits.
        model_new = type(model)(num_features, num_targets)
        model_new.load_state_dict(model.state_dict())

        # Freeze every parameter outside the top block (index 5).
        for name, param in model_new.named_parameters():
            layer_index = name.split('.')[0][-1]

            # layer_index is a one-character string; the previous comparison with
            # the int 5 was never true, so the head was frozen along with the body.
            if layer_index == '5':
                continue

            param.requires_grad = False

            # Remember which layer groups are frozen.
            if layer_index not in self.frozen_layers:
                self.frozen_layers.append(layer_index)

        # max(..., 1) avoids a ZeroDivisionError when nothing was frozen.
        self.epochs_per_step = self.epochs // max(len(self.frozen_layers), 1)

        # Replace the top block. The attribute names must match the ones used in
        # forward() (bn5/do5/ds5); assigning to other names only adds unused modules.
        model_new.bn5 = nn.BatchNorm1d(model_new.hidden_size[-1])
        model_new.do5 = nn.Dropout(model_new.dropout_value[-1])
        model_new.ds5 = nn.utils.weight_norm(nn.Linear(model_new.hidden_size[-1], num_targets_new))

        # Follow the source model's device instead of relying on a global.
        model_new.to(next(model.parameters()).device)

        return model_new

In [46]:
# Model input columns. The categorical columns are currently left out; the
# commented line below is the variant that includes them.
# all_features = gene_features + cell_features + dumm_features + ['cp_time']
all_features = gene_features + cell_features

# The training, export and submission cells below all read a global named
# `targets`, which no visible cell defines (it only existed as leftover kernel
# state). Those cells size their arrays with NUM_TARGETS and drop `targets`
# from the scored-target table, so it is the list of scored target columns.
targets = target_columns_scored

# Optimisation settings
GRADIENT_ACCUMULATION_STEPS = 1
MAX_GRAD_NORM = 1000
HIDDEN_SIZE = 512   # NOTE(review): not read by Model, which hard-codes its layer sizes
DROPOUT = 0.2       # NOTE(review): not read by Model, which hard-codes its dropout rates
LEARNING_RATE = 1e-2
WEIGHT_DECAY = 1e-6
BATCH_SIZE = 32
EPOCHS = 20

# Problem dimensions
NUM_FEATURES = len(all_features)
NUM_TARGETS = len(target_columns_scored)
NUM_TARGETS_ALL = len(target_columns_scored) + len(target_columns_nonscored)
NUM_FOLDS = 5

### Train and predict framework

In [48]:
class TrainDataset(Dataset):
    """Dataset yielding ``(features, labels)`` float tensors, one row at a time."""

    def __init__(self, data, labels):
        # Both arguments must expose `.values` (e.g. pandas DataFrames).
        self.data, self.labels = data.values, labels.values

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row_features = self.data[idx]
        row_labels = self.labels[idx]
        return torch.FloatTensor(row_features), torch.tensor(row_labels).float()

In [49]:
class TestDataset(Dataset):
    """Dataset yielding one float feature tensor per row (no labels)."""

    def __init__(self, data):
        # `data` must expose `.values` (e.g. a pandas DataFrame).
        self.data = data.values

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row_features = self.data[idx]
        return torch.FloatTensor(row_features)

In [50]:
class AverageMeter(object):
    """Keeps the most recent value and the running weighted average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed over ``n`` samples and refresh the average."""
        self.val = val
        self.count = self.count + n
        self.sum = self.sum + val * n
        self.avg = (self.sum / self.count)

In [51]:
def train_fn(train_loader,
             model,
             optimizer,
             epoch,
             scheduler,
             device):
    """Run one training epoch and return the sample-weighted mean BCE loss.

    Parameters
    ----------
    train_loader : iterable of (X, y) batches
    model : nn.Module producing logits
    optimizer : torch optimizer for ``model``
    epoch : int
        Current epoch number (kept for interface compatibility; not used).
    scheduler : learning-rate scheduler stepped once per optimizer step
    device : torch.device the batches are moved to

    Returns
    -------
    float
        Average of the per-batch (unscaled) losses, weighted by batch size.
    """
    losses = AverageMeter()
    criterion = nn.BCEWithLogitsLoss()  # built once instead of once per batch

    model.train()
    # Start from clean gradients so nothing left over from an earlier call leaks in.
    optimizer.zero_grad()

    for step, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device)

        batch_size = X.size(0)
        pred = model(X)
        loss = criterion(pred, y)
        losses.update(loss.item(), batch_size)

        # Scale so that the accumulated gradient is the mean over the micro-batches.
        if GRADIENT_ACCUMULATION_STEPS > 1:
            loss = loss / GRADIENT_ACCUMULATION_STEPS

        loss.backward()

        if (step + 1) % GRADIENT_ACCUMULATION_STEPS == 0:
            # Clip once, on the fully accumulated gradient, right before the update.
            nn.utils.clip_grad_norm_(model.parameters(), MAX_GRAD_NORM)
            # PyTorch expects optimizer.step() before scheduler.step(); the reverse
            # order skips the first value of the learning-rate schedule.
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

    return losses.avg

In [52]:
def validate_fn(valid_loader, model, device):
    """Evaluate ``model`` on a labelled loader.

    Parameters
    ----------
    valid_loader : iterable of (X, y) batches
    model : nn.Module producing logits
    device : torch.device the batches are moved to

    Returns
    -------
    (float, numpy.ndarray)
        The sample-weighted mean BCE loss and the sigmoid probabilities of all
        batches concatenated in loader order.
    """
    losses = AverageMeter()
    criterion = nn.BCEWithLogitsLoss()  # built once instead of once per batch
    model.eval()
    val_preds = []

    # No gradients are needed anywhere during validation. The former
    # `loss /= GRADIENT_ACCUMULATION_STEPS` was dead code: it ran after the loss
    # had already been recorded and nothing read the loss afterwards.
    with torch.no_grad():
        for X, y in valid_loader:
            X, y = X.to(device), y.to(device)

            pred = model(X)
            loss = criterion(pred, y)

            losses.update(loss.item(), X.size(0))
            val_preds.append(pred.sigmoid().cpu().numpy())

    val_preds = np.concatenate(val_preds)

    return losses.avg, val_preds

In [53]:
def inference_fn(test_loader, model, device):
    """Predict probabilities for every batch of an unlabelled loader.

    Parameters
    ----------
    test_loader : iterable of feature batches
    model : nn.Module producing logits
    device : torch.device the batches are moved to

    Returns
    -------
    numpy.ndarray
        Sigmoid probabilities with one row per sample, in loader order.
    """
    model.eval()
    preds = []

    with torch.no_grad():
        for X in test_loader:
            X = X.to(device)
            pred = model(X)
            preds.append(pred.sigmoid().cpu().numpy())

    # Concatenate exactly once: a second np.concatenate would join the rows of
    # the (n_samples, n_targets) matrix into a flat 1-D vector.
    preds = np.concatenate(preds)

    return preds

In [54]:
from sklearn.model_selection import KFold

def make_fold_on_drug(train, n_seeds, n_folds, DRUG_THRESH, target_cols=None):
    """Add drug-aware stratified fold columns ``fold0 .. fold{n_seeds-1}`` to ``train``.

    Drugs with at most ``DRUG_THRESH`` rows are assigned to a fold as a whole
    (all rows of such a drug share one fold); rows of more frequent drugs are
    stratified individually.

    Parameters
    ----------
    train : pandas.DataFrame
        Must contain ``sig_id``, ``drug_id`` and every column in ``target_cols``.
        Modified in place.
    n_seeds : int
        Number of fold columns to create; seed ``s`` produces column ``fold{s}``.
    n_folds : int
        Number of folds per seed.
    DRUG_THRESH : int
        Row-count threshold separating "rare" from "frequent" drugs.
    target_cols : list of str, optional
        Target columns to stratify on. Defaults to the notebook-level
        ``target_columns_scored``.

    Returns
    -------
    pandas.DataFrame
        ``train`` with the fold columns (dtype int8) added.
    """
    if target_cols is None:
        target_cols = target_columns_scored
    target_cols = list(target_cols)

    drug_count = train['drug_id'].value_counts()
    drugs1 = drug_count.loc[drug_count <= DRUG_THRESH].index.sort_values()
    drugs2 = drug_count.loc[drug_count > DRUG_THRESH].index.sort_values()

    for seed in range(n_seeds):
        fold_col = f'fold{seed}'
        drug_fold_map = {}  # drug_id -> fold, for rare drugs
        sig_fold_map = {}   # sig_id  -> fold, for rows of frequent drugs

        # Rare drugs: stratify on the per-drug mean of the targets.
        if len(drugs1) > 0:
            skf = MultilabelStratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
            tmp = train.groupby('drug_id')[target_cols].mean().loc[drugs1]

            for _fold, (_, valid_idx) in enumerate(skf.split(tmp, tmp[target_cols])):
                drug_fold_map.update({k: _fold for k in tmp.index[valid_idx].values})

        # Frequent drugs: stratify the individual rows.
        if len(drugs2) > 0:
            skf = MultilabelStratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)
            tmp = train.loc[train['drug_id'].isin(drugs2)].reset_index(drop=True)

            for _fold, (_, valid_idx) in enumerate(skf.split(tmp, tmp[target_cols])):
                sig_fold_map.update({k: _fold for k in tmp['sig_id'].iloc[valid_idx].values})

        # Assign folds. The two maps use different keys, so they are applied
        # separately: looking sig_id keys up through drug_id would leave every
        # frequent-drug row as NaN and make the int8 cast fail.
        fold = train['drug_id'].map(drug_fold_map)
        unassigned = fold.isna()
        fold.loc[unassigned] = train.loc[unassigned, 'sig_id'].map(sig_fold_map)
        train[fold_col] = fold.astype('int8')

    return train

In [None]:
def train_model(model, tag_name, features, targerts, fine_tune_scheduler=None):
    """Train `model` for EPOCHS epochs, checkpointing whenever the validation loss improves.

    NOTE(review): this function looks like an unfinished draft and cannot run as written:
      * the 4th parameter is spelled `targerts`, but the body reads `targets`,
        so the argument is ignored and a global `targets` is looked up instead;
      * `df_train`, `df_valid`, `MoADataset`, `seed_id`, `fold_id`, `valid_index`,
        `model_file` and `train` are neither parameters nor locals, and no visible
        cell defines `df_train`, `df_valid`, `MoADataset`, `seed_id` or `fold_id`;
      * `MoADataset` is handed NumPy arrays, whereas the `TrainDataset` class in this
        notebook calls `.values` on its inputs, so it is not a drop-in substitute;
      * `tag_name` is only used in the log line and `fine_tune_scheduler` is never used;
      * nothing is returned, although a caller in this notebook assigns the result
        to `oof`.
    """
    
    X_train, y_train = df_train[features].values, df_train[targets].values
    X_valid, y_valid = df_valid[features].values, df_valid[targets].values
    
    train_dataset = MoADataset(X_train, y_train)
    valid_dataset = MoADataset(X_valid, y_valid)
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    # Adam over all parameters of `model`.
    optimizer = optim.Adam(model.parameters(),
                           lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)
    
    # One-cycle schedule sized for EPOCHS * len(train_loader) scheduler steps.
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                              pct_start=0.1,
                                              div_factor=1e3,
                                              max_lr=1e-2,
                                              epochs=EPOCHS,
                                              steps_per_epoch=len(train_loader))
    
    # train & valid
    best_loss = np.inf
    for epoch in range(EPOCHS):
        train_loss = train_fn(train_loader,
                              model,
                              optimizer,
                              epoch,
                              scheduler,
                              device)
        
        valid_loss, valid_pred = validate_fn(valid_loader, model, device)   
        print(f"SEED: {seed_id}, FOLD: {fold_id}, {tag_name}, EPOCH: {epoch}, train_loss: {train_loss:.6f}, valid_loss: {valid_loss:.6f}")

        # Keep the out-of-fold predictions and weights of the best epoch so far.
        if valid_loss < best_loss:
            print(f'epoch{epoch} save best model ... {train_loss}, {valid_loss}')
            best_loss = valid_loss
            # NOTE(review): `oof` is rebuilt here but never returned or stored.
            oof = np.zeros((len(train), len(targets)))
            oof[valid_index] = valid_pred
            torch.save(model.state_dict(), model_file)





In [24]:
def run_single_fold(train,
                    test,
                    features,
                    targets,
                    device,
                    fold_num=0,
                    seed=42):
    """Train on every fold except ``fold_num``, validate on it, and predict the test set.

    Parameters
    ----------
    train : pandas.DataFrame
        Training frame with a ``fold`` column plus the ``features`` and ``targets``
        columns. Its index labels are used to place rows into the out-of-fold
        array, so it must have a default 0..n-1 index.
    test : pandas.DataFrame
        Frame containing the ``features`` columns.
    features, targets : list of str
        Input and target column names.
    device : torch.device
    fold_num : int
        Fold held out for validation.
    seed : int
        Seed passed to ``seed_everything``; also part of the checkpoint file name.

    Returns
    -------
    (oof, predictions)
        ``oof`` has shape ``(len(train), len(targets))`` and is zero outside the
        validation rows; ``predictions`` holds the test probabilities from the
        best checkpoint.
    """
    model_file = os.path.join(output_dir, f'fold{fold_num}_seed{seed}.pth')
    seed_everything(seed)

    train_index = train.loc[train['fold'] != fold_num].index
    valid_index = train.loc[train['fold'] == fold_num].index

    train_data = train.loc[train_index][features].reset_index(drop=True)
    valid_data = train.loc[valid_index][features].reset_index(drop=True)

    train_targets = train.loc[train_index][targets]
    valid_targets = train.loc[valid_index][targets]

    train_dataset = TrainDataset(train_data, train_targets)
    valid_dataset = TrainDataset(valid_data, valid_targets)

    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True, drop_last=False)

    # model -- the notebook's network class is `Model`; `MLP` is not defined anywhere.
    num_inputs, num_outputs = len(features), len(targets)
    model = Model(num_inputs, num_outputs)
    model.to(device)

    optimizer = optim.Adam(model.parameters(),
                           lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)

    scheduler = optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                              pct_start=0.1,
                                              div_factor=1e3,
                                              max_lr=1e-2,
                                              epochs=EPOCHS,
                                              steps_per_epoch=len(train_loader))

    # train & valid
    # Allocate the out-of-fold array up front so it exists even if no epoch
    # improves on the initial best loss (e.g. the loss is NaN).
    oof = np.zeros((len(train), num_outputs))
    best_loss = np.inf
    for epoch in range(EPOCHS):
        train_loss = train_fn(train_loader,
                              model,
                              optimizer,
                              epoch,
                              scheduler,
                              device)

        valid_loss, valid_pred = validate_fn(valid_loader, model, device)

        if valid_loss < best_loss:
            print(f'epoch{epoch} save best model ... {train_loss}, {valid_loss}')
            best_loss = valid_loss
            oof[valid_index] = valid_pred
            torch.save(model.state_dict(), model_file)

    if not np.isfinite(best_loss):
        # Without this guard a checkpoint left over from an earlier run could be
        # loaded below and silently used for the predictions.
        raise RuntimeError(f'No checkpoint was saved for fold {fold_num}, seed {seed}: '
                           'the validation loss never improved.')

    # predictions
    test_dataset = TestDataset(test[features])
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)

    model = Model(num_inputs, num_outputs)
    model.load_state_dict(torch.load(model_file, map_location=device))
    model.to(device)

    predictions = inference_fn(test_loader, model, device)

    # delete
    torch.cuda.empty_cache()

    return oof, predictions

In [24]:
def run_single_fold(train,
                    test,
                    features,
                    targets,
                    device,
                    fold_num=0,
                    seed=42):
    """Pretrain on all targets, fine-tune on the scored targets, then predict the test set.

    NOTE(review): this redefines `run_single_fold` and therefore shadows the
    definition in the preceding cell. It looks like an unfinished draft and
    cannot run as written:
      * `num_features`, `num_all_targets`, `all_targets_cols`, `num_feature`,
        `num_all_features`, `num_targets`, `DEVICE` and `MLP` are not defined here
        or in any visible cell (the notebook defines NUM_FEATURES, NUM_TARGETS,
        NUM_TARGETS_ALL, target_columns_all, `device` and `Model`);
      * the first `train_model` call passes three positional arguments, while
        `train_model` requires four;
      * `train_loader` / `valid_loader` are built here but never passed on;
      * the checkpoints `ALL_TARGETS_FOLD{n}.pth` / `SCORED_ONLY_FOLD{n}.pth` are
        loaded, but no visible code writes files with those names;
      * the fine-tuned `model` is replaced by `MLP()` loaded from `model_file`
        right before inference.
    """
    
    model_file = os.path.join(output_dir, f'fold{fold_num}_seed{seed}.pth')
    seed_everything(seed)
    
    # Row labels of the training / validation part for this fold.
    train_index = train.loc[train['fold'] != fold_num].index
    valid_index = train.loc[train['fold'] == fold_num].index
    
    train_data = train.loc[train_index][features].reset_index(drop=True)
    valid_data = train.loc[valid_index][features].reset_index(drop=True)
    
    train_targets = train.loc[train_index][targets]
    valid_targets = train.loc[valid_index][targets]
    
    train_dataset = TrainDataset(train_data, train_targets)
    valid_dataset = TrainDataset(valid_data, valid_targets)
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True, drop_last=False)
    
    fine_tune_scheduler = FineTuneScheduler(EPOCHS)

    pretrained_model = Model(num_features, num_all_targets)
    pretrained_model.to(device)
    
    # Train on scored & non-scored targets
    train_model(pretrained_model, 'ALL_TARGETS', all_targets_cols)
    
    # Load the pretrained model with the best loss
    pretrained_model = Model(num_feature, num_all_features)
    pretrained_model.load_state_dict(torch.load(f'ALL_TARGETS_FOLD{fold_num}.pth'))
    pretrained_model.to(DEVICE)
    
    # Copy the model without the top layer
    final_model = fine_tune_scheduler.copy_without_top(pretrained_model, num_features, num_all_features, num_targets)
    
    # Fine-tune the model on scored targets only.
    oof = train_model(final_model, 'SCORED_ONLY', targets, fine_tune_scheduler)
    
    # Load the fine-tuned model with the best loss.
    model = Model(num_features, num_targets)
    model.load_state_dict(torch.load(f'SCORED_ONLY_FOLD{fold_num}.pth'))
    model.to(DEVICE)
            
    # predictions
    test_dataset = TestDataset(test[features])
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)
    
    # NOTE(review): this overwrites the fine-tuned model loaded just above.
    model = MLP()
    model.load_state_dict(torch.load(model_file))
    model.to(device)
    
    predictions = inference_fn(test_loader, model, device)
    
    # Release cached GPU memory held by PyTorch.
    torch.cuda.empty_cache()
    
    return oof, predictions

In [25]:
def run_oof(train,
            test,
            features,
            targets,
            device,
            n_fold=5,
            seed=42):
    """Run ``n_fold``-fold cross-validation for one seed.

    A ``fold`` column is (re)written on ``train`` in place, one model is trained
    per fold via ``run_single_fold``, and the mean column-wise log loss of the
    out-of-fold predictions is printed.

    Parameters
    ----------
    train, test : pandas.DataFrame
    features, targets : list of str
        Input and target column names.
    device : torch.device
    n_fold : int
        Number of folds (used for the split, the loop and the test averaging).
    seed : int
        Seed for the fold split and for each fold's training run.

    Returns
    -------
    (oof, predictions)
        Out-of-fold probabilities, shape ``(len(train), len(targets))``, and the
        fold-averaged test probabilities, shape ``(len(test), len(targets))``.
    """
    oof = np.zeros((len(train), len(targets)))
    predictions = np.zeros((len(test), len(targets)))

    train['fold'] = 0
    fold_col = train.columns.get_loc('fold')

    # Split into exactly as many folds as are trained below; the split used to
    # read the global NUM_FOLDS and could disagree with `n_fold`.
    mlskf = MultilabelStratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed)
    for n, (_, valid_index) in enumerate(mlskf.split(train, train[targets])):
        # split() yields positional indices, so assign by position.
        train.iloc[valid_index, fold_col] = int(n)

    for _fold in range(n_fold):
        print(f'Fold {_fold+1}')
        _oof, _predictions = run_single_fold(train,
                                             test,
                                             features,
                                             targets,
                                             device,
                                             fold_num=_fold,
                                             seed=seed)
        # Each fold fills only its own validation rows, so summing assembles the full OOF matrix.
        oof += _oof
        predictions += (_predictions / n_fold)

    # Average over the targets that were actually passed in, not the global NUM_TARGETS.
    n_targets = len(targets)
    score = 0
    for i, t in enumerate(targets):
        _score = log_loss(train[t].values, oof[:, i])
        score += (_score / n_targets)

    print(f'CV score: {score}')

    return oof, predictions

In [26]:
# Seed averaging: repeat the full cross-validation with several seeds and
# average both the out-of-fold and the test predictions.

SEED = [0, 1, 2]
n_seeds = len(SEED)

oof = np.zeros((len(train), NUM_TARGETS))
predictions = np.zeros((len(test), NUM_TARGETS))

for seed in SEED:
    seed_oof, seed_predictions = run_oof(
        train, test, all_features, targets, device, n_fold=5, seed=seed
    )
    oof += (seed_oof / n_seeds)
    predictions += (seed_predictions / n_seeds)

# Mean column-wise log loss of the seed-averaged out-of-fold predictions.
score = sum(
    log_loss(train[t].values, oof[:, i]) / NUM_TARGETS
    for i, t in enumerate(targets)
)

print(f'Seed Averaged CV score : {score}')

Fold 1
epoch0 save best model ... 0.14015652847401525, 0.018638518507135948
epoch2 save best model ... 0.0185154103011192, 0.017833404725947556
epoch3 save best model ... 0.017432120071221007, 0.01757509785151123
epoch4 save best model ... 0.016795368489544212, 0.017133058297689126
epoch6 save best model ... 0.0160719108084611, 0.01701771395905234
epoch8 save best model ... 0.015816255945059723, 0.016914904175301188
epoch9 save best model ... 0.015530979435850125, 0.016874581486908482
epoch11 save best model ... 0.014628159104894003, 0.016684068913710947
Fold 2
epoch0 save best model ... 0.14023638930151333, 0.018518766421823846
epoch2 save best model ... 0.018525662325512973, 0.01782473893384166
epoch3 save best model ... 0.01734953342909741, 0.017614373857888364
epoch4 save best model ... 0.0167312769025537, 0.01747169713817552
epoch5 save best model ... 0.016301187805586707, 0.01726651219424055
epoch10 save best model ... 0.015101789488341578, 0.017218567936845237
Fold 3
epoch0 save

In [27]:
# Persist the out-of-fold predictions.
# NOTE: this overwrites the ground-truth target columns of `train` with the OOF
# probabilities, so the training/scoring cells above must not be re-run after
# this cell without reloading the data.
train[targets] = oof
train[['sig_id'] + targets].to_csv(os.path.join(output_dir, 'oof.csv'), index=False)

# `test` has no target columns yet: create them first, then fill in the predictions.
for c in targets:
    test[c] = 0
    
test[targets] = predictions
test[['sig_id'] + targets].to_csv(os.path.join(output_dir, 'pred.csv'), index=False)

In [28]:
# Score against the complete scored-target table. Rows that are missing from
# `train` (the control rows dropped earlier) get a prediction of 0 via fillna.
oof_by_sig_id = train[['sig_id'] + targets]
result = (
    targets_scored
    .drop(columns=targets)
    .merge(oof_by_sig_id, on='sig_id', how='left')
    .fillna(0)
)

y_true = targets_scored[targets].values
y_pred = result[targets].values

score = sum(log_loss(y_true[:, i], y_pred[:, i]) / NUM_TARGETS for i in range(NUM_TARGETS))

print(f'Final result : {score}')

Final result : 0.014870948231512835


### Submit

In [29]:
# On Kaggle the submission goes to the working directory; locally it is stored
# next to the other artifacts in output_dir.
sub_file = 'submission.csv' if ON_KAGGLE else os.path.join(output_dir, 'submission.csv')

In [30]:
# Build the submission in the row order of the sample file: its placeholder target
# columns are replaced by the model predictions, and sig_ids that are absent from
# `test` (the control rows dropped earlier) receive 0 for every target via fillna.
sub = submission.drop(columns=targets).merge(test[['sig_id'] + targets], on='sig_id', how='left').fillna(0)
sub.to_csv(sub_file, index=False)
sub.head()

Unnamed: 0,sig_id,5-alpha_reductase_inhibitor,11-beta-hsd1_inhibitor,acat_inhibitor,acetylcholine_receptor_agonist,acetylcholine_receptor_antagonist,acetylcholinesterase_inhibitor,adenosine_receptor_agonist,adenosine_receptor_antagonist,adenylyl_cyclase_activator,...,tropomyosin_receptor_kinase_inhibitor,trpv_agonist,trpv_antagonist,tubulin_inhibitor,tyrosine_kinase_inhibitor,ubiquitin_specific_protease_inhibitor,vegfr_inhibitor,vitamin_b,vitamin_d_receptor_agonist,wnt_inhibitor
0,id_0004d9e33,0.000558,0.00106,0.00189,0.009645,0.018072,0.005951,0.001248,0.006448,7.9e-05,...,0.000654,0.001708,0.002776,0.001144,0.000424,0.000502,0.000606,0.001265,0.003285,0.001376
1,id_001897cda,0.000399,0.001101,0.002146,0.002328,0.002341,0.003318,0.009224,0.011367,0.024113,...,0.00045,0.000682,0.003648,0.000369,0.031975,0.000373,0.014118,0.000811,0.002732,0.004331
2,id_002429b5b,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,id_00276f245,0.000581,0.000499,0.003548,0.017654,0.013674,0.006719,0.00538,0.005945,0.00021,...,0.000728,0.001412,0.004233,0.044727,0.011808,0.000429,0.001231,0.002732,0.002335,0.004616
4,id_0027f1083,0.001649,0.001153,0.002538,0.015504,0.023633,0.004503,0.007552,0.001953,0.000275,...,0.000843,0.000814,0.003958,0.004164,0.001363,0.000725,0.001245,0.001979,0.000116,0.001505
