In [None]:
from Data.Drosophilla.FlyDataMod import FlyDataModule
from Models.Transformer import PositionalEncoding, TransformerModule
from Models import BiLSTM as bi
from pytorch_lightning.callbacks import EarlyStopping
from IPython.core.debugger import set_trace
import pytorch_lightning as pl
import os
import matplotlib.pyplot as plt
import yaml
import glob

from Models import BiLSTM as bi
from Models import Transformer as tr
from Models import LinearRegression as ln

from Utils import evaluations as ev


In [None]:
# Leave-one-feature-out ablation: train one BiLSTM per excluded feature index
# on the S2 cell line with the gamma label, logging under `rootdir`.
cell_line = "S2"
data_win_radius = 5
batch_size = 4
label_type = "gamma"

# Best hyperparameters from the LSTM tuning experiment. The file is opened in a
# context manager so the handle is closed as soon as parsing finishes.
# NOTE(review): yaml.full_load is kept as in the original; if the file holds
# only plain scalars, yaml.safe_load would be the safer choice - confirm.
hparams_path = "Experiments/Table_7_LSTM_Tunning_Gamma/BEST_HPARAMS.yaml"
with open(hparams_path, 'r') as hparams_file:
    params = yaml.full_load(hparams_file)

# The output directory does not depend on the loop variable, so create it once.
rootdir = "Experiments/LSTM_Exclude_Features"
os.makedirs(rootdir, exist_ok=True)

# NOTE(review): features are visited in descending order (28 -> 0). The
# evaluation cells load 'lightning_logs/version_<i>' together with
# exclude_feature=i; presumably each fit() call creates the next version_<N>
# directory, in which case version_0 would hold exclude_feature=28 on a fresh
# rootdir. Confirm the version <-> feature mapping before comparing results.
for exclude_feature in reversed(range(0, 29)):

    # Stop a run once the monitored validation loss has not improved for
    # three consecutive checks.
    early_stop_callback = EarlyStopping(
        monitor="val weighted mse loss",
        min_delta=0.00,
        patience=3,
        verbose=False,
        mode='min')

    dm = FlyDataModule(cell_line=cell_line,
                       data_win_radius=data_win_radius,
                       batch_size=batch_size,
                       label_type=label_type,
                       exclude_feature=exclude_feature)
    dm.setup()

    # Run configuration handed to the model alongside the tuned parameters.
    hparams = {'cell_line': cell_line,
               'data_win_radius': data_win_radius,
               'label_type': label_type,
               "batch_size": batch_size,
               "exclude_feature": exclude_feature}

    # input_size=28: presumably the 29 available features minus the excluded
    # one - TODO confirm against FlyDataModule.
    model = bi.BiLSTMModule(
        input_size=28,
        hidden_size=params['hidden_size'],
        num_layers=params['num_layers'],
        dropout=params['dropout'],
        bias=params['bias'],
        lr=params['lr'],
        hparams=hparams)

    model.cuda()

    trainer = pl.Trainer(
        gpus=1,
        max_epochs=50,
        default_root_dir=rootdir,
        callbacks=[early_stop_callback])

    trainer.fit(model, dm)

In [None]:
## Compare the Transformer and the BiLSTM when a single feature is excluded.
## For every excluded-feature index, load the matching checkpoint of each model
## and collect its test-split metrics.
import glob

metrics       = ['mse','mae','r2','pearson','spearman']
# One list per metric; entry k belongs to the k-th loop iteration below.
trans_results = {name: [] for name in metrics}
lstm_results  = {name: [] for name in metrics}

# Checkpoint location: experiment folder, then Lightning version directory.
checkpoint_pattern = "Experiments/{}/lightning_logs/version_{}/checkpoints/*"

for version, exclude_feature in enumerate(range(0, 29)):
    dm = FlyDataModule(
        cell_line="S2",
        data_win_radius=5,
        batch_size=1,
        label_type='gamma',
        label_val=3,
        exclude_feature=exclude_feature,
    )
    dm.setup()

    # Take the first file found in each checkpoints directory.
    trans_weights = glob.glob(
        checkpoint_pattern.format("Transformer_Exclude_Features", version))[0]
    lstm_weights = glob.glob(
        checkpoint_pattern.format("LSTM_Exclude_Features", version))[0]

    trans_model = tr.TransformerModule.load_from_checkpoint(trans_weights).to("cuda:0")
    lstm_model  = bi.BiLSTMModule.load_from_checkpoint(lstm_weights).to("cuda:0")

    met_ret_lstm  = ev.getModelMetrics(lstm_model, dm, 'test')
    met_ret_trans = ev.getModelMetrics(trans_model, dm, 'test')

    for name in metrics:
        lstm_results[name].append(met_ret_lstm[name])
        trans_results[name].append(met_ret_trans[name])


# Feature names used as x-axis labels by the plotting cells: every column of
# the scaled feature table from position 6 onwards.
import pandas as pd
exf = pd.read_csv("Data/Drosophilla/s2_kc_bg_scaled_18_features_2901.csv")
excluded_features = exf.columns[6:]

        

In [None]:
# Per-feature Spearman scores: LSTM on the first line, Transformer on the second.
print(lstm_results['spearman'], trans_results['spearman'], sep="\n")

In [None]:
# One figure per metric. Both models' scores and the feature labels are put in
# ascending order of the Transformer's score for that metric.
for sort_metric in metrics:
    sort_keys = trans_results[sort_metric]
    # Sort on the key only, so the paired values are never compared.
    key_of_pair = lambda pair: pair[0]

    trans_mse, lstm_mse, excluded_features_sorted = (
        [value for _, value in sorted(zip(sort_keys, values), key=key_of_pair)]
        for values in (sort_keys, lstm_results[sort_metric], excluded_features)
    )

    fig, ax = plt.subplots(figsize=(20, 3))
    for series, label in ((trans_mse, "transformer"), (lstm_mse, "lstm")):
        ax.plot(series, label=label)

    # One tick per feature, labelled with the feature name.
    ax.set_xticks(list(range(len(excluded_features_sorted))))
    ax.set_xticklabels(excluded_features_sorted, rotation=45, size=16)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_ylabel(sort_metric, size=16)
    ax.legend()
    plt.show()


In [None]:
## Compare Transformer performance across the three label types (gamma,
## insulation, directionality) when a single feature is excluded.
import glob

metrics       = ['mse','mae','r2','pearson','spearman']
# One list per metric; entry k belongs to the k-th loop iteration below.
gamma_results = {name: [] for name in metrics}
insul_results = {name: [] for name in metrics}
direc_results = {name: [] for name in metrics}

# Checkpoint location: experiment folder, then Lightning version directory.
checkpoint_pattern = "Experiments/{}/lightning_logs/version_{}/checkpoints/*"

for version, exclude_feature in enumerate(range(0, 29)):
    # Settings shared by the three data modules; only the label differs.
    shared_args = dict(cell_line="S2",
                       data_win_radius=5,
                       batch_size=1,
                       exclude_feature=exclude_feature)

    dm_gamma = FlyDataModule(label_type='gamma', label_val=3, **shared_args)
    dm_gamma.setup()
    dm_insul = FlyDataModule(label_type='insulation', label_val=3, **shared_args)
    dm_insul.setup()
    # Directionality is the only label that uses label_val=10.
    dm_direc = FlyDataModule(label_type='directionality', label_val=10, **shared_args)
    dm_direc.setup()

    # Take the first file found in each checkpoints directory.
    gamma_weights = glob.glob(
        checkpoint_pattern.format("Transformer_Exclude_Features", version))[0]
    insul_weights = glob.glob(
        checkpoint_pattern.format("Transformer_Exclude_Features_Insulation", version))[0]
    direc_weights = glob.glob(
        checkpoint_pattern.format("Transformer_Exclude_Features_Direction", version))[0]

    gamma_model = tr.TransformerModule.load_from_checkpoint(gamma_weights).to("cuda:0")
    insul_model = tr.TransformerModule.load_from_checkpoint(insul_weights).to("cuda:0")
    direc_model = tr.TransformerModule.load_from_checkpoint(direc_weights).to("cuda:0")

    met_ret_gamma = ev.getModelMetrics(gamma_model, dm_gamma, 'test')
    met_ret_insul = ev.getModelMetrics(insul_model, dm_insul, 'test')
    met_ret_direc = ev.getModelMetrics(direc_model, dm_direc, 'test')

    for name in metrics:
        gamma_results[name].append(met_ret_gamma[name])
        insul_results[name].append(met_ret_insul[name])
        direc_results[name].append(met_ret_direc[name])


In [None]:
# One figure per metric. The three label types' scores and the feature labels
# are put in ascending order of the gamma model's score for that metric.
for sort_metric in metrics:
    sort_keys = gamma_results[sort_metric]
    # Sort on the key only, so the paired values are never compared.
    key_of_pair = lambda pair: pair[0]

    gamma_met, insul_met, direc_met, excluded_features_sorted = (
        [value for _, value in sorted(zip(sort_keys, values), key=key_of_pair)]
        for values in (sort_keys,
                       insul_results[sort_metric],
                       direc_results[sort_metric],
                       excluded_features)
    )

    fig, ax = plt.subplots(figsize=(20, 3))
    curves = (
        (gamma_met, "gamma", 'orange'),
        (insul_met, "insulation", 'limegreen'),
        (direc_met, "directionality", 'cornflowerblue'),
    )
    for series, label, colour in curves:
        ax.plot(series, label=label, color=colour)

    # One tick per feature, labelled with the feature name.
    ax.set_xticks(list(range(len(excluded_features_sorted))))
    ax.set_xticklabels(excluded_features_sorted, rotation=45, size=16)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_ylabel(sort_metric, size=16)
    ax.legend()
    plt.show()


In [None]:
## Compare the Transformer trained with `solo_feature=k` against the one
## trained with `exclude_feature=k`, for every feature index k.
## (presumably "solo" keeps only that feature - confirm in FlyDataModule)
import glob

metrics       = ['mse','mae','r2','pearson','spearman']
# One list per metric; entry k belongs to the k-th loop iteration below.
solo_results  = {name: [] for name in metrics}
excl_results  = {name: [] for name in metrics}

# Checkpoint location: experiment folder, then Lightning version directory.
checkpoint_pattern = "Experiments/{}/lightning_logs/version_{}/checkpoints/*"

for version, exclude_feature in enumerate(range(0, 29)):
    # Settings shared by both data modules; only the feature selection differs.
    shared_args = dict(cell_line="S2",
                       data_win_radius=5,
                       batch_size=1,
                       label_type='gamma',
                       label_val=3)

    dm_excl = FlyDataModule(exclude_feature=exclude_feature, **shared_args)
    dm_excl.setup()
    dm_solo = FlyDataModule(solo_feature=exclude_feature, **shared_args)
    dm_solo.setup()

    # Take the first file found in each checkpoints directory.
    solo_weights = glob.glob(
        checkpoint_pattern.format("Transformer_Solo_Features_gamma", version))[0]
    exclude_weights = glob.glob(
        checkpoint_pattern.format("Transformer_Exclude_Features", version))[0]

    excl_model = tr.TransformerModule.load_from_checkpoint(exclude_weights).to("cuda:0")
    solo_model = tr.TransformerModule.load_from_checkpoint(solo_weights).to("cuda:0")

    met_ret_solo = ev.getModelMetrics(solo_model, dm_solo, 'test')
    met_ret_excl = ev.getModelMetrics(excl_model, dm_excl, 'test')

    for name in metrics:
        solo_results[name].append(met_ret_solo[name])
        excl_results[name].append(met_ret_excl[name])

# Quick look at the per-feature Pearson scores of the solo models.
print(solo_results['pearson'])

In [None]:
# One figure per metric. Solo and excluded scores, plus the feature labels,
# are put in ascending order of the solo model's score for that metric.
for sort_metric in metrics:
    sort_keys = solo_results[sort_metric]
    # Sort on the key only, so the paired values are never compared.
    key_of_pair = lambda pair: pair[0]

    solo_met, excl_met, excluded_features_sorted = (
        [value for _, value in sorted(zip(sort_keys, values), key=key_of_pair)]
        for values in (sort_keys, excl_results[sort_metric], excluded_features)
    )

    fig, ax = plt.subplots(figsize=(20, 3))
    curves = (
        (solo_met, "solo", 'grey'),
        (excl_met, "excluded", 'black'),
    )
    for series, label, colour in curves:
        ax.plot(series, label=label, color=colour)

    # One tick per feature, labelled with the feature name.
    ax.set_xticks(list(range(len(excluded_features_sorted))))
    ax.set_xticklabels(excluded_features_sorted, rotation=45, size=16)

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    ax.set_ylabel(sort_metric, size=16)
    ax.legend()
    plt.show()