# Evaluación de importancia de cada predictor bioquímico global

## Vamos a ver cómo afecta cada predictor bioquímico por separado al rendimiento del modelo AMP-BERT-BIOCHEM

In [1]:
import pandas as pd
import torch

# Train/test split fraction and the RNG seed reused by the cells below.
FRAC_TRAIN = 0.5
SEED = 0

# Seed torch's global random number generator.
torch.manual_seed(SEED)

# Load the peptide table; the first CSV column becomes the DataFrame index.
df = pd.read_csv(
    './datasets/database_all_propiedades.csv',
    index_col=0,
)
print(df.columns)

Index(['PubMed_ID', 'Sequence', 'Activity', 'code', 'AMP', 'len_seq',
       'molecular_mass', 'aa_count', 'aa_freq', 'aa_missing', 'aa_more_freq',
       'hydrophobic_freq', 'hydrophilic_freq', 'basic_freq', 'acid_freq',
       'charge', 'aliphatic_index', 'average_hydrophobicity',
       'isoelectric_point'],
      dtype='object')


In [2]:
# Rename the sequence column to 'aa_seq'.
# NOTE(review): presumably the name the project data loader expects -- confirm.
df = df.rename(columns={
    'Sequence': 'aa_seq',
})

# Split positives (AMP) and negatives (non-AMP) separately so that both
# partitions keep the class ratio of the full table.
df_pos = df[df["AMP"] == True]
df_neg = df[df["AMP"] == False]

# Draw FRAC_TRAIN of each class for training (reproducible through SEED);
# whatever was not drawn becomes the test partition.
train_df_pos = df_pos.sample(frac=FRAC_TRAIN, random_state=SEED)
train_df_neg = df_neg.sample(frac=FRAC_TRAIN, random_state=SEED)
test_df_pos = df_pos.drop(train_df_pos.index)
test_df_neg = df_neg.drop(train_df_neg.index)

train_df = pd.concat([train_df_pos, train_df_neg])
test_df = pd.concat([test_df_pos, test_df_neg])

# Report class balance. The train line now counts the *train* negatives
# (it used to print the test negatives) and the ratio is formatted as a real
# percentage instead of a raw fraction followed by '%'.
print(f"Train dataframe: {len(train_df_pos)} positives, {len(train_df_neg)} negatives ({len(train_df_pos)/len(train_df):.2%})")
print(f"Test dataframe: {len(test_df_pos)} positives, {len(test_df_neg)} negatives ({len(test_df_pos)/len(test_df):.2%})")

Train dataframe: 4443 positives, 2816 negatives (0.6120677779308444%)
Test dataframe: 4443 positives, 2816 negatives (0.6120677779308444%)


In [3]:
from pipeline_tools import AMP_BioChemDataLoader, train_model, eval_model, compute_metrics
from torch.optim import AdamW
from transformers import BertForSequenceClassification, get_linear_schedule_with_warmup
from MultiGPUModels import MultiGPUBertForPeptideClassification
import torch
from torch.nn import CrossEntropyLoss
from copy import deepcopy

# Fine-tuning hyperparameters read by train_with_cols.
EPOCHS = 1
BATCH_SIZE = 8
LEARNING_RATE = 5e-6
WEIGHT_DECAY = 0.01

# Columns of the dataset used as global biochemical predictors.
biochem_cols = [
    "molecular_mass",
    "hydrophobic_freq", "hydrophilic_freq",
    "basic_freq", "acid_freq",
    "charge",
    "aliphatic_index",
    "average_hydrophobicity",
    "isoelectric_point",
]

# Pretrained ProtBERT-BFD checkpoint loaded as a sequence classifier.
bert_model = BertForSequenceClassification.from_pretrained('Rostlab/prot_bert_bfd')

Some weights of the model checkpoint at Rostlab/prot_bert_bfd were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not init

In [4]:
def train_with_cols(biochem_cols):
    """Fine-tune the peptide classifier with the given predictors and evaluate it.

    Builds train/test loaders from the global ``train_df`` / ``test_df``,
    trains for ``EPOCHS`` epochs with AdamW and a linear schedule without
    warm-up, and evaluates on the test partition.

    Args:
        biochem_cols: list of column names passed to the data loaders and to
            the model wrapper as biochemical predictors. An empty list is
            accepted by the callers in this notebook (sequence-only baseline).

    Returns:
        The object returned by ``compute_metrics`` for the test partition
        (callers in this notebook save it with ``.to_csv``).
    """
    train_dataloader = AMP_BioChemDataLoader(train_df, biochem_cols, batch_size=BATCH_SIZE)
    test_dataloader = AMP_BioChemDataLoader(test_df, biochem_cols, batch_size=BATCH_SIZE)

    # Wrap a fresh copy of the pretrained BERT. Passing the shared global
    # ``bert_model`` directly would let every call keep fine-tuning the weights
    # left behind by the previous call, so repeated runs would not be
    # independent experiments.
    # NOTE(review): assumes the wrapper does not already copy the model -- confirm.
    multi_gpu_bert_pred = MultiGPUBertForPeptideClassification(deepcopy(bert_model), biochem_cols)

    # Train the model with this configuration.
    optimizer = AdamW(
        multi_gpu_bert_pred.parameters(),
        lr=LEARNING_RATE,
        weight_decay=WEIGHT_DECAY)

    # One scheduler step per batch over all epochs.
    total_steps = len(train_dataloader) * EPOCHS

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=total_steps)

    # NOTE(review): the trailing True is forwarded positionally to
    # train_model / eval_model; its meaning is defined in pipeline_tools.
    for _ in range(EPOCHS):
        train_model(multi_gpu_bert_pred, train_dataloader, CrossEntropyLoss(), optimizer, scheduler, True)

    # Compute the metrics on the held-out partition.
    test_labels, test_preds = eval_model(multi_gpu_bert_pred, test_dataloader, CrossEntropyLoss(), True)
    test_metrics = compute_metrics(test_labels, test_preds)

    print("Metrics for test set: ")
    print(test_metrics)

    # Drop the reference to the model before returning so it can be freed
    # ahead of the next run.
    del multi_gpu_bert_pred
    return test_metrics
    
            
    

In [5]:
# Five repetitions using every biochemical predictor at once.
for i in range(5):
    test_metrics_all_biochem = train_with_cols(biochem_cols)
    out_path = f'./newdata_results/test_metrics_all_biochem({i}).csv'
    test_metrics_all_biochem.to_csv(out_path)

# Five repetitions of the sequence-only baseline; within each repetition,
# one extra run per individual predictor (the repetition index i is part of
# every output file name).
for i in range(5):
    test_metrics_no_biochem = train_with_cols([])
    out_path = f'./newdata_results/test_metrics_no_biochem({i}).csv'
    test_metrics_no_biochem.to_csv(out_path)

    for biochem_pred in biochem_cols:
        test_metrics_pred = train_with_cols([biochem_pred])
        out_path = f'./newdata_results/test_metrics_only_pred_{biochem_pred}({i}).csv'
        test_metrics_pred.to_csv(out_path)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.4800467231969535, Step Time 708 ms, ETA 10:35
Step 20/908: Loss (avg) 0.6320446951765646, Step Time 712 ms, ETA 10:32
Step 30/908: Loss (avg) 0.6960920497034587, Step Time 716 ms, ETA 10:28
Step 40/908: Loss (avg) 0.6981060478982293, Step Time 717 ms, ETA 10:22
Step 50/908: Loss (avg) 0.6964454551276084, Step Time 719 ms, ETA 10:16
Step 60/908: Loss (avg) 0.6880628435269899, Step Time 718 ms, ETA 10:8
Step 70/908: Loss (avg) 0.6612105157653172, Step Time 722 ms, ETA 10:5
Step 80/908: Loss (avg) 0.65030337254263, Step Time 722 ms, ETA 9:57
Step 90/908: Loss (avg) 0.6407692354252933, Step Time 723 ms, ETA 9:51
Step 100/908: Loss (avg) 0.6782560402596279, Step Time 723 ms, ETA 9:44
Step 110/908: Loss (avg) 0.7036282770880722, Step Time 723 ms, ETA 9:36
Step 120/908: Loss (avg) 0.7066808565468736, Step Time 724 ms, ETA 9:30
Step 130/908: Loss (avg) 0.6904439234813065, Step Time 724 ms, ETA 9:23
Step 140/908: Loss (avg) 0.6712617974974237, Step Time 724 ms, ETA 9:1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.41311610933771176, Step Time 238 ms, ETA 3:33
Step 20/908: Loss (avg) 0.4925853726233984, Step Time 237 ms, ETA 3:30
Step 30/908: Loss (avg) 0.5334291610932731, Step Time 237 ms, ETA 3:28
Step 40/908: Loss (avg) 0.48235458715098345, Step Time 236 ms, ETA 3:24
Step 50/908: Loss (avg) 0.5723916515817699, Step Time 235 ms, ETA 3:21
Step 60/908: Loss (avg) 0.6364609174911092, Step Time 236 ms, ETA 3:20
Step 70/908: Loss (avg) 0.5902989151085756, Step Time 237 ms, ETA 3:18
Step 80/908: Loss (avg) 0.5014348505197097, Step Time 236 ms, ETA 3:15
Step 90/908: Loss (avg) 0.556573935709985, Step Time 236 ms, ETA 3:13
Step 100/908: Loss (avg) 0.5667859158457584, Step Time 236 ms, ETA 3:10
Step 110/908: Loss (avg) 0.5733412352631156, Step Time 236 ms, ETA 3:8
Step 120/908: Loss (avg) 0.5559124824291495, Step Time 236 ms, ETA 3:5
Step 130/908: Loss (avg) 0.5746613278663402, Step Time 235 ms, ETA 3:2
Step 140/908: Loss (avg) 0.6301714616085297, Step Time 235 ms, ETA 3:0
Step

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.43908457014506647, Step Time 723 ms, ETA 10:49
Step 20/908: Loss (avg) 0.552901188129883, Step Time 724 ms, ETA 10:42
Step 30/908: Loss (avg) 0.6201166354051769, Step Time 726 ms, ETA 10:37
Step 40/908: Loss (avg) 0.6174440692068924, Step Time 727 ms, ETA 10:31
Step 50/908: Loss (avg) 0.6032976815193248, Step Time 730 ms, ETA 10:26
Step 60/908: Loss (avg) 0.6030318126580877, Step Time 730 ms, ETA 10:19
Step 70/908: Loss (avg) 0.6210278491744262, Step Time 729 ms, ETA 10:10
Step 80/908: Loss (avg) 0.5861861752231922, Step Time 730 ms, ETA 10:4
Step 90/908: Loss (avg) 0.5637540724061764, Step Time 729 ms, ETA 9:56
Step 100/908: Loss (avg) 0.5894298015171848, Step Time 731 ms, ETA 9:50
Step 110/908: Loss (avg) 0.5482459295184593, Step Time 727 ms, ETA 9:40
Step 120/908: Loss (avg) 0.5154499400436915, Step Time 732 ms, ETA 9:36
Step 130/908: Loss (avg) 0.5294918000909382, Step Time 732 ms, ETA 9:29
Step 140/908: Loss (avg) 0.5481794064576132, Step Time 732 ms, ETA

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.358966485613917, Step Time 240 ms, ETA 3:35
Step 20/908: Loss (avg) 0.4395500548258204, Step Time 236 ms, ETA 3:29
Step 30/908: Loss (avg) 0.5779298305601178, Step Time 235 ms, ETA 3:26
Step 40/908: Loss (avg) 0.6023179134574217, Step Time 236 ms, ETA 3:24
Step 50/908: Loss (avg) 0.5457040325871039, Step Time 237 ms, ETA 3:23
Step 60/908: Loss (avg) 0.5256506121217001, Step Time 237 ms, ETA 3:20
Step 70/908: Loss (avg) 0.5489012633585292, Step Time 236 ms, ETA 3:17
Step 80/908: Loss (avg) 0.5684826817527987, Step Time 236 ms, ETA 3:15
Step 90/908: Loss (avg) 0.5917009356670409, Step Time 236 ms, ETA 3:13
Step 100/908: Loss (avg) 0.6132272614962943, Step Time 237 ms, ETA 3:11
Step 110/908: Loss (avg) 0.6024505319342205, Step Time 236 ms, ETA 3:8
Step 120/908: Loss (avg) 0.5702996895582544, Step Time 238 ms, ETA 3:7
Step 130/908: Loss (avg) 0.5459716024356814, Step Time 238 ms, ETA 3:5
Step 140/908: Loss (avg) 0.5674450990417814, Step Time 236 ms, ETA 3:1
Step 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.46104800058353607, Step Time 721 ms, ETA 10:47
Step 20/908: Loss (avg) 0.6214307571317408, Step Time 723 ms, ETA 10:42
Step 30/908: Loss (avg) 0.67089142811695, Step Time 727 ms, ETA 10:38
Step 40/908: Loss (avg) 0.6623514526677019, Step Time 726 ms, ETA 10:30
Step 50/908: Loss (avg) 0.6627527532756321, Step Time 728 ms, ETA 10:24
Step 60/908: Loss (avg) 0.6184575164099484, Step Time 727 ms, ETA 10:16
Step 70/908: Loss (avg) 0.5972468489946764, Step Time 729 ms, ETA 10:10
Step 80/908: Loss (avg) 0.6008107506189616, Step Time 727 ms, ETA 10:1
Step 90/908: Loss (avg) 0.5715387320418719, Step Time 726 ms, ETA 9:53
Step 100/908: Loss (avg) 0.5602891845906534, Step Time 733 ms, ETA 9:52
Step 110/908: Loss (avg) 0.5768482753857085, Step Time 729 ms, ETA 9:41
Step 120/908: Loss (avg) 0.6197724956434973, Step Time 731 ms, ETA 9:36
Step 130/908: Loss (avg) 0.5746439592983679, Step Time 730 ms, ETA 9:27
Step 140/908: Loss (avg) 0.5641141711203085, Step Time 723 ms, ETA 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.3814550679962804, Step Time 234 ms, ETA 3:30
Step 20/908: Loss (avg) 0.5073996631751088, Step Time 235 ms, ETA 3:28
Step 30/908: Loss (avg) 0.5101155785722854, Step Time 235 ms, ETA 3:26
Step 40/908: Loss (avg) 0.539511069151562, Step Time 237 ms, ETA 3:25
Step 50/908: Loss (avg) 0.520356519021117, Step Time 237 ms, ETA 3:23
Step 60/908: Loss (avg) 0.5451938037002031, Step Time 237 ms, ETA 3:20
Step 70/908: Loss (avg) 0.4896360870484252, Step Time 237 ms, ETA 3:18
Step 80/908: Loss (avg) 0.5463311447936983, Step Time 237 ms, ETA 3:16
Step 90/908: Loss (avg) 0.5367023606906033, Step Time 237 ms, ETA 3:13
Step 100/908: Loss (avg) 0.6513136414696961, Step Time 238 ms, ETA 3:12
Step 110/908: Loss (avg) 0.6536373613231541, Step Time 236 ms, ETA 3:8
Step 120/908: Loss (avg) 0.6055737241626601, Step Time 236 ms, ETA 3:5
Step 130/908: Loss (avg) 0.5386597485837614, Step Time 235 ms, ETA 3:2
Step 140/908: Loss (avg) 0.6093976682239913, Step Time 238 ms, ETA 3:2
Step 15

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.4489931437394523, Step Time 722 ms, ETA 10:48
Step 20/908: Loss (avg) 0.574213473888788, Step Time 725 ms, ETA 10:43
Step 30/908: Loss (avg) 0.6066390359867457, Step Time 723 ms, ETA 10:34
Step 40/908: Loss (avg) 0.6081023740838466, Step Time 723 ms, ETA 10:27
Step 50/908: Loss (avg) 0.594877682953791, Step Time 730 ms, ETA 10:26
Step 60/908: Loss (avg) 0.6084850755030656, Step Time 729 ms, ETA 10:18
Step 70/908: Loss (avg) 0.5801268811530673, Step Time 729 ms, ETA 10:10
Step 80/908: Loss (avg) 0.4827161965366159, Step Time 729 ms, ETA 10:3
Step 90/908: Loss (avg) 0.5623646983054693, Step Time 730 ms, ETA 9:57
Step 100/908: Loss (avg) 0.5286866709274081, Step Time 732 ms, ETA 9:51
Step 110/908: Loss (avg) 0.5336344457087734, Step Time 731 ms, ETA 9:43
Step 120/908: Loss (avg) 0.572928988472898, Step Time 732 ms, ETA 9:36
Step 130/908: Loss (avg) 0.5516820628254296, Step Time 733 ms, ETA 9:30
Step 140/908: Loss (avg) 0.5273088484082336, Step Time 733 ms, ETA 9:

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.2598791261618158, Step Time 236 ms, ETA 3:31
Step 20/908: Loss (avg) 0.40958358598709377, Step Time 237 ms, ETA 3:30
Step 30/908: Loss (avg) 0.4493711571156158, Step Time 237 ms, ETA 3:28
Step 40/908: Loss (avg) 0.4792569182366404, Step Time 236 ms, ETA 3:24
Step 50/908: Loss (avg) 0.5169757447208987, Step Time 236 ms, ETA 3:22
Step 60/908: Loss (avg) 0.5275977950486883, Step Time 235 ms, ETA 3:19
Step 70/908: Loss (avg) 0.5968531546471127, Step Time 236 ms, ETA 3:17
Step 80/908: Loss (avg) 0.540926024766619, Step Time 237 ms, ETA 3:16
Step 90/908: Loss (avg) 0.5866587327935782, Step Time 237 ms, ETA 3:13
Step 100/908: Loss (avg) 0.5356823958996122, Step Time 236 ms, ETA 3:10
Step 110/908: Loss (avg) 0.5576993270719882, Step Time 236 ms, ETA 3:8
Step 120/908: Loss (avg) 0.5018706827615962, Step Time 237 ms, ETA 3:6
Step 130/908: Loss (avg) 0.5384901662000845, Step Time 238 ms, ETA 3:5
Step 140/908: Loss (avg) 0.5848375684737472, Step Time 237 ms, ETA 3:2
Step 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.5057575692901334, Step Time 724 ms, ETA 10:50
Step 20/908: Loss (avg) 0.6399785195286317, Step Time 724 ms, ETA 10:42
Step 30/908: Loss (avg) 0.6661176130593287, Step Time 724 ms, ETA 10:35
Step 40/908: Loss (avg) 0.6472416282641915, Step Time 723 ms, ETA 10:27
Step 50/908: Loss (avg) 0.6418979039917225, Step Time 730 ms, ETA 10:26
Step 60/908: Loss (avg) 0.6100402249148792, Step Time 728 ms, ETA 10:17
Step 70/908: Loss (avg) 0.5720791531983447, Step Time 730 ms, ETA 10:11
Step 80/908: Loss (avg) 0.5900424853934714, Step Time 731 ms, ETA 10:5
Step 90/908: Loss (avg) 0.5597801135469065, Step Time 726 ms, ETA 9:53
Step 100/908: Loss (avg) 0.5622388868309759, Step Time 729 ms, ETA 9:49
Step 110/908: Loss (avg) 0.5258230941812317, Step Time 729 ms, ETA 9:41
Step 120/908: Loss (avg) 0.5284916764503131, Step Time 728 ms, ETA 9:33
Step 130/908: Loss (avg) 0.5241861416711229, Step Time 729 ms, ETA 9:27
Step 140/908: Loss (avg) 0.48424862004289887, Step Time 731 ms, ET

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_wi

Step 10/908: Loss (avg) 0.3500099364987468, Step Time 236 ms, ETA 3:31
Step 20/908: Loss (avg) 0.433734148164418, Step Time 237 ms, ETA 3:30
Step 30/908: Loss (avg) 0.4985383816966382, Step Time 237 ms, ETA 3:28
Step 40/908: Loss (avg) 0.4192331545945731, Step Time 236 ms, ETA 3:24
Step 50/908: Loss (avg) 0.5346926628011162, Step Time 238 ms, ETA 3:24
Step 60/908: Loss (avg) 0.6184456730545979, Step Time 237 ms, ETA 3:20
Step 70/908: Loss (avg) 0.46760898295613296, Step Time 235 ms, ETA 3:16
Step 80/908: Loss (avg) 0.4601744168904443, Step Time 236 ms, ETA 3:15
Step 90/908: Loss (avg) 0.6240028017019893, Step Time 236 ms, ETA 3:13
Step 100/908: Loss (avg) 0.45064811496972257, Step Time 236 ms, ETA 3:10
Step 110/908: Loss (avg) 0.43484913975671485, Step Time 238 ms, ETA 3:9
Step 120/908: Loss (avg) 0.46108216456125123, Step Time 237 ms, ETA 3:6
Step 130/908: Loss (avg) 0.5742078695628556, Step Time 236 ms, ETA 3:3
Step 140/908: Loss (avg) 0.6360240883738573, Step Time 235 ms, ETA 3:0
St

KeyboardInterrupt: 

In [None]:
import pandas as pd


# Maps a configuration name to its column-wise mean metrics.
pred_metrics = {}

# Model trained with all biochemical predictors.
# NOTE(review): this reads 'test_metrics_with_biochem_1ep.csv', not the
# 'test_metrics_all_biochem({i}).csv' files written by the training cell --
# confirm this is the intended source.
all_preds = pd.read_csv("./newdata_results/test_metrics_with_biochem_1ep.csv").mean()
pred_metrics["all_preds"] = all_preds

# Sequence-only baseline: average over the five repetitions.
samples = []
for i in range(5):
    test_metrics_no_pred = pd.read_csv(f'./newdata_results/test_metrics_no_biochem({i}).csv')
    # Append the frame that was just read. The previous code appended
    # `test_metrics_pred`, a stale variable from another cell (or a NameError
    # on a fresh kernel), so the baseline did not come from these files.
    samples.append(test_metrics_no_pred)

pred_metrics["no_pred"] = pd.concat(samples).mean()

# One entry per individual predictor, averaged over its five repetitions.
for biochem_pred in biochem_cols:
    samples = []

    for i in range(5):
        test_metrics_pred = pd.read_csv(f'./newdata_results/test_metrics_only_pred_{biochem_pred}({i}).csv')
        samples.append(test_metrics_pred)

    pred_metrics[biochem_pred] = pd.concat(samples).mean()

# Rows: configurations; columns: metrics.
metrics_df = pd.DataFrame(pred_metrics, index=None).transpose()

print(metrics_df)

In [None]:
import numpy as np
import matplotlib.pyplot as plt 

# One horizontal bar per row of metrics_df (one row per configuration).
index = np.arange(len(metrics_df["accuracy"]))
# Tick labels come from the frame itself so they always line up with the bars.
tags = list(metrics_df.index)

# Half-width of each x-axis window, in standard deviations around the mean
# of the plotted metric.
C = 8

# (metrics_df column, panel title, bar colour), in row-major subplot order.
panels = [
    ("accuracy", "Accuracy", 'b'),
    ("precision", "Precision", 'g'),
    ("f1", "F1", 'r'),
    ("recall", "Recall", 'c'),
    ("specificity", "Specificity", 'm'),
    ("auroc", "Area under ROC", 'y'),
    ("aupr", "Area under PRC", 'k'),
]

fig, axs = plt.subplots(4, 2, figsize=(32, 40))
flat_axes = axs.ravel()

for ax, (column, title, color) in zip(flat_axes, panels):
    values = metrics_df[column]

    ax.barh(index, values, color=color)
    ax.set_title(title)
    ax.set_yticks(index)
    ax.set_yticklabels(tags)

    # Zoom the x axis around the mean so small differences are visible.
    mean = values.mean()
    std = values.std()
    ax.set_xlim(mean - C*std, mean + C*std)

    # Vertical reference line at the sequence-only ("no_pred") baseline.
    ax.axvline(x=values["no_pred"], color='k')
    ax.invert_yaxis()  # labels read top-to-bottom

# The 4x2 grid has one more slot than there are metrics; hide the unused axes.
for ax in flat_axes[len(panels):]:
    ax.axis("off")

plt.show()