# Import

In [1]:
import numpy as np
import pandas as pd
from ast import literal_eval
from itertools import chain
from sklearn.metrics import precision_recall_fscore_support
from tqdm.notebook import tqdm
from sklearn.model_selection import StratifiedKFold
import torch
from transformers import AutoModel, AutoTokenizer

# Config

In [2]:
class CFG:
    """Notebook-wide settings: input paths, cross-validation, tokenization and optimisation."""

    # --- run mode ---
    debug = False  # when True, create_train_df keeps only a 20% sample of the rows

    # --- inputs ---
    root = "../input/nbme-score-clinical-patient-notes"  # folder holding the competition CSVs
    model = "../input/huggingface-bert/bert-base-uncased"  # checkpoint used for tokenizer and backbone

    # --- cross-validation ---
    n_fold = 5

    # --- tokenization ---
    max_length = 512  # every encoded pair is padded/truncated to this many tokens
    doc_stride = 128  # forwarded to the tokenizer as `stride`

    # --- training ---
    device = "cuda" if torch.cuda.is_available() else "cpu"
    batch_size = 16  # validation uses twice this value
    epochs = 3
    lr = 1e-5

# Create df

In [3]:
def create_train_df():
    """Load the competition CSVs and build the fold-assigned training frame.

    Reads ``features.csv``, ``patient_notes.csv`` and ``train.csv`` from
    ``CFG.root``, parses the stringified annotation/location lists, left-joins
    the note text and the feature text onto every training row, drops rows
    whose annotation is empty, normalises and lower-cases both texts, adds
    placeholder prediction columns and assigns a stratified fold number.

    Returns:
        pandas.DataFrame: the merged frame with a default ``RangeIndex``, the
        placeholder columns ``location_prediction`` / ``token_proba`` /
        ``token_offsets`` (object dtype, filled with -1) and a ``fold`` column
        holding values in ``range(CFG.n_fold)``.
    """
    feats = pd.read_csv(f"{CFG.root}/features.csv")
    notes = pd.read_csv(f"{CFG.root}/patient_notes.csv")
    train = pd.read_csv(f"{CFG.root}/train.csv")

    # The CSV stores Python list literals as strings; parse them back into lists.
    train["annotation_list"] = [literal_eval(x) for x in train["annotation"]]
    train["location_list"] = [literal_eval(x) for x in train["location"]]

    # Left joins on whatever columns the frames share, so every training row is kept.
    merged = train.merge(notes, how = "left")
    merged = merged.merge(feats, how = "left")
    merged = merged.loc[merged["annotation"] != "[]"].copy().reset_index(drop = True) # comment out if you train all samples

    def process_feature_text(text):
        # "-OR-" separates alternative phrasings; remaining hyphens become spaces.
        return text.replace("-OR-", ";-").replace("-", " ")
    merged["feature_text"] = [process_feature_text(x) for x in merged["feature_text"]]

    merged["feature_text"] = merged["feature_text"].map(str.lower)
    merged["pn_history"] = merged["pn_history"].map(str.lower)

    # These columns later receive one Python list per row, so create them with
    # object dtype instead of int64 to avoid an incompatible-dtype upcast on assignment.
    for column in ("location_prediction", "token_proba", "token_offsets"):
        merged[column] = pd.Series(-1, index = merged.index, dtype = object)

    if CFG.debug:
        # Seeded so that a debug run sees the same subsample every time.
        merged = merged.sample(frac = 0.2, random_state = getattr(CFG, "seed", 42)).reset_index(drop = True)

    # Stratify on the (case, feature) combination so each fold sees every feature.
    skf = StratifiedKFold(CFG.n_fold)
    merged["stratify_on"] = merged["case_num"].astype(str) + merged["feature_num"].astype(str)
    merged["fold"] = -1
    for fold, (_, valid_idx) in enumerate(skf.split(merged["id"], y = merged["stratify_on"])):
        # valid_idx is positional; it equals the labels because the index was reset above.
        merged.loc[valid_idx, "fold"] = fold

    print(merged.shape)
    print(merged.loc[merged["fold"]==0].shape)
    return merged

# Build the training frame once; the training cells below read this module-level `df`.
df = create_train_df()

(9901, 15)
(1981, 15)




In [4]:
# Preview the first rows of the training frame.
df.head()

Unnamed: 0,id,case_num,pn_num,feature_num,annotation,location,annotation_list,location_list,pn_history,feature_text,location_prediction,token_proba,token_offsets,stratify_on,fold
0,00016_000,0,16,0,['dad with recent heart attcak'],['696 724'],[dad with recent heart attcak],[696 724],hpi: 17yo m presents with palpitations. patien...,family history of mi; family history of myocar...,-1,-1,-1,0,0
1,00016_001,0,16,1,"['mom with ""thyroid disease']",['668 693'],"[mom with ""thyroid disease]",[668 693],hpi: 17yo m presents with palpitations. patien...,family history of thyroid disorder,-1,-1,-1,1,0
2,00016_002,0,16,2,['chest pressure'],['203 217'],[chest pressure],[203 217],hpi: 17yo m presents with palpitations. patien...,chest pressure,-1,-1,-1,2,0
3,00016_003,0,16,3,"['intermittent episodes', 'episode']","['70 91', '176 183']","[intermittent episodes, episode]","[70 91, 176 183]",hpi: 17yo m presents with palpitations. patien...,intermittent symptoms,-1,-1,-1,3,0
4,00016_004,0,16,4,['felt as if he were going to pass out'],['222 258'],[felt as if he were going to pass out],[222 258],hpi: 17yo m presents with palpitations. patien...,lightheaded,-1,-1,-1,4,0


In [5]:
# Copy the text and annotation fields of the first training row into a plain
# dict so the helpers below can be tried on a single example.
first = df.loc[0]
example = {
    field: first[field]
    for field in ("feature_text", "pn_history", "location_list", "annotation_list")
}
for key, value in example.items():
    print(key)
    print(value)
    print("=" * 100)

feature_text
family history of mi; family history of myocardial infarction
pn_history
hpi: 17yo m presents with palpitations. patient reports 3-4 months of intermittent episodes of "heart beating/pounding out of my chest." 2 days ago during a soccer game had an episode, but this time had chest pressure and felt as if he were going to pass out (did not lose conciousness). of note patient endorses abusing adderall, primarily to study (1-3 times per week). before recent soccer game, took adderrall night before and morning of game. denies shortness of breath, diaphoresis, fevers, chills, headache, fatigue, changes in sleep, changes in vision/hearing, abdominal paun, changes in bowel or urinary habits. 
pmhx: none
rx: uses friends adderrall
fhx: mom with "thyroid disease," dad with recent heart attcak
all: none
immunizations: up to date
shx: freshmen in college. endorses 3-4 drinks 3 nights / week (on weekends), denies tabacco, endorses trying marijuana. sexually active with girlfrien

In [6]:
def loc_list_to_ints(loc_list):
    """Convert location strings into a flat list of ``(start, end)`` integer pairs.

    Each entry of ``loc_list`` is a string such as ``"696 724"``; an entry may
    hold several spans separated by ``";"`` (e.g. ``"1 2;5 9"``), and every span
    becomes its own tuple in the result, in order of appearance.
    """
    # Split every entry on ";" first, then split each piece on whitespace.
    pieces = (segment.split() for entry in loc_list for segment in entry.split(";"))
    return [(int(start), int(end)) for start, end in pieces]

# Sanity check: slicing the note with the first parsed span should print the annotated text.
print(example["location_list"])
example_loc_ints = loc_list_to_ints(example["location_list"])[0]  # first (start, end) pair
print(example_loc_ints)
print(example["pn_history"][example_loc_ints[0] : example_loc_ints[1]])

['696 724']
(696, 724)
dad with recent heart attcak


# Tokenizer

In [7]:
# Load the tokenizer from the same checkpoint path that the model backbone uses (CFG.model).
tokenizer = AutoTokenizer.from_pretrained(CFG.model)

In [8]:
def tokenize_and_add_labels(tokenizer, example):
    """Encode a (feature_text, pn_history) pair and attach one label per token.

    Args:
        tokenizer: callable tokenizer; its result must expose ``sequence_ids()``
            and support item assignment.
        example: mapping with the keys ``feature_text``, ``pn_history`` and
            ``location_list``.

    Returns:
        The tokenizer output extended with three entries:
        ``location_int`` (annotated spans as integer pairs), ``sequence_ids``
        and ``labels``. A label is -100 for tokens whose sequence id is None
        or 0, 1.0 for note tokens whose character offsets lie entirely inside
        an annotated span, and 0.0 for all other note tokens.
    """
    encoded = tokenizer(
        example["feature_text"],
        example["pn_history"],
        truncation = "only_second",  # only the note is ever shortened
        max_length = CFG.max_length,
        padding = "max_length",
        # NOTE(review): stride presumably only matters when overflowing tokens
        # are requested, which is not the case here - confirm.
        stride = CFG.doc_stride,
        return_offsets_mapping = True
    )
    token_labels = [0.0] * len(encoded["input_ids"])
    annotated_spans = loc_list_to_ints(example["location_list"])
    encoded["location_int"] = annotated_spans
    encoded["sequence_ids"] = encoded.sequence_ids()

    for position, (seq_id, offsets) in enumerate(zip(encoded["sequence_ids"], encoded["offset_mapping"])):
        if seq_id is None or seq_id == 0:
            # Not part of the patient note: mark it so the loss can ignore it.
            token_labels[position] = -100
            continue
        tok_start, tok_end = offsets
        # Positive only when the token is fully contained in some annotated span.
        if any(span_start <= tok_start and tok_end <= span_end
               for span_start, span_end in annotated_spans):
            token_labels[position] = 1.0
    encoded["labels"] = token_labels

    return encoded

In [9]:
# Encode the example row and print every field of the result for inspection.
tokenized_inputs = tokenize_and_add_labels(tokenizer, example)
for key, value in tokenized_inputs.items():
    print(key)
    print(value)
    print("=" * 100)

input_ids
[101, 2155, 2381, 1997, 2771, 1025, 2155, 2381, 1997, 2026, 24755, 25070, 1999, 14971, 7542, 102, 6522, 2072, 1024, 2459, 7677, 1049, 7534, 2007, 14412, 23270, 10708, 1012, 5776, 4311, 1017, 1011, 1018, 2706, 1997, 23852, 4178, 1997, 1000, 2540, 6012, 1013, 9836, 2041, 1997, 2026, 3108, 1012, 1000, 1016, 2420, 3283, 2076, 1037, 4715, 2208, 2018, 2019, 2792, 1010, 2021, 2023, 2051, 2018, 3108, 3778, 1998, 2371, 2004, 2065, 2002, 2020, 2183, 2000, 3413, 2041, 1006, 2106, 2025, 4558, 9530, 18436, 2791, 1007, 1012, 1997, 3602, 5776, 2203, 5668, 2229, 8273, 7741, 5587, 21673, 2140, 1010, 3952, 2000, 2817, 1006, 1015, 1011, 1017, 2335, 2566, 2733, 1007, 1012, 2077, 3522, 4715, 2208, 1010, 2165, 5587, 2121, 7941, 2140, 2305, 2077, 1998, 2851, 1997, 2208, 1012, 23439, 2460, 2791, 1997, 3052, 1010, 22939, 8458, 16610, 2483, 1010, 9016, 2015, 1010, 10720, 2015, 1010, 14978, 1010, 16342, 1010, 3431, 1999, 3637, 1010, 3431, 1999, 4432, 1013, 4994, 1010, 21419, 29025, 2078, 1010, 3431, 19

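BERT only needs `input_ids` and `attention_mask` as inputs.

`labels` is 1.0 for every note token that lies inside an annotated span, 0.0 for the other note tokens, and -100 for tokens outside the patient note (the feature text and tokens without a sequence id).

So the model can be trained as a per-token binary classifier: does this token express the feature? -> 1 or 0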

# Dataset

In [10]:
class NBMEData(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one row of the training frame per item.

    Args:
        data: DataFrame whose rows provide the fields read by
            ``tokenize_and_add_labels``.
        tokenizer: tokenizer forwarded to ``tokenize_and_add_labels``.
    """

    def __init__(self, data, tokenizer):
        self.data = data
        self.tokenizer = tokenizer

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(input_ids, attention_mask, labels, offset_mapping, sequence_ids)`` as NumPy arrays.

        ``idx`` is a position in ``range(len(self))``, so the row is fetched
        with ``.iloc``; a label-based lookup would fail on any frame whose
        index is not the default 0..n-1 range.
        """
        example = self.data.iloc[idx]
        tokenized = tokenize_and_add_labels(self.tokenizer, example)

        input_ids = np.array(tokenized["input_ids"]) # for input BERT
        attention_mask = np.array(tokenized["attention_mask"]) # for input BERT
        labels = np.array(tokenized["labels"]) # for calculate loss and cv score

        offset_mapping = np.array(tokenized["offset_mapping"]) # for calculate cv score
        # Sequence ids contain None; the float cast turns those entries into NaN
        # so the array has a numeric dtype.
        sequence_ids = np.array(tokenized["sequence_ids"]).astype("float16") # for calculate cv score

        return input_ids, attention_mask, labels, offset_mapping, sequence_ids

# Model

In [11]:
class NBMEModel(torch.nn.Module):
    """Transformer backbone followed by dropout and a one-logit-per-token linear head."""

    def __init__(self):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(CFG.model) # BERT model
        self.dropout = torch.nn.Dropout(p = 0.2)
        # Size the head from the backbone's config rather than hard-coding 768,
        # so checkpoints with a different hidden width work unchanged.
        self.classifier = torch.nn.Linear(self.backbone.config.hidden_size, 1) # last_hidden_state has size (sequence_length, hidden_size)

    def forward(self, input_ids, attention_mask):
        """Return raw (pre-sigmoid) logits, one per input token."""
        last_hidden_state = self.backbone(input_ids = input_ids, attention_mask = attention_mask)[0] # idx 0 is last_hidden_state; backbone().last_hidden_state is also good
        # The head emits a trailing dimension of size 1; drop it.
        logits = self.classifier(self.dropout(last_hidden_state)).squeeze(-1)
        return logits

# Training

In [12]:
def train_loop(fold):
    """Prepare model, optimizer and data loaders for one cross-validation fold.

    Despite the name, no training happens here: the function only builds the
    objects and returns them.

    Args:
        fold: fold number used as the validation split; all other folds form
            the training split.

    Returns:
        tuple: ``(train_dl, valid_dl, model, optimizer)``.
    """
    model = NBMEModel().to(CFG.device)
    optimizer = torch.optim.AdamW(model.parameters(), CFG.lr)

    # Split the global frame; both parts get a fresh 0..n-1 index.
    in_valid_fold = df["fold"] == fold
    train_rows = df.loc[~in_valid_fold].reset_index(drop = True)
    valid_rows = df.loc[in_valid_fold].reset_index(drop = True)

    # Training batches are shuffled and the ragged last batch is dropped.
    train_dl = torch.utils.data.DataLoader(
        NBMEData(train_rows, tokenizer),
        batch_size = CFG.batch_size,
        shuffle = True,
        drop_last = True,
        pin_memory = True,
    )
    # Validation keeps row order and every row, with a doubled batch size.
    valid_dl = torch.utils.data.DataLoader(
        NBMEData(valid_rows, tokenizer),
        batch_size = CFG.batch_size * 2,
        shuffle = False,
        drop_last = False,
        pin_memory = True,
    )

    return train_dl, valid_dl, model, optimizer

In [13]:
class AverageMeter(object):
    """Tracks the latest value plus a running weighted sum, count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set ``val``, ``avg``, ``sum`` and ``count`` back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n = 1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``; works on scalars and NumPy arrays."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def get_location_predictions(preds, offset_mapping, sequence_ids, threshold = 0.5):
    """Turn per-token logits into predicted character spans for every row.

    Args:
        preds: per-row sequences of token logits.
        offset_mapping: per-row sequences of ``(char_start, char_end)`` token offsets.
        sequence_ids: per-row sequences of token sequence ids. Only tokens with
            id 1 are considered; ids of 0, None or NaN (what None becomes once
            the ids are stored in a float array) are never part of a span.
        threshold: probability above which a token counts as positive.

    Returns:
        list[list[tuple[int, int]]]: for each row, the ``(start, end)``
        character spans formed by runs of consecutive positive tokens.
    """
    all_predictions = []
    for pred, offsets, seq_ids in zip(preds, offset_mapping, sequence_ids):
        probabilities = sigmoid(np.asarray(pred))
        start_idx = None
        end_idx = None
        current_preds = []
        for p, o, s_id in zip(probabilities, offsets, seq_ids):
            # `== 1` is False for 0, None and NaN alike, so special and padding
            # tokens (whose logits are excluded from the loss) cannot open or
            # extend a span with their (0, 0) offsets.
            if s_id == 1 and p > threshold:
                if start_idx is None:
                    start_idx = int(o[0])
                end_idx = int(o[1])
            elif start_idx is not None:
                # A negative or ignored token ends the current run.
                current_preds.append((start_idx, end_idx))
                start_idx = None
        if start_idx is not None:
            # The run reached the end of the row without being closed.
            current_preds.append((start_idx, end_idx))
        all_predictions.append(current_preds)
    return all_predictions

def calculate_char_CV(predictions, offset_mapping, sequence_ids, labels):
    """Character-level precision, recall and F1 of predicted spans against token labels.

    For every row a 0/1 mask over character positions is built twice: once from
    the tokens labelled 1 (ground truth) and once from the predicted spans. The
    masks of all rows are concatenated and scored with
    ``precision_recall_fscore_support(..., average="binary")``.

    Returns:
        dict with the keys ``precision``, ``recall`` and ``f1``.
    """
    truth_chars = []
    predicted_chars = []
    for spans, tok_offsets, tok_seq_ids, tok_labels in zip(predictions, offset_mapping, sequence_ids, labels):
        # Mask length: the largest character offset that appears in the row.
        n_chars = max(list(chain(*tok_offsets)))

        truth_mask = np.zeros((n_chars))
        for o, s_id, tok_label in zip(tok_offsets, tok_seq_ids, tok_labels):
            counts = not (s_id is None or s_id == 0)
            if counts and int(tok_label) == 1:
                truth_mask[o[0]:o[1]] = 1

        predicted_mask = np.zeros((n_chars))
        for span_start, span_end in spans:
            predicted_mask[span_start:span_end] = 1

        truth_chars.extend(truth_mask)
        predicted_chars.extend(predicted_mask)

    scores = precision_recall_fscore_support(truth_chars, predicted_chars, average = "binary")
    # Only the first three entries (precision, recall, f-score) are reported.
    return dict(zip(("precision", "recall", "f1"), scores))

In [14]:
def model_loop():
    """Train and validate one model per fold, keeping the best epoch of each.

    For every fold in ``range(CFG.n_fold)`` the objects returned by
    ``train_loop`` are trained for ``CFG.epochs`` epochs. The loss is binary
    cross-entropy on the logits, averaged over the positions whose label is
    greater than -1. Whenever the validation loss improves, the weights are
    saved to ``nbme_{fold}.pth``, the out-of-fold predictions are written into
    the global ``df`` (``location_prediction``, ``token_proba``,
    ``token_offsets``) and the character-level score is printed.

    Returns:
        An empty tuple; the per-fold loss history is printed, not returned.
    """
    # The prediction columns hold one list per row; make sure they are object
    # dtype before lists are assigned into them.
    for column in ("location_prediction", "token_proba", "token_offsets"):
        if df[column].dtype != object:
            df[column] = df[column].astype(object)

    # Stateless, so a single instance serves every batch.
    loss_fct = torch.nn.BCEWithLogitsLoss(reduction = "none")

    history = {}
    for fold in range(CFG.n_fold):
        print(f"========== fold: {fold} training ==========")
        train_dl, valid_dl, model, optimizer = train_loop(fold)
        history[fold] = {"train": [], "valid": []}
        best_loss = np.inf

        for epoch in range(CFG.epochs):
            print(f"========== EPOCH: {epoch} training ==========")
            #training
            model.train()
            train_loss = AverageMeter()
            train_bar = tqdm(train_dl)
            for batch in train_bar:
                optimizer.zero_grad()
                input_ids = batch[0].to(CFG.device)
                attention_mask = batch[1].to(CFG.device)
                labels = batch[2].to(CFG.device)
                logits = model(input_ids, attention_mask)
                loss = loss_fct(logits, labels)
                loss = torch.masked_select(loss, labels > -1).mean() # we should calculate at "pn_history"; labels at "feature_text" are -100 < -1
                loss.backward()
                optimizer.step()
                train_loss.update(val = loss.item(), n = len(input_ids))
                train_bar.set_postfix(Loss = train_loss.avg)
            print(epoch, train_loss.avg)
            history[fold]["train"].append(train_loss.avg)

            #evaluation
            model.eval()
            valid_loss = AverageMeter()
            preds = []
            offsets = []
            seq_ids = []
            lbls = []
            with torch.no_grad():
                # Keep a handle on the validation bar so the running loss is
                # shown on it rather than on the finished training bar.
                valid_bar = tqdm(valid_dl)
                for batch in valid_bar:
                    input_ids = batch[0].to(CFG.device)
                    attention_mask = batch[1].to(CFG.device)
                    labels = batch[2].to(CFG.device)
                    offset_mapping = batch[3]
                    sequence_ids = batch[4]
                    logits = model(input_ids, attention_mask)
                    loss = loss_fct(logits, labels)
                    loss = torch.masked_select(loss, labels > -1).mean()
                    valid_loss.update(val = loss.item(), n = len(input_ids))
                    valid_bar.set_postfix(Loss = valid_loss.avg)
                    preds.append(logits.cpu().numpy())
                    offsets.append(offset_mapping.numpy())
                    seq_ids.append(sequence_ids.numpy())
                    lbls.append(labels.cpu().numpy())
            print(epoch, valid_loss.avg)
            history[fold]["valid"].append(valid_loss.avg)

            # save model
            if valid_loss.avg < best_loss:
                best_loss = valid_loss.avg
                torch.save(model.state_dict(), f"nbme_{fold}.pth")
                preds = np.concatenate(preds, axis = 0)
                offsets = np.concatenate(offsets, axis = 0)
                seq_ids = np.concatenate(seq_ids, axis = 0)
                lbls = np.concatenate(lbls, axis = 0)
                location_preds = get_location_predictions(preds, offsets, seq_ids)
                # The validation loader is not shuffled, so predictions line up
                # with the fold's rows in `df` order.
                index = df[df['fold'] == fold].index
                df.loc[index,'location_prediction'] = pd.Series(location_preds, index=index)
                df.loc[index,'token_proba'] = pd.Series([list(preds[i]) for i in range(preds.shape[0])], index=index)
                df.loc[index,'token_offsets'] = pd.Series([list(offsets[i]) for i in range(offsets.shape[0])], index=index)
                score = calculate_char_CV(location_preds, offsets, seq_ids, lbls)
                print(score)
    print(history)
    return()

In [15]:
# Run the full cross-validation training; out-of-fold predictions are written into `df`.
model_loop()



Some weights of the model checkpoint at ../input/huggingface-bert/bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/495 [00:00<?, ?it/s]

0 0.07859321832813515


  0%|          | 0/62 [00:00<?, ?it/s]

0 0.03653261021890114
{'precision': 0.6961559389303204, 'recall': 0.6431296017031846, 'f1': 0.668593034336479}


  0%|          | 0/495 [00:00<?, ?it/s]

1 0.03228816511838799


  0%|          | 0/62 [00:00<?, ?it/s]

1 0.024940700494493425
{'precision': 0.7401525854761232, 'recall': 0.7745350246902629, 'f1': 0.7569535754020431}


  0%|          | 0/495 [00:00<?, ?it/s]

2 0.022529416062251943


  0%|          | 0/62 [00:00<?, ?it/s]

2 0.020721026074849373
{'precision': 0.744832670686432, 'recall': 0.8364528815163074, 'f1': 0.7879885233571966}


Some weights of the model checkpoint at ../input/huggingface-bert/bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/495 [00:00<?, ?it/s]

0 0.07629835797293455


  0%|          | 0/62 [00:00<?, ?it/s]

0 0.03730938113505899
{'precision': 0.7381705687970189, 'recall': 0.6068961530680077, 'f1': 0.6661273693393608}


  0%|          | 0/495 [00:00<?, ?it/s]

1 0.031145214980166924


  0%|          | 0/62 [00:00<?, ?it/s]

1 0.025607080588017302
{'precision': 0.7772476838314506, 'recall': 0.7298476834590595, 'f1': 0.7528022895301693}


  0%|          | 0/495 [00:00<?, ?it/s]

2 0.021876966100253376


  0%|          | 0/62 [00:00<?, ?it/s]

2 0.02241058821613906
{'precision': 0.782572326862615, 'recall': 0.7841556114338565, 'f1': 0.7833631691401515}


Some weights of the model checkpoint at ../input/huggingface-bert/bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/495 [00:00<?, ?it/s]

0 0.07354514417064022


  0%|          | 0/62 [00:00<?, ?it/s]

0 0.03422325277407322
{'precision': 0.7418931384375221, 'recall': 0.6070485093041484, 'f1': 0.6677310682866859}


  0%|          | 0/495 [00:00<?, ?it/s]

1 0.030850729767185763


  0%|          | 0/62 [00:00<?, ?it/s]

1 0.024319840083220325
{'precision': 0.7058098591549296, 'recall': 0.8146718146718147, 'f1': 0.7563437411564946}


  0%|          | 0/495 [00:00<?, ?it/s]

2 0.021263481563070508


  0%|          | 0/62 [00:00<?, ?it/s]

2 0.020816450181536272
{'precision': 0.7512610340479193, 'recall': 0.8301448602200482, 'f1': 0.7887355021996664}


Some weights of the model checkpoint at ../input/huggingface-bert/bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/495 [00:00<?, ?it/s]

0 0.0784589328928535


  0%|          | 0/62 [00:00<?, ?it/s]

0 0.036602469282018664
{'precision': 0.6462838677603778, 'recall': 0.6856339183685429, 'f1': 0.6653776160145587}


  0%|          | 0/495 [00:00<?, ?it/s]

1 0.03262476629339585


  0%|          | 0/62 [00:00<?, ?it/s]

1 0.023705920344524088
{'precision': 0.7526769391486028, 'recall': 0.7599988279762079, 'f1': 0.7563201632891092}


  0%|          | 0/495 [00:00<?, ?it/s]

2 0.02225617623347534


  0%|          | 0/62 [00:00<?, ?it/s]

2 0.02081164407430952
{'precision': 0.7457221615436626, 'recall': 0.8402238565442879, 'f1': 0.7901574748908147}


Some weights of the model checkpoint at ../input/huggingface-bert/bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).




  0%|          | 0/495 [00:00<?, ?it/s]

0 0.07680528277246514


  0%|          | 0/62 [00:00<?, ?it/s]

0 0.03526268750442478
{'precision': 0.7332119045982541, 'recall': 0.6177611767431086, 'f1': 0.6705534777006003}


  0%|          | 0/495 [00:00<?, ?it/s]

1 0.03139201235148857


  0%|          | 0/62 [00:00<?, ?it/s]

1 0.025302337337465577
{'precision': 0.7667679294734334, 'recall': 0.7454540189946722, 'f1': 0.7559607704956542}


  0%|          | 0/495 [00:00<?, ?it/s]

2 0.02245566192678163


  0%|          | 0/62 [00:00<?, ?it/s]

2 0.020840128860689984
{'precision': 0.7778279648815741, 'recall': 0.7978051887885106, 'f1': 0.7876899326748524}
{0: {'train': [0.07859321832813515, 0.03228816511838799, 0.022529416062251943], 'valid': [0.03653261021890114, 0.024940700494493425, 0.020721026074849373]}, 1: {'train': [0.07629835797293455, 0.031145214980166924, 0.021876966100253376], 'valid': [0.03730938113505899, 0.025607080588017302, 0.02241058821613906]}, 2: {'train': [0.07354514417064022, 0.030850729767185763, 0.021263481563070508], 'valid': [0.03422325277407322, 0.024319840083220325, 0.020816450181536272]}, 3: {'train': [0.0784589328928535, 0.03262476629339585, 0.02225617623347534], 'valid': [0.036602469282018664, 0.023705920344524088, 0.02081164407430952]}, 4: {'train': [0.07680528277246514, 0.03139201235148857, 0.02245566192678163], 'valid': [0.03526268750442478, 0.025302337337465577, 0.020840128860689984]}}


()

In [16]:
# Persist the training frame, including the prediction columns, for later analysis.
df.to_pickle("df_pred.pkl")