# Import and loading data


In [2]:
import numpy as np 
import pandas as pd 

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))



In [3]:
!wget https://github.com/google-research-datasets/gap-coreference/raw/master/gap-development.tsv -q
!wget https://github.com/google-research-datasets/gap-coreference/raw/master/gap-test.tsv -q
!wget https://github.com/google-research-datasets/gap-coreference/raw/master/gap-validation.tsv -q

In [7]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device)
print(device)

cuda:0


In [5]:
!pip install pytorch-pretrained-bert
!pip install https://github.com/ceshine/pytorch_helper_bot/archive/0.0.4.zip

Collecting pytorch-pretrained-bert
[?25l  Downloading https://files.pythonhosted.org/packages/d7/e0/c08d5553b89973d9a240605b9c12404bcf8227590de62bae27acbcfe076b/pytorch_pretrained_bert-0.6.2-py3-none-any.whl (123kB)
[K     |██▋                             | 10kB 16.6MB/s eta 0:00:01[K     |█████▎                          | 20kB 19.1MB/s eta 0:00:01[K     |████████                        | 30kB 15.1MB/s eta 0:00:01[K     |██████████▋                     | 40kB 13.9MB/s eta 0:00:01[K     |█████████████▎                  | 51kB 9.0MB/s eta 0:00:01[K     |███████████████▉                | 61kB 8.4MB/s eta 0:00:01[K     |██████████████████▌             | 71kB 9.4MB/s eta 0:00:01[K     |█████████████████████▏          | 81kB 9.7MB/s eta 0:00:01[K     |███████████████████████▉        | 92kB 9.9MB/s eta 0:00:01[K     |██████████████████████████▌     | 102kB 8.6MB/s eta 0:00:01[K     |█████████████████████████████▏  | 112kB 8.6MB/s eta 0:00:01[K     |███████████████████

In [6]:
import os

# This variable is used by helperbot to make the training deterministic
os.environ["SEED"] = "420"

import logging
from pathlib import Path

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from pytorch_pretrained_bert import BertTokenizer
from pytorch_pretrained_bert.modeling import BertModel

from helperbot import BaseBot, TriangularLR, WeightDecayOptimizerWrapper

# Preparation functions

In [155]:
def insert_tag(row):
    """Insert custom tags to help us find the position of A, B, and the pronoun after tokenization."""
    to_be_inserted = sorted([
        (row["A-offset"], " [A] "),
        (row["B-offset"], " [B] "),
        (row["Pronoun-offset"], " [P] ")
    ], key=lambda x: x[0], reverse=True)
    text = row["Text"]
    for offset, tag in to_be_inserted:
        text = text[:offset] + tag + text[offset:]
    return text

def tokenize(text, tokenizer):
    """Returns a list of tokens and the positions of A, B, and the pronoun."""
    entries = {}
    final_tokens = []
    for token in tokenizer.tokenize(text):
        if token in ("[A]", "[B]", "[P]"):
            entries[token] = len(final_tokens)
            continue
        final_tokens.append(token)
    return final_tokens, (entries["[A]"], entries["[B]"], entries["[P]"])

class GAPDataset(Dataset):
    """Custom GAP Dataset class"""
    def __init__(self, df, tokenizer, labeled=True):
        self.labeled = labeled
        if labeled:
            tmp = df[["A-coref", "B-coref"]].copy()
            tmp["Neither"] = ~(df["A-coref"] | df["B-coref"])
            self.y = tmp.values.astype("bool")

        # Extracts the tokens and offsets(positions of A, B, and P)
        self.offsets, self.tokens = [], []
        for _, row in df.iterrows():
            text = insert_tag(row)
            tokens, offsets = tokenize(text, tokenizer)
            self.offsets.append(offsets)
            self.tokens.append(tokenizer.convert_tokens_to_ids(
                ["[CLS]"] + tokens + ["[SEP]"]))
        
    def __len__(self):
        return len(self.tokens)

    def __getitem__(self, idx):
        if self.labeled:
            return self.tokens[idx], self.offsets[idx], self.y[idx]
        return self.tokens[idx], self.offsets[idx]
    
def collate_examples(batch, truncate_len=500):
    """Batch preparation.
    
    1. Pad the sequences
    2. Transform the target.
    """
    transposed = list(zip(*batch))
    max_len = min(
        max((len(x) for x in transposed[0])),
        truncate_len
    )
    tokens = np.zeros((len(batch), max_len), dtype=np.int64)
    for i, row in enumerate(transposed[0]):
        row = np.array(row[:truncate_len])
        tokens[i, :len(row)] = row
    token_tensor = torch.from_numpy(tokens)
    # Offsets
    offsets = torch.stack([
        torch.LongTensor(x) for x in transposed[1]
    ], dim=0) + 1 # Account for the [CLS] token
    # Labels
    if len(transposed) == 2:
        return token_tensor, offsets, None
    one_hot_labels = torch.stack([
        torch.from_numpy(x.astype("uint8")) for x in transposed[2]
    ], dim=0)
    _, labels = one_hot_labels.max(dim=1)
    return token_tensor, offsets, labels

def collate_examples_no_labels(batch, truncate_len=500):
    """Batch preparation.
    
    1. Pad the sequences
    2. Transform the target.
    """
    transposed = list(zip(*batch))
    print("transposed size is", transposed.size())
    max_len = min(
        max((len(x) for x in transposed[0])),
        truncate_len
    )
    tokens = np.zeros((len(batch), max_len), dtype=np.int64)
    for i, row in enumerate(transposed[0]):
        row = np.array(row[:truncate_len])
        tokens[i, :len(row)] = row
    token_tensor = torch.from_numpy(tokens)
    # Offsets
    offsets = torch.stack([
        torch.LongTensor(x) for x in transposed[1]
    ], dim=0) + 1 # Account for the [CLS] token
    # Labels
    if len(transposed) == 2:
        return token_tensor, offsets, None
    one_hot_labels = torch.stack([
        torch.from_numpy(x.astype("uint8")) for x in transposed[2]
    ], dim=0)
    _, labels = one_hot_labels.max(dim=1)
    return token_tensor, offsets, labels


# Model

In [156]:
# Classical MLP model
class Head(nn.Module):
    """The MLP submodule"""
    def __init__(self, bert_hidden_size: int):
        super().__init__()
        self.head_hidden_size = 1024  # MLP hidden size
        self.bert_hidden_size = bert_hidden_size   # Bert hidden size
        self.fc = nn.Sequential(
            nn.BatchNorm1d(bert_hidden_size * 3),  # Batch Normalization * 3 is because the bert_output of A,B is expanded into 1 dimension
            nn.Dropout(0.5),                       # Random deactivation
            nn.Linear(bert_hidden_size * 3, self.head_hidden_size), # Linear layer
            nn.ReLU(),                                              # Activation function
            nn.BatchNorm1d(self.head_hidden_size),
            nn.Dropout(0.5),
            nn.Linear(self.head_hidden_size, self.head_hidden_size),
            nn.ReLU(),
            nn.BatchNorm1d(self.head_hidden_size),
            nn.Dropout(0.5),
            nn.Linear(self.head_hidden_size, self.head_hidden_size),
            nn.ReLU(),
            nn.BatchNorm1d(self.head_hidden_size),
            nn.Dropout(0.5),
            nn.Linear(self.head_hidden_size, 3)
        )
        
        # Parametrs inialization
        for i, module in enumerate(self.fc):
            if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                print("Initing batchnorm")
            elif isinstance(module, nn.Linear):
                if getattr(module, "weight_v", None) is not None:
                    nn.init.uniform_(module.weight_g, 0, 1)
                    nn.init.kaiming_normal_(module.weight_v)
                    print("Initing linear with weight normalization")
                    assert model[i].weight_g is not None
                else:
                    nn.init.kaiming_normal_(module.weight)
                    print("Initing linear")
                nn.init.constant_(module.bias, 0)
    
    # Forward propagation
    def forward(self, bert_outputs, offsets):
        # bert_outputs:[batch_size, seq_length, hidden_szie]
        assert bert_outputs.size(2) == self.bert_hidden_size   
        
        # Taking out the embeddings at the offsets of A and B
        # unsqueeze(2):Expand 2-dimensional offsets to 3-dimensional
        # Extend a dimension of size 1. Such as (2,2,1) expands to (2,2,3)
        # input.gather(dim,index), index the specified dimension。For example, for a 4*3 tensor,indexing dim=1,then the value of index is 0~2.
        extracted_outputs = bert_outputs.gather(
            1, offsets.unsqueeze(2).expand(-1, -1, bert_outputs.size(2)) 
        ).view(bert_outputs.size(0), -1)      
        return self.fc(extracted_outputs)

# Current model
class GAPModel(nn.Module):
    """The main model."""
    def __init__(self, bert_model: str, device: torch.device):
        super().__init__()
        self.device = device  # Setting GPU device
        if bert_model in ("bert-base-uncased", "bert-base-cased"):
            self.bert_hidden_size = 768
        elif bert_model in ("bert-large-uncased", "bert-large-cased"):
            self.bert_hidden_size = 1024
        else:
            raise ValueError("Unsupported BERT model.")
        self.bert = BertModel.from_pretrained(bert_model).to(device)
        self.head = Head(self.bert_hidden_size).to(device)
    
    def forward(self, token_tensor, offsets):
        token_tensor = token_tensor.to(self.device)
        bert_outputs, _ =  self.bert(
            token_tensor, attention_mask=(token_tensor > 0).long(), 
            token_type_ids=None, output_all_encoded_layers=False)
        head_outputs = self.head(bert_outputs, offsets.to(self.device))
        return head_outputs             

In [157]:
offsets = torch.tensor([[0,1,2],[1,2,3]])  # batch_size=2, len(A,B,P)= 3
print(offsets.shape)
offsets = offsets.unsqueeze(2)
print(offsets)
print(offsets.shape)

torch.Size([2, 3])
tensor([[[0],
         [1],
         [2]],

        [[1],
         [2],
         [3]]])
torch.Size([2, 3, 1])


In [158]:
offsets=offsets.expand(-1,-1,5)   # bert_hidden_size=5
print(offsets)

tensor([[[0, 0, 0, 0, 0],
         [1, 1, 1, 1, 1],
         [2, 2, 2, 2, 2]],

        [[1, 1, 1, 1, 1],
         [2, 2, 2, 2, 2],
         [3, 3, 3, 3, 3]]])


In [159]:
bert_outputs = torch.tensor([[[ 1,  2,  3,  4,  5],
                  [ 6,  7,  8,  9, 10],
                  [11, 12, 13, 14, 15],
                  [16, 17, 18, 19, 20]],
                 [[21, 22, 23, 24, 25],
                  [26, 27, 28, 29, 30],
                  [31, 32, 33, 34, 35],
                  [36, 37, 38, 39, 40]]])

In [160]:
print(offsets.shape)
print(bert_outputs.shape)

torch.Size([2, 3, 5])
torch.Size([2, 4, 5])


In [161]:
bert_outputs.gather(1,offsets)

tensor([[[ 1,  2,  3,  4,  5],
         [ 6,  7,  8,  9, 10],
         [11, 12, 13, 14, 15]],

        [[26, 27, 28, 29, 30],
         [31, 32, 33, 34, 35],
         [36, 37, 38, 39, 40]]])

In [162]:
def children(m):
    return m if isinstance(m, (list, tuple)) else list(m.children())

def set_trainable_attr(m, b):
    m.trainable = b
    for p in m.parameters():
        p.requires_grad = b

def apply_leaf(m, f):
    c = children(m)
    if isinstance(m, nn.Module):
        f(m)
    if len(c) > 0:
        for l in c:
            apply_leaf(l, f)
        
def set_trainable(l, b):
    apply_leaf(l, lambda m: set_trainable_attr(m, b))

In [163]:
class GAPBot(BaseBot):
    def __init__(self, model, train_loader, val_loader, optimizer, clip_grad=0,
        avg_window=100, log_dir="./cache/logs/", log_level=logging.INFO,
        checkpoint_dir="./cache/model_cache/", batch_idx=0, echo=False,
        device="cuda:0", use_tensorboard=False):
        super().__init__(
            model, train_loader, val_loader, 
            optimizer=optimizer, clip_grad=clip_grad,
            log_dir=log_dir, checkpoint_dir=checkpoint_dir, 
            batch_idx=batch_idx, echo=echo,
            device=device, use_tensorboard=use_tensorboard
        )
        self.criterion = torch.nn.CrossEntropyLoss()
        self.loss_format = "%.6f"
        
    def extract_prediction(self, tensor):
        return tensor
    
    # Logs
    def snapshot(self):
        loss = self.eval(self.val_loader)
        loss_str = self.loss_format % loss
        self.logger.info("Snapshot loss %s", loss_str)
        self.logger.tb_scalars(
            "losses", {"val": loss},  self.step)
        target_path = (
            self.checkpoint_dir / "best.pth")        
        if not self.best_performers or (self.best_performers[0][0] > loss):
            torch.save(self.model.state_dict(), target_path)
            self.best_performers = [(loss, target_path, self.step)]
            self.logger.info("Saving checkpoint %s...", target_path)
        else:
            new_loss_str = self.loss_format % self.best_performers[0][0]
            self.logger.info("This performance:%s is not as a good as our previously saved:%s", loss_str,new_loss_str )
        assert Path(target_path).exists()
        return loss

In [164]:
df_train = pd.read_csv("gap-test.tsv", delimiter="\t")
df_val = pd.read_csv("gap-validation.tsv", delimiter="\t")
df_test = pd.read_csv("../content/test_stage_2.tsv", delimiter="\t")
sample_sub = pd.read_csv("../content/sample_submission_stage_2.csv")
assert sample_sub.shape[0] == df_test.shape[0]

In [165]:
print(len(df_train))
df_train.head()

2000


Unnamed: 0,ID,Text,Pronoun,Pronoun-offset,A,A-offset,A-coref,B,B-offset,B-coref,URL
0,test-1,Upon their acceptance into the Kontinental Hoc...,His,383,Bob Suter,352,False,Dehner,366,True,http://en.wikipedia.org/wiki/Jeremy_Dehner
1,test-2,"Between the years 1979-1981, River won four lo...",him,430,Alonso,353,True,Alfredo Di St*fano,390,False,http://en.wikipedia.org/wiki/Norberto_Alonso
2,test-3,Though his emigration from the country has aff...,He,312,Ali Aladhadh,256,True,Saddam,295,False,http://en.wikipedia.org/wiki/Aladhadh
3,test-4,"At the trial, Pisciotta said: ``Those who have...",his,526,Alliata,377,False,Pisciotta,536,True,http://en.wikipedia.org/wiki/Gaspare_Pisciotta
4,test-5,It is about a pair of United States Navy shore...,his,406,Eddie,421,True,Rock Reilly,559,False,http://en.wikipedia.org/wiki/Chasers


In [166]:
BERT_MODEL = 'bert-large-uncased'

tokenizer = BertTokenizer.from_pretrained(
    BERT_MODEL,
    do_lower_case=True,
    never_split = ("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]", "[A]", "[B]", "[P]")
)
# These tokens are not actually used, so we can assign arbitrary values.
tokenizer.vocab["[A]"] = -1
tokenizer.vocab["[B]"] = -1
tokenizer.vocab["[P]"] = -1

In [167]:
next(iter(test_loader))

[tensor([[  101,  2005,  1996,  ...,     0,     0,     0],
         [  101,  2044,  2023,  ...,     0,     0,     0],
         [  101,  1999,  1996,  ...,     0,     0,     0],
         ...,
         [  101,  1999,  1996,  ...,     0,     0,     0],
         [  101,  1999, 15331,  ...,     0,     0,     0],
         [  101,  2044,  1996,  ...,     0,     0,     0]]),
 tensor([[ 18,  39,  69],
         [ 49,  65,  89],
         [ 48,  81,  72],
         [ 56,  75,  82],
         [ 55,  77,  81],
         [  6,  25,  32],
         [ 52,  54,  58],
         [ 70,  82,  85],
         [ 57,  76,  80],
         [ 55,  61,  68],
         [ 44,  63,  70],
         [ 51,  61,  67],
         [ 46,  52,  68],
         [ 41,  50,  54],
         [ 46,  54,  66],
         [ 19,  24,  33],
         [ 40,  47,  72],
         [ 43,  58,  64],
         [ 51,  57,  74],
         [ 42,  48,  54],
         [ 51,  62,  68],
         [ 44,  50,  57],
         [ 45,  47,  38],
         [ 36,  49,  63],
      

## Making train,test and validation datasets

In [197]:
train_ds = GAPDataset(df_train, tokenizer)
val_ds = GAPDataset(df_val, tokenizer)
val_ds2 = GAPDataset(df_val, tokenizer, labeled = False)
test_ds = GAPDataset(df_test, tokenizer, labeled=False)
# dataset convertation to dataloader
train_loader = DataLoader(
    train_ds,
    collate_fn = collate_examples,     #batch
    batch_size=20,
    num_workers=2,
    pin_memory=True,   # Using lock page memory，so that tensor transfer to cuda will be faster
    shuffle=True,
    drop_last=True     # Drop incomplete batch
)
val_loader = DataLoader(
    val_ds,
    collate_fn = collate_examples,
    batch_size=128,
    num_workers=2,
    pin_memory=True,
    shuffle=False
)
val_loader2 = DataLoader(
    val_ds2,
    collate_fn = collate_examples,
    batch_size=128,
    num_workers=2,
    pin_memory=True,
    shuffle=False
)
test_loader = DataLoader(
    test_ds,
    collate_fn = collate_examples,
    batch_size=128,
    num_workers=2,
    pin_memory=True,
    shuffle=False
)

In [172]:
len(train_loader), len(test_loader), len(val_loader)

(100, 97, 4)

In [174]:
model = GAPModel(BERT_MODEL, device)
# You can unfreeze the last layer of bert by calling set_trainable(model.bert.encoder.layer[23], True)
set_trainable(model.bert, False)
set_trainable(model.head, True)

Initing batchnorm
Initing linear
Initing batchnorm
Initing linear
Initing batchnorm
Initing linear
Initing batchnorm
Initing linear


## Adding hyperparametrs for optimization process

In [175]:
lr=1e-3
weight_decay=5e-3
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

bot = GAPBot(
    model, train_loader, val_loader,
    optimizer=optimizer, echo=True,
    avg_window=25
)

[[06/16/2021 01:33:27 PM]] SEED: 420
[[06/16/2021 01:33:27 PM]] # of paramters: 340,403,203
[[06/16/2021 01:33:27 PM]] # of trainable paramters: 5,261,315


# Train

In [176]:
steps_per_epoch = len(train_loader) 
n_steps = steps_per_epoch * 27
bot.train(
    n_steps,
    log_interval=steps_per_epoch // 4,
    snapshot_interval=steps_per_epoch,
    scheduler=TriangularLR(
        optimizer, max_mul=20, ratio=2, steps_per_cycle=n_steps)
)   

[[06/16/2021 01:33:34 PM]] Optimizer Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    initial_lr: 0.001
    lr: 5e-05
    weight_decay: 0.005
)
[[06/16/2021 01:33:34 PM]] Batches per epoch: 100
[[06/16/2021 01:33:53 PM]] Step 25: train 2.081881 lr: 7.533e-05
[[06/16/2021 01:34:10 PM]] Step 50: train 2.038459 lr: 1.017e-04
[[06/16/2021 01:34:27 PM]] Step 75: train 1.969777 lr: 1.281e-04
[[06/16/2021 01:34:47 PM]] Step 100: train 1.910000 lr: 1.545e-04
100%|██████████| 4/4 [00:18<00:00,  4.70s/it]
[[06/16/2021 01:35:05 PM]] Snapshot loss 0.937361
[[06/16/2021 01:35:10 PM]] Saving checkpoint cache/model_cache/best.pth...
[[06/16/2021 01:35:10 PM]] New low

[[06/16/2021 01:35:29 PM]] Step 125: train 1.858720 lr: 1.809e-04
[[06/16/2021 01:35:47 PM]] Step 150: train 1.796178 lr: 2.073e-04
[[06/16/2021 01:36:04 PM]] Step 175: train 1.759216 lr: 2.337e-04
[[06/16/2021 01:36:23 PM]] Step 200: train 1.709331 lr: 2.601e-04
100%|██████████| 4/4 [00:19<00:00,  

In [177]:
torch.save(model.state_dict(), './model.pth')

In [201]:
# Predict function
def predict(loader, *, return_y=False):
    model.eval()
    outputs, y_global = [], []
    with torch.set_grad_enabled(False):
        for input_tensors in loader:
            input_tensors = [x.to(model.device) for x in input_tensors if x is not None]
            outputs.append(bot.predict_batch(input_tensors).cpu())
        outputs = torch.cat(outputs, dim=0)
    return outputs

# Predict

In [202]:
preds = predict(val_loader2)
len(preds)

454

In [203]:
preds

tensor([[ 1.6400,  0.7500, -1.9589],
        [-2.5454,  2.0033, -0.5670],
        [ 0.9716,  1.0074, -1.5044],
        ...,
        [-0.0057,  2.9075, -2.7485],
        [-0.0288,  3.3687, -3.1830],
        [ 1.7442,  0.8780, -2.0920]])

In [205]:
df_sub = pd.DataFrame(torch.softmax(preds, -1).cpu().numpy().clip(1e-3, 1-1e-3), columns=["A", "B", "NEITHER"])
df_sub["ID"] = df_test.ID
df_sub.head()

Unnamed: 0,A,B,NEITHER,ID
0,0.695402,0.285576,0.019022,000075809a8e6b062f5fb3c191a8ed52
1,0.009732,0.91989,0.070377,0005d0f3b0a6c9ffbd31a48453029911
2,0.471579,0.48877,0.039651,0007775c40bedd4147a0573d66dc28f8
3,0.878121,0.050952,0.070927,001194e3fe1234d00198ef6bba4cc588
4,0.91747,0.063986,0.018545,0014bb7085278ef3f9b74f14771caca9


In [207]:
df_val_results = pd.DataFrame(columns=["A", "B", "NEITHER"])
df_val_results['A'] = df_val['A-coref'] > df_val['B-coref']
df_val_results['B'] = df_val['B-coref'] > df_val['A-coref']
df_val_results['NEITHER'] = ~(df_val_results['A'] | df_val_results['B'])
df_val_results

Unnamed: 0,A,B,NEITHER
0,False,False,True
1,False,True,False
2,False,True,False
3,True,False,False
4,False,True,False
...,...,...,...
449,False,False,True
450,False,False,True
451,False,True,False
452,False,True,False


In [212]:
df_val_results = pd.DataFrame(columns=["A", "B", "NEITHER"])
df_val_results['A'] = df_val['A-coref'] > df_val['B-coref']
df_val_results['B'] = df_val['B-coref'] > df_val['A-coref']
df_val_results['NEITHER'] = ~(df_val_results['A'] | df_val_results['B'])
df_val_results

Unnamed: 0,A,B,NEITHER
0,False,False,True
1,False,True,False
2,False,True,False
3,True,False,False
4,False,True,False
...,...,...,...
449,False,False,True
450,False,False,True
451,False,True,False
452,False,True,False


In [220]:
df_preds = pd.DataFrame(preds.cpu().numpy(),columns = ['A','B','NEITHER'])
df_preds

Unnamed: 0,A,B,NEITHER
0,1.640018,0.750037,-1.958881
1,-2.545449,2.003340,-0.567045
2,0.971577,1.007383,-1.504389
3,2.127079,-0.719820,-0.389059
4,2.568884,-0.094078,-1.332553
...,...,...,...
449,0.080464,0.402899,-0.124440
450,0.837947,0.998932,-1.460450
451,-0.005680,2.907465,-2.748477
452,-0.028786,3.368730,-3.182956


In [221]:
df_preds_results = pd.DataFrame(columns=["A", "B", "NEITHER"])
df_preds_results['A'] = df_preds['A'] > df_preds['B']
df_preds_results['B'] = df_preds['B'] > df_preds['A']
df_preds_results['NEITHER'] = ~(df_preds_results['A'] | df_preds_results['B'])
df_preds_results

Unnamed: 0,A,B,NEITHER
0,True,False,False
1,False,True,False
2,False,True,False
3,True,False,False
4,True,False,False
...,...,...,...
449,False,True,False
450,False,True,False
451,False,True,False
452,False,True,False


In [222]:
print(classification_report(df_val_results, df_preds_results))

              precision    recall  f1-score   support

           0       0.73      0.89      0.80       187
           1       0.77      0.84      0.80       205
           2       0.00      0.00      0.00        62

   micro avg       0.75      0.75      0.75       454
   macro avg       0.50      0.58      0.53       454
weighted avg       0.65      0.75      0.69       454
 samples avg       0.75      0.75      0.75       454



  _warn_prf(average, modifier, msg_start, len(result))
