In [1]:
!pip install transformers
!pip install tokenizers



## Import libraries

In [2]:
import os
from pathlib import Path
import numpy as np
import pandas as pd
import os
import tokenizers
import string
import torch
import transformers
import torch.nn as nn
from torch.nn import functional as F
from tqdm import tqdm
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup
from tqdm import tqdm
import re

In [3]:
# Choose the compute device once for the whole notebook: CUDA when PyTorch
# can see a GPU, plain CPU otherwise.
if not torch.cuda.is_available():
    # CPU fallback.
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
else:
    device = torch.device("cuda")
    # Report how many GPUs are visible and the name of device 0.
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))

There are 1 GPU(s) available.
We will use the GPU: Tesla V100-SXM2-16GB


In [4]:
class config:
    """Central place for hyper-parameters, file locations and the shared tokenizer."""

    # Sequence length every encoded sample is padded up to, and batch sizes.
    MAX_LEN = 128
    TRAIN_BATCH_SIZE = 64
    VALID_BATCH_SIZE = 16
    # Number of passes over the training data requested per fold.
    EPOCHS = 10

    # Filesystem layout, all relative to the parent of the working directory.
    BASE_PATH = Path("../")
    ROBERTA_PATH = BASE_PATH / "roberta-base"
    MODEL_PATH = BASE_PATH / "model_save/model_0424_1"
    TRAINING_FILE = BASE_PATH / "input/train-5fold/train_folds.csv"
    TESTING_FILE = BASE_PATH / "input/test.csv"

    # Byte-level BPE tokenizer built from the vocab/merges files stored under
    # ROBERTA_PATH. It is constructed once, when the class body is executed.
    TOKENIZER = tokenizers.ByteLevelBPETokenizer(
        vocab_file=str(ROBERTA_PATH / "roberta-base-vocab.json"),
        merges_file=str(ROBERTA_PATH / "roberta-base-merges.txt"),
        lowercase=True,
        add_prefix_space=True,
    )

## Utils

In [5]:
class AverageMeter:
    """
    Keeps the most recent value together with a running weighted mean
    of every value recorded so far.
    """
    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the latest value, the mean, the weighted sum and the sample count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as having been observed `n` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def jaccard(str1, str2):
    """
    Word-level Jaccard similarity between two strings.

    Both strings are lower-cased and split on whitespace; the score is the
    size of the intersection of the two word sets divided by the size of
    their union.

    Args:
        str1 (str): First string.
        str2 (str): Second string.

    Returns:
        float: Similarity in [0, 1]. When neither string contains a word the
        union is empty; the two (empty) sets are identical, so 1.0 is
        returned instead of dividing by zero.
    """
    a = set(str1.lower().split())
    b = set(str2.lower().split())
    union_size = len(a | b)
    if union_size == 0:
        # Both inputs are empty / whitespace-only: avoid ZeroDivisionError.
        return 1.0
    return float(len(a & b)) / union_size


class EarlyStopping:
    # https://github.com/Bjarten/early-stopping-pytorch/blob/master/pytorchtools.py
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # `np.inf` is the canonical spelling; the `np.Inf` alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, name):
        """
        Register the validation loss of one epoch.

        Args:
            val_loss (float): Validation loss of the epoch just finished.
            model: Object exposing `state_dict()`; saved whenever the loss improves.
            name: Destination handed to `torch.save` for the checkpoint.

        Sets `self.early_stop` to True once `patience` consecutive calls have
        failed to improve on the best score.
        """
        # Work with the negated loss so that "higher is better".
        score = -val_loss

        if self.best_score is None:
            # First call: always becomes the reference and is checkpointed.
            self.best_score = score
            self.save_checkpoint(val_loss, model, name)
        elif score < self.best_score + self.delta:
            # Not better than the best score by at least `delta`: count a strike.
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Improvement: checkpoint and reset the strike counter.
            self.best_score = score
            self.save_checkpoint(val_loss, model, name)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, name):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), name)
        self.val_loss_min = val_loss

## Data processing

In [6]:
def process_data(tweet, selected_text, sentiment, tokenizer, max_len):
    """
    Preprocessing the data to the RoBERTa model formatting

    Args:
        tweet: Raw tweet text (converted with `str()` and whitespace-normalised).
        selected_text: Span of the tweet used as the target (same normalisation).
        sentiment (str): One of 'positive', 'negative', 'neutral'.
        tokenizer: Object whose `encode(text)` result exposes `.ids` and `.offsets`.
        max_len (int): Length every returned sequence is padded up to.

    Returns:
        dict with keys 'ids', 'mask', 'token_type_ids', 'offsets' (lists),
        'targets_start' / 'targets_end' (token positions of the target span
        inside 'ids'), and 'orig_tweet', 'orig_selected', 'sentiment'.

    Notes:
        * If the selected text is empty or cannot be found in the tweet, the
          target span falls back to the whole tweet instead of raising
          IndexError.
        * Sequences longer than `max_len` are not truncated; they are returned
          at their natural length.
    """
    tweet = " " + " ".join(str(tweet).split())
    selected_text = " " + " ".join(str(selected_text).split())

    # First occurrence of the selected text (minus its leading space) in the tweet.
    needle = selected_text[1:]
    idx0 = tweet.find(needle) if needle else -1

    # Per-character 0/1 mask marking the characters covered by the selected text.
    char_targets = [0] * len(tweet)
    if idx0 >= 0:
        char_targets[idx0: idx0 + len(needle)] = [1] * len(needle)

    tok_tweet = tokenizer.encode(tweet)
    input_ids_orig = tok_tweet.ids
    tweet_offsets = tok_tweet.offsets

    # Tokens whose character range overlaps the selected text.
    target_idx = [
        j for j, (offset1, offset2) in enumerate(tweet_offsets)
        if sum(char_targets[offset1: offset2]) > 0
    ]

    if target_idx:
        targets_start = target_idx[0]
        targets_end = target_idx[-1]
    else:
        # No overlapping token (empty / unlocatable selected text): use the whole tweet.
        targets_start = 0
        targets_end = max(len(input_ids_orig) - 1, 0)

    #######
    # Ids inserted into the input to represent the sentiment
    # (presumably the roberta-base vocabulary ids of these words — confirm against the vocab file).
    sentiment_id = {
        'positive': 1313,
        'negative': 2430,
        'neutral': 7974
    }
    #######

    # Layout: [0] sentiment [2] [2] tweet tokens [2]
    # (0 / 2 presumably the RoBERTa <s> / </s> ids, and 1 below the <pad> id — TODO confirm).
    input_ids = [0] + [sentiment_id[sentiment]] + [2] + [2] + input_ids_orig + [2]
    token_type_ids = [0, 0, 0, 0] + [0] * (len(input_ids_orig) + 1)
    mask = [1] * len(token_type_ids)
    tweet_offsets = [(0, 0)] * 4 + tweet_offsets + [(0, 0)]
    # Shift the targets past the four prefix tokens.
    targets_start += 4
    targets_end += 4

    padding_length = max_len - len(input_ids)
    if padding_length > 0:
        input_ids = input_ids + ([1] * padding_length)
        mask = mask + ([0] * padding_length)
        token_type_ids = token_type_ids + ([0] * padding_length)
        tweet_offsets = tweet_offsets + ([(0, 0)] * padding_length)

    return {
        'ids': input_ids,
        'mask': mask,
        'token_type_ids': token_type_ids,
        'targets_start': targets_start,
        'targets_end': targets_end,
        'orig_tweet': tweet,
        'orig_selected': selected_text,
        'sentiment': sentiment,
        'offsets': tweet_offsets
    }

## Data loader

In [7]:
class TweetDataset:
    """
    Indexable dataset that encodes one (tweet, selected_text, sentiment)
    triple per item via `process_data`, using the tokenizer and maximum
    length taken from `config`.
    """
    def __init__(self, tweet, sentiment, selected_text):
        self.tweet, self.sentiment, self.selected_text = tweet, sentiment, selected_text
        self.tokenizer = config.TOKENIZER
        self.max_len = config.MAX_LEN

    def __len__(self):
        return len(self.tweet)

    def __getitem__(self, item):
        """Encode sample `item`; numeric fields are returned as long tensors."""
        data = process_data(
            self.tweet[item],
            self.selected_text[item],
            self.sentiment[item],
            self.tokenizer,
            self.max_len
        )

        # Numeric fields first, in the order in which the keys are exposed.
        sample = {
            key: torch.tensor(data[key], dtype=torch.long)
            for key in ("ids", "mask", "token_type_ids", "targets_start", "targets_end")
        }
        # Text fields are passed through untouched.
        for key in ("orig_tweet", "orig_selected", "sentiment"):
            sample[key] = data[key]
        sample['offsets'] = torch.tensor(data["offsets"], dtype=torch.long)
        return sample

## The model

In [8]:
class TweetModel(transformers.BertPreTrainedModel):
    """
    RoBERTa encoder followed by a single linear layer that produces a start
    logit and an end logit for every input position.
    """
    def __init__(self, conf):
        """
        Args:
            conf: Model configuration passed both to the parent class and to
                  `RobertaModel.from_pretrained`; its `hidden_size` sets the
                  input width of the output layer.
        """
        super().__init__(conf)
        # Pretrained weights are loaded from config.ROBERTA_PATH.
        self.roberta = transformers.RobertaModel.from_pretrained(config.ROBERTA_PATH, config=conf)
        self.drop_out = nn.Dropout(0.1)
        # Use the encoder width from the configuration rather than a hard-coded 768,
        # so checkpoints with a different hidden size work as well.
        self.l0 = nn.Linear(conf.hidden_size, 2)
        torch.nn.init.normal_(self.l0.weight, std=0.02)

    def forward(self, ids, mask, token_type_ids):
        """
        Args:
            ids: Input token ids.
            mask: Attention mask forwarded to the encoder.
            token_type_ids: Token type ids forwarded to the encoder.

        Returns:
            (start_logits, end_logits): the two output channels of the linear
            layer, each with the trailing size-1 dimension squeezed away.
        """
        roberta_output = self.roberta(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids
        )

        # First element of the encoder output
        # (presumably the per-token hidden states of the last layer — TODO confirm).
        out = roberta_output[0]
        out = self.drop_out(out)
        logits = self.l0(out)

        # Split the two output channels into separate start / end logits.
        start_logits, end_logits = logits.split(1, dim=-1)

        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        return start_logits, end_logits

## Loss function

In [9]:
def loss_fn(start_logits, end_logits, start_positions, end_positions):
    """
    Sum of the cross-entropy of the start logits against the start
    positions and the cross-entropy of the end logits against the end
    positions (each averaged over the batch, the default reduction).
    """
    pairs = ((start_logits, start_positions), (end_logits, end_positions))
    start_loss, end_loss = (
        nn.functional.cross_entropy(logits, positions) for logits, positions in pairs
    )
    return start_loss + end_loss

## Training function

In [10]:
def train_fn(data_loader, model, optimizer, device, scheduler=None):
    """
    Train `model` for one pass over `data_loader`.

    Args:
        data_loader: Iterable of batches (dicts with the keys produced by TweetDataset).
        model: Callable returning `(start_logits, end_logits)` for `ids`, `mask`, `token_type_ids`.
        optimizer: Optimizer stepped once per batch.
        device: Device the tensors of each batch are moved to.
        scheduler: Optional learning-rate scheduler, stepped once per batch when given.

    The running loss and Jaccard score are shown on the progress bar; nothing is returned.
    """
    model.train()
    losses = AverageMeter()
    jaccards = AverageMeter()

    tk0 = tqdm(data_loader, total=len(data_loader))

    for bi, d in enumerate(tk0):

        ids = d["ids"]
        token_type_ids = d["token_type_ids"]
        mask = d["mask"]
        targets_start = d["targets_start"]
        targets_end = d["targets_end"]
        sentiment = d["sentiment"]
        orig_selected = d["orig_selected"]
        orig_tweet = d["orig_tweet"]
        offsets = d["offsets"]

        ids = ids.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        targets_start = targets_start.to(device, dtype=torch.long)
        targets_end = targets_end.to(device, dtype=torch.long)

        model.zero_grad()
        outputs_start, outputs_end = model(
            ids=ids,
            mask=mask,
            token_type_ids=token_type_ids,
        )
        loss = loss_fn(outputs_start, outputs_end, targets_start, targets_end)
        loss.backward()
        optimizer.step()
        # The scheduler is optional (default None), so only step it when provided.
        if scheduler is not None:
            scheduler.step()

        outputs_start = torch.softmax(outputs_start, dim=1).cpu().detach().numpy()
        outputs_end = torch.softmax(outputs_end, dim=1).cpu().detach().numpy()

        # Score each sample using the most probable start / end positions.
        jaccard_scores = []
        for px, tweet in enumerate(orig_tweet):
            selected_tweet = orig_selected[px]
            tweet_sentiment = sentiment[px]
            jaccard_score, _ = calculate_jaccard_score(
                original_tweet=tweet,
                target_string=selected_tweet,
                sentiment_val=tweet_sentiment,
                idx_start=np.argmax(outputs_start[px, :]),
                idx_end=np.argmax(outputs_end[px, :]),
                offsets=offsets[px]
            )
            jaccard_scores.append(jaccard_score)

        # Weight the running means by the batch size.
        jaccards.update(np.mean(jaccard_scores), ids.size(0))
        losses.update(loss.item(), ids.size(0))
        tk0.set_postfix(loss=losses.avg, jaccard=jaccards.avg)

## Evaluation function

In [11]:
def calculate_jaccard_score(
    original_tweet,
    target_string,
    sentiment_val,
    idx_start,
    idx_end,
    offsets,
    verbose=False):
    """
    Turn a predicted (start, end) token span into text and score it.

    The text is the concatenation of the tweet slices given by
    `offsets[idx_start..idx_end]`; an end index smaller than the start index
    is replaced by the start index. For "neutral" tweets, or tweets with
    fewer than two words, the whole tweet is used instead.
    `verbose` is accepted but not used.

    Returns:
        (jac, filtered_output): Jaccard score of the stripped target against
        the stripped extracted text, and the extracted text itself.
    """
    last = idx_end if idx_end >= idx_start else idx_start

    pieces = [
        original_tweet[offsets[ix][0]: offsets[ix][1]]
        for ix in range(idx_start, last + 1)
    ]

    use_whole_tweet = sentiment_val == "neutral" or len(original_tweet.split()) < 2
    filtered_output = original_tweet if use_whole_tweet else "".join(pieces)

    return jaccard(target_string.strip(), filtered_output.strip()), filtered_output


def eval_fn(data_loader, model, device):
    """
    Evaluate `model` on every batch of `data_loader` with gradients disabled.

    Prints and returns the batch-size-weighted mean Jaccard score and loss
    as `(jaccard, loss)`.
    """
    model.eval()
    loss_meter = AverageMeter()
    jaccard_meter = AverageMeter()

    with torch.no_grad():
        progress = tqdm(data_loader, total=len(data_loader))
        for batch in progress:
            # Move the numeric inputs and targets to the target device.
            ids = batch["ids"].to(device, dtype=torch.long)
            token_type_ids = batch["token_type_ids"].to(device, dtype=torch.long)
            mask = batch["mask"].to(device, dtype=torch.long)
            targets_start = batch["targets_start"].to(device, dtype=torch.long)
            targets_end = batch["targets_end"].to(device, dtype=torch.long)

            start_logits, end_logits = model(
                ids=ids,
                mask=mask,
                token_type_ids=token_type_ids
            )
            loss = loss_fn(start_logits, end_logits, targets_start, targets_end)
            start_probs = torch.softmax(start_logits, dim=1).cpu().detach().numpy()
            end_probs = torch.softmax(end_logits, dim=1).cpu().detach().numpy()

            # One score per sample, from the most probable start / end positions.
            batch_scores = [
                calculate_jaccard_score(
                    original_tweet=tweet,
                    target_string=batch["orig_selected"][px],
                    sentiment_val=batch["sentiment"][px],
                    idx_start=np.argmax(start_probs[px, :]),
                    idx_end=np.argmax(end_probs[px, :]),
                    offsets=batch["offsets"][px]
                )[0]
                for px, tweet in enumerate(batch["orig_tweet"])
            ]

            batch_size = ids.size(0)
            jaccard_meter.update(np.mean(batch_scores), batch_size)
            loss_meter.update(loss.item(), batch_size)
            progress.set_postfix(loss=loss_meter.avg, jaccard=jaccard_meter.avg)

    print(f"Jaccard = {jaccard_meter.avg}")
    print(f"Loss = {loss_meter.avg}")
    return jaccard_meter.avg, loss_meter.avg

## Training 

In [12]:
def run(fold):
    """
    Train and validate one model for the given cross-validation fold.

    Rows of the training file whose `kfold` column equals `fold` form the
    validation set; all other rows are used for training. The model is
    checkpointed to `config.MODEL_PATH / f"model_{fold}.bin"` whenever the
    validation loss improves, and training stops early after two epochs
    without improvement.

    Args:
        fold (int): Value of the `kfold` column to hold out for validation.

    Returns:
        The validation loss recorded at the last saved checkpoint.
    """
    dfx = pd.read_csv(config.TRAINING_FILE)

    df_train = dfx[dfx.kfold != fold].reset_index(drop=True)
    df_valid = dfx[dfx.kfold == fold].reset_index(drop=True)

    train_dataset = TweetDataset(
        tweet=df_train.text.values,
        sentiment=df_train.sentiment.values,
        selected_text=df_train.selected_text.values
    )

    # Shuffle the training samples each epoch; the validation loader below
    # keeps its fixed order.
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        shuffle=True,
        num_workers=4
    )

    valid_dataset = TweetDataset(
        tweet=df_valid.text.values,
        sentiment=df_valid.sentiment.values,
        selected_text=df_valid.selected_text.values
    )

    valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=2
    )

    # Fall back to the CPU instead of failing when no GPU is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_config = transformers.RobertaConfig.from_pretrained(os.path.join(config.ROBERTA_PATH, 'roberta-base-config.json'))
    model_config.output_hidden_states = True
    model = TweetModel(conf=model_config)
    model.to(device)

    # One scheduler step is taken per batch, so the schedule length is the
    # real number of batches per epoch (including the last, partial one)
    # times the number of epochs.
    num_train_steps = len(train_data_loader) * config.EPOCHS
    param_optimizer = list(model.named_parameters())
    # Parameters whose name contains one of these substrings get no weight decay.
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
    ]
    optimizer = AdamW(optimizer_parameters, lr=3e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_steps
    )

    # Make sure the checkpoint directory exists before the first save.
    config.MODEL_PATH.mkdir(parents=True, exist_ok=True)

    es = EarlyStopping(patience=2, verbose=True)
    print(f"Training is Starting for fold={fold}")

    for epoch in range(config.EPOCHS):
        train_fn(train_data_loader, model, optimizer, device, scheduler=scheduler)
        jaccard, loss = eval_fn(valid_data_loader, model, device)
        es(loss, model, name=config.MODEL_PATH / f"model_{fold}.bin")

        if es.early_stop:
            print("Early stopping")
            break

    return es.val_loss_min

## Run training

In [13]:
%%time
# Train one model per fold (folds 0-4) and collect the validation loss that
# each run reports, then print their mean.
val_loss = [run(ifold) for ifold in range(5)]
print(f'Mean val loss: {np.mean(val_loss)}')

  0%|          | 0/344 [00:00<?, ?it/s]

Training is Starting for fold=0


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.645, loss=2.44]
100%|██████████| 344/344 [00:14<00:00, 23.81it/s, jaccard=0.696, loss=1.74]


Jaccard = 0.696045588084984
Loss = 1.7407061458121913
Validation loss decreased (inf --> 1.740706).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.696, loss=1.69]
100%|██████████| 344/344 [00:14<00:00, 23.75it/s, jaccard=0.702, loss=1.69]


Jaccard = 0.7020848520149453
Loss = 1.6852934612335673
Validation loss decreased (1.740706 --> 1.685293).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.714, loss=1.54]
100%|██████████| 344/344 [00:14<00:00, 23.70it/s, jaccard=0.706, loss=1.72]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.7055551107856688
Loss = 1.7229662737955673
EarlyStopping counter: 1 out of 2


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.732, loss=1.4] 
100%|██████████| 344/344 [00:14<00:00, 23.85it/s, jaccard=0.708, loss=1.76]


Jaccard = 0.7076539625841706
Loss = 1.761643984306069
EarlyStopping counter: 2 out of 2
Early stopping


  0%|          | 0/344 [00:00<?, ?it/s]

Training is Starting for fold=1


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.646, loss=2.44]
100%|██████████| 344/344 [00:14<00:00, 23.75it/s, jaccard=0.702, loss=1.78]


Jaccard = 0.7024369764727422
Loss = 1.7832437321904226
Validation loss decreased (inf --> 1.783244).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.695, loss=1.71]
100%|██████████| 344/344 [00:14<00:00, 23.71it/s, jaccard=0.706, loss=1.72]


Jaccard = 0.7061480338001964
Loss = 1.7236476571251527
Validation loss decreased (1.783244 --> 1.723648).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.713, loss=1.56]
100%|██████████| 344/344 [00:14<00:00, 23.72it/s, jaccard=0.708, loss=1.74]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.7083657066651468
Loss = 1.7358474926621519
EarlyStopping counter: 1 out of 2


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.73, loss=1.42] 
100%|██████████| 344/344 [00:14<00:00, 23.64it/s, jaccard=0.706, loss=1.78]


Jaccard = 0.7064467543462333
Loss = 1.7764975744007674
EarlyStopping counter: 2 out of 2
Early stopping


  0%|          | 0/344 [00:00<?, ?it/s]

Training is Starting for fold=2


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.645, loss=2.48]
100%|██████████| 344/344 [00:14<00:00, 23.81it/s, jaccard=0.69, loss=1.8]  


Jaccard = 0.6898967638853234
Loss = 1.7968857924201302
Validation loss decreased (inf --> 1.796886).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.699, loss=1.7] 
100%|██████████| 344/344 [00:14<00:00, 23.86it/s, jaccard=0.698, loss=1.71]


Jaccard = 0.6980433839327451
Loss = 1.712849036290122
Validation loss decreased (1.796886 --> 1.712849).  Saving model ...


100%|██████████| 344/344 [02:32<00:00,  2.26it/s, jaccard=0.716, loss=1.54]
100%|██████████| 344/344 [00:14<00:00, 23.77it/s, jaccard=0.702, loss=1.72]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.7024830689589496
Loss = 1.7161214957567743
EarlyStopping counter: 1 out of 2


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.729, loss=1.42]
100%|██████████| 344/344 [00:14<00:00, 23.71it/s, jaccard=0.698, loss=1.75]


Jaccard = 0.6979479950704973
Loss = 1.7503568947000765
EarlyStopping counter: 2 out of 2
Early stopping


  0%|          | 0/344 [00:00<?, ?it/s]

Training is Starting for fold=3


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.645, loss=2.51]
100%|██████████| 344/344 [00:14<00:00, 23.71it/s, jaccard=0.687, loss=1.77]


Jaccard = 0.6874287579114902
Loss = 1.7721374639887169
Validation loss decreased (inf --> 1.772137).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.697, loss=1.72]
100%|██████████| 344/344 [00:14<00:00, 23.68it/s, jaccard=0.696, loss=1.74]


Jaccard = 0.6956956665760081
Loss = 1.7355372575639443
Validation loss decreased (1.772137 --> 1.735537).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.712, loss=1.58]
100%|██████████| 344/344 [00:14<00:00, 23.77it/s, jaccard=0.694, loss=1.72]


Jaccard = 0.6943628630518828
Loss = 1.72137503869654
Validation loss decreased (1.735537 --> 1.721375).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.725, loss=1.46]
100%|██████████| 344/344 [00:14<00:00, 23.71it/s, jaccard=0.696, loss=1.71]


Jaccard = 0.6963582592883236
Loss = 1.7080516159386716
Validation loss decreased (1.721375 --> 1.708052).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.738, loss=1.35]
100%|██████████| 344/344 [00:14<00:00, 23.56it/s, jaccard=0.699, loss=1.74]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.6994944575077988
Loss = 1.7371500645748417
EarlyStopping counter: 1 out of 2


100%|██████████| 344/344 [02:30<00:00,  2.28it/s, jaccard=0.746, loss=1.29]
100%|██████████| 344/344 [00:14<00:00, 23.69it/s, jaccard=0.696, loss=1.79]


Jaccard = 0.695979225709058
Loss = 1.789034164837486
EarlyStopping counter: 2 out of 2
Early stopping


  0%|          | 0/344 [00:00<?, ?it/s]

Training is Starting for fold=4


100%|██████████| 344/344 [02:31<00:00,  2.27it/s, jaccard=0.639, loss=2.53]
100%|██████████| 344/344 [00:14<00:00, 23.72it/s, jaccard=0.687, loss=1.79]


Jaccard = 0.6872192582810808
Loss = 1.7889849111039313
Validation loss decreased (inf --> 1.788985).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.695, loss=1.7] 
100%|██████████| 344/344 [00:14<00:00, 23.77it/s, jaccard=0.698, loss=1.72]


Jaccard = 0.6979077469032152
Loss = 1.7163709326290923
Validation loss decreased (1.788985 --> 1.716371).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.713, loss=1.55]
100%|██████████| 344/344 [00:14<00:00, 23.60it/s, jaccard=0.698, loss=1.7] 


Jaccard = 0.6976408937347021
Loss = 1.6999972601554514
Validation loss decreased (1.716371 --> 1.699997).  Saving model ...


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.727, loss=1.42]
100%|██████████| 344/344 [00:14<00:00, 23.74it/s, jaccard=0.699, loss=1.75]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.6991395359564637
Loss = 1.7496361006427423
EarlyStopping counter: 1 out of 2


100%|██████████| 344/344 [02:31<00:00,  2.28it/s, jaccard=0.741, loss=1.31]
100%|██████████| 344/344 [00:14<00:00, 23.78it/s, jaccard=0.698, loss=1.8] 

Jaccard = 0.6983888713106771
Loss = 1.797362729779629
EarlyStopping counter: 2 out of 2
Early stopping
Mean val loss: 1.705967806148593
CPU times: user 47min 16s, sys: 16min 32s, total: 1h 3min 48s
Wall time: 1h 4min 30s



