In [1]:
!nvidia-smi

Tue May 26 14:53:53 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   68C    P8    11W /  70W |      0MiB / 15079MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# !pip install transformers
# !git clone https://github.com/NVIDIA/apex
# !pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex

In [4]:
import os
import re
import string
import random
import numpy as np
import pandas as pd
import transformers
from transformers import *
import tokenizers
from tqdm import tqdm
from nltk import sent_tokenize, download  
from apex import amp
from albumentations.core.transforms_interface import DualTransform, BasicTransform

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data
from torch.utils.data import DataLoader, Dataset
from torch.autograd import Variable

from sklearn.model_selection import GroupKFold, StratifiedKFold, train_test_split
from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

SEED = 123
def seed_all(seed_value):
    random.seed(seed_value) # Python
    np.random.seed(seed_value) # cpu vars
    torch.manual_seed(seed_value) # cpu  vars
    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value) # gpu vars
        torch.backends.cudnn.deterministic = True  #needed
        torch.backends.cudnn.benchmark = False
seed_all(SEED)

import warnings 
warnings.filterwarnings('ignore')
download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [0]:
max_len = 100 
train_batch_size = 64
valid_batch_size = 100
epochs = 8
model_pos = "roberta_pos_"
model_neg = "roberta_neg_"
model_neu = "roberta_neu_"
model_path = "robertabase_"
train_file = "drive/My Drive/kaggle/Tweet Sentiment Extraction/tweet-sentiment-extraction/train.csv"
new_file = "drive/My Drive/kaggle/Tweet Sentiment Extraction/tweet-sentiment-extraction/new_data.csv"
test_file = "drive/My Drive/kaggle/Tweet Sentiment Extraction/tweet-sentiment-extraction/test.csv"
roberta_path = "drive/My Drive/kaggle/Tweet Sentiment Extraction/Vocab/Roberta"
tokenizer = tokenizers.ByteLevelBPETokenizer(
    vocab_file=f"{roberta_path}/roberta-large-vocab.json", 
    merges_file=f"{roberta_path}/roberta-large-merges.txt", 
    lowercase=True,
    add_prefix_space=True
)

In [0]:
class NLPTransform(BasicTransform):
    """ Transform for nlp task."""
    LANGS = {
        'en': 'english',
        'it': 'italian', 
        'fr': 'french', 
        'es': 'spanish',
        'tr': 'turkish', 
        'ru': 'russian',
        'pt': 'portuguese'
    }

    @property
    def targets(self):
        return {"data": self.apply}
    
    def update_params(self, params, **kwargs):
        if hasattr(self, "interpolation"):
            params["interpolation"] = self.interpolation
        if hasattr(self, "fill_value"):
            params["fill_value"] = self.fill_value
        return params

    def get_sentences(self, text, lang='en'):
        return sent_tokenize(text, self.LANGS.get(lang, 'english'))

class ShuffleSentencesTransform(NLPTransform):
    """ Do shuffle by sentence """
    def __init__(self, always_apply=False, p=0.5):
        super(ShuffleSentencesTransform, self).__init__(always_apply, p)

    def apply(self, data, **params):
        text, lang = data
        sentences = self.get_sentences(text, lang)
        random.shuffle(sentences)
        return ' '.join(sentences), lang

transform = ShuffleSentencesTransform(p=1.0)

In [0]:
def binary_cross_entropy(inputs, target, weight=None, reduction='mean', smooth_eps=None, from_logits=False):
    """cross entropy loss, with support for label smoothing https://arxiv.org/abs/1512.00567"""
    smooth_eps = smooth_eps or 0
    if smooth_eps > 0:
        target = target.float()
        target.add_(smooth_eps).div_(2.)
    if from_logits:
        return F.binary_cross_entropy_with_logits(inputs, target, weight=weight, reduction=reduction)
    else:
        return F.binary_cross_entropy(inputs, target, weight=weight, reduction=reduction)


def binary_cross_entropy_with_logits(inputs, target, weight=None, reduction='mean', smooth_eps=None, from_logits=True):
    return binary_cross_entropy(inputs, target, weight, reduction, smooth_eps, from_logits)


class BCELoss(nn.BCELoss):
    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean', smooth_eps=None, from_logits=False):
        super(BCELoss, self).__init__(weight, size_average, reduce, reduction)
        self.smooth_eps = smooth_eps
        self.from_logits = from_logits

    def forward(self, input, target):
        return binary_cross_entropy(input, target,
                                    weight=self.weight, reduction=self.reduction,
                                    smooth_eps=self.smooth_eps, from_logits=self.from_logits)


class BCEWithLogitsLoss(BCELoss):
    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean', smooth_eps=None, from_logits=True):
        super(BCEWithLogitsLoss, self).__init__(weight, size_average,
                                                reduce, reduction, smooth_eps=smooth_eps, from_logits=from_logits)

In [0]:
class AverageMeter:
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def loss_fn(start_logits, end_logits, start_positions, end_positions):
    start_loss = BCEWithLogitsLoss(smooth_eps=None)(start_logits, start_positions) 
    end_loss = BCEWithLogitsLoss(smooth_eps=None)(end_logits, end_positions)
    total_loss = (start_loss + end_loss) #+ F.smooth_l1_loss(logits, span_logits)
#     dis_loss =  dist_loss(start_logits, end_logits, start_positions, end_positions, device)
    # len_loss = nn.MSELoss()(length, length_)/max_len
    return total_loss

def jaccard(str1, str2): 
    a = set(str1.lower().split()) 
    b = set(str2.lower().split())
    c = a.intersection(b)
    return float(len(c)) / (len(a) + len(b) - len(c))

In [0]:
def process_data(tweet, selected_text, sentiment, tokenizer, max_len):
  
    tweet = " " + " ".join(str(tweet).split())
    selected_text = " " + " ".join(str(selected_text).split())

    len_st = len(selected_text) - 1
    idx0 = None
    idx1 = None

    for ind in (i for i, e in enumerate(tweet) if e == selected_text[1]):
        if " " + tweet[ind: ind+len_st] == selected_text:
            idx0 = ind
            idx1 = ind + len_st - 1
            break

    char_targets = [0] * len(tweet)
    if idx0 != None and idx1 != None:
        for ct in range(idx0, idx1 + 1):
            char_targets[ct] = 1
    
    tok_tweet = tokenizer.encode(tweet)
    input_ids_orig = tok_tweet.ids
    tweet_offsets = tok_tweet.offsets
    
    target_idx = []
    for j, (offset1, offset2) in enumerate(tweet_offsets):
        if sum(char_targets[offset1: offset2]) > 0:
            target_idx.append(j)
    
    targets_start = target_idx[0]
    targets_end = target_idx[-1]
    
    target_logits = [0]*(targets_start+4) + [1]*len(target_idx) + [0]*(max_len-(targets_start+4)-len(target_idx))
    targets_start_logits = [0]*len(target_logits)
    targets_end_logits = [0]*len(target_logits) 

    nonzero = np.nonzero(target_logits)[0]
    if len(nonzero) > 0:
        targets_start_logits[nonzero[0]] = 1
        targets_end_logits[nonzero[-1]] = 1

    sentiment_id = {
        'positive': 1313,
        'negative': 2430,
        'neutral': 7974
    }

    input_ids = [0]+[sentiment_id[sentiment]]+[2]+[2]+input_ids_orig+[2]
    token_type_ids = [0, 0, 0, 0] + [0] * (len(input_ids_orig) + 1)
    mask = [1] * len(token_type_ids)
    tweet_offsets = [(0, 0)] * 4 + tweet_offsets + [(0, 0)]
    targets_start += 4
    targets_end += 4

    padding_length = max_len - len(input_ids)
    if padding_length > 0:
        input_ids = input_ids + ([1] * padding_length)
        mask = mask + ([0] * padding_length)
        token_type_ids = token_type_ids + ([0] * padding_length)
        tweet_offsets = tweet_offsets + ([(0, 0)] * padding_length)

    targets = targets_end - targets_start
    
    if sentiment == 'positive':
        sentiment_vector = [1,0,0]
    elif sentiment == 'negative':
        sentiment_vector = [0,0,1]
    else:
        sentiment_vector = [0,1,0]

    return {
        'ids': input_ids,
        'mask': mask,
        'token_type_ids': token_type_ids,
        'targets_start': targets_start_logits,
        'targets_end': targets_end_logits,
        'targets_start_index': targets_start,
        'targets_end_index': targets_end,
        'targets': target_logits,
        'orig_tweet': tweet,
        'orig_selected': selected_text,
        'sentiment': sentiment,
        'offsets': tweet_offsets,
        'sentiment_vector':sentiment_vector,
        'span_target':targets
    }

In [0]:
class TweetDataset(Dataset):
    def __init__(self, tweet, sentiment, selected_text):
        self.tweet = tweet
        self.sentiment = sentiment
        self.selected_text = selected_text
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.tweet)

    def __getitem__(self, item):
        data = process_data(
            self.tweet[item], 
            self.selected_text[item], 
            self.sentiment[item],
            self.tokenizer,
            self.max_len,
        )

        return {
            'ids': torch.tensor(data["ids"], dtype=torch.long),
            'mask': torch.tensor(data["mask"], dtype=torch.long),
            'token_type_ids': torch.tensor(data["token_type_ids"], dtype=torch.long),
            'targets_start': torch.tensor(data["targets_start"], dtype=torch.float),
            'targets_end': torch.tensor(data["targets_end"], dtype=torch.float),
            'targets_start_index': torch.tensor(data["targets_start_index"], dtype=torch.long),
            'targets_end_index': torch.tensor(data["targets_end_index"], dtype=torch.long),
            'targets': torch.tensor(data["targets"], dtype=torch.float),
            'orig_tweet': data["orig_tweet"],
            'orig_selected': data["orig_selected"],
            'sentiment': data["sentiment"],
            'offsets': torch.tensor(data["offsets"], dtype=torch.long),
            'sentiment_vector': torch.tensor(data["sentiment_vector"], dtype=torch.float),
            'span_target':torch.tensor(data["span_target"], dtype=torch.float)
        }

In [0]:
# class TweetRobertaBase(BertPreTrainedModel):
#     def __init__(self, conf):
#         super(TweetRobertaBase, self).__init__(conf)
#         self.roberta = RobertaModel.from_pretrained('roberta-base', config=conf)
#         self.drop_out = nn.Dropout(0.3)
#         self.l0 = nn.Linear(768, 2)
#         self.l1 = nn.Linear(768*2, 1)
    
#     def forward(self, ids, mask, token_type_ids):
#         _, _, hidden_outputs = self.roberta(
#             ids,
#             attention_mask=mask,
#             token_type_ids=token_type_ids
#         )
        
#         sequence_output = torch.stack(hidden_outputs[-4:]).mean(0)
# #         sequence_output = hidden_outputs[-2]
#         out = self.drop_out(sequence_output)
#         logits = self.l0(out)
#         start_logits, end_logits = logits.split(1, dim=-1)
#         start_logits = start_logits.squeeze(-1)
#         end_logits = end_logits.squeeze(-1)

#         avg_pool = torch.mean(sequence_output, 1)
#         max_pool, _ = torch.max(sequence_output, 1)
#         long_logits = torch.cat((max_pool, avg_pool), 1)
#         long_logits = self.drop_out(long_logits)
#         long_logits = self.l1(long_logits).squeeze(-1)  
        
#         return start_logits, end_logits, long_logits

In [0]:
class TweetRobertaBase(BertPreTrainedModel):
    def __init__(self, conf):
        super(TweetRobertaBase, self).__init__(conf)
        self.config = conf
        self.roberta = RobertaModel.from_pretrained('roberta-base', config=self.config)
        self.lstm_units = 768
        self.num_recurrent_layers = 1
        self.bidirectional = True

        self.lstm = nn.LSTM(input_size=self.config.hidden_size*2,
                            hidden_size=self.lstm_units,
                            num_layers=self.num_recurrent_layers,
                            bidirectional=self.bidirectional,
                            batch_first=True)
        
        self.dropout = nn.Dropout(0.2)  
        self.l0 = nn.Linear(self.config.hidden_size*2, 2)
        self.l1 = nn.Linear(self.config.hidden_size*4, 3)
    
    def forward(self, ids, mask, token_type_ids):
        sequence_output, _, hidden_outputs = self.roberta(
            ids,
            attention_mask=mask,
            token_type_ids=token_type_ids
        )
        
        # sequence_output = torch.stack(hidden_outputs[-2:]).mean(0)
        sequence_output = torch.cat(tuple([hidden_outputs[i] for i in [-1, -2]]), dim=-1)

        if self.bidirectional:
            n = 2
        else: n = 1

        h0 = Variable(torch.zeros(self.num_recurrent_layers * n,       # (L * 2 OR L, B, H)
                                  ids.shape[0],
                                  self.lstm_units)).cuda()
        c0 = Variable(torch.zeros(self.num_recurrent_layers * n,        # (L * 2 OR L, B, H)
                                  ids.shape[0],
                                  self.lstm_units)).cuda()
        
        output, _ = self.lstm(sequence_output, (h0, c0))
        output = self.dropout(output)

        logits = self.l0(output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)
        end_logits = end_logits.squeeze(-1)

        # avg_pool = torch.mean(output, 1)
        # max_pool, _ = torch.max(output, 1)
        # long_logits = torch.cat((max_pool, avg_pool), 1)
        # long_logits = self.l1(long_logits)

        return start_logits, end_logits

In [0]:
cdf_threshold = 0.5

In [0]:
def train_model(model, data_loader, optimizer, scheduler, device):
    
    model.train()
    losses = AverageMeter()
    jaccards = AverageMeter()
    jaccards_neu = AverageMeter()
    jaccards_pos = AverageMeter()
    jaccards_neg = AverageMeter()

    tk0 = tqdm(data_loader, total=len(data_loader))
    
    for bi, d in enumerate(tk0):

        ids = d["ids"]
        token_type_ids = d["token_type_ids"]
        mask = d["mask"]
        targets_start = d["targets_start"]
        targets_end = d["targets_end"]
        sentiment = d["sentiment"]
        orig_selected = d["orig_selected"]
        orig_tweet = d["orig_tweet"]
        targets_start_index = d["targets_start_index"]
        targets_end_index = d["targets_end_index"]
        offsets = d["offsets"]
        targets = d["sentiment_vector"]
        span_targets = d["span_target"]

        ids = ids.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        targets_start = targets_start.to(device, dtype=torch.float)
        targets_end = targets_end.to(device, dtype=torch.float)
        targets_start_index = targets_start_index.to(device, dtype=torch.long)
        targets_end_index = targets_end_index.to(device, dtype=torch.long)
        targets = targets.to(device, dtype=torch.float)
        span_targets = span_targets.to(device, dtype=torch.float)

        model.zero_grad()
        outputs_start, outputs_end = model(
            ids=ids,
            mask=mask,
            token_type_ids=token_type_ids,
        )

        loss = loss_fn(outputs_start, outputs_end, targets_start, targets_end)       
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward() 
        optimizer.step()
        scheduler.step()
        
        outputs_start = torch.softmax(outputs_start, dim=1).cpu().detach().numpy()
        outputs_end = torch.softmax(outputs_end, dim=1).cpu().detach().numpy()

        jaccard_scores = []
        jaccard_scores_neu = []
        jaccard_scores_pos = []
        jaccard_scores_neg = []

        for px, tweet in enumerate(orig_tweet):
            selected_tweet = orig_selected[px]
            tweet_sentiment = sentiment[px]
            jaccard_score, jaccard_neu_score, jaccard_pos_score, jaccard_neg_score, _ = calculate_jaccard_score(
                                                                                                          original_tweet=tweet,
                                                                                                          target_string=selected_tweet,
                                                                                                          sentiment_val=tweet_sentiment,
                                                                                                          # idx_start=np.argmax(outputs_start[px, :]),
                                                                                                          # idx_end=np.argmax(outputs_end[px, :]),
                                                                                                          idx_start=np.where(outputs_start[px, :].cumsum(axis = 0) > cdf_threshold)[0].min(),
                                                                                                          idx_end=np.where(outputs_end[px, :][::-1].cumsum(axis = 0)[::-1] > cdf_threshold)[0].max(),
                                                                                                          offsets=offsets[px]
                                                                                                      )
            jaccard_scores.append(jaccard_score)
            if jaccard_neu_score is not None:
                jaccard_scores_neu.append(jaccard_neu_score)
            if jaccard_pos_score is not None:
                jaccard_scores_pos.append(jaccard_pos_score)
            if jaccard_neg_score is not None:
                jaccard_scores_neg.append(jaccard_neg_score)

        jaccards.update(np.mean(jaccard_scores), ids.size(0))
        jaccards_neu.update(np.mean(jaccard_scores_neu), ids.size(0))
        jaccards_pos.update(np.mean(jaccard_scores_pos), ids.size(0))
        jaccards_neg.update(np.mean(jaccard_scores_neg), ids.size(0))
        losses.update(loss.item(), ids.size(0))
        tk0.set_postfix(loss=losses.avg, jaccard=jaccards.avg, jaccard_neu=jaccards_neu.avg, jaccard_pos=jaccards_pos.avg, jaccard_neg=jaccards_neg.avg)

In [0]:
def calculate_jaccard_score(original_tweet, target_string, sentiment_val, idx_start,
                            idx_end, offsets, verbose=False):
    
    if idx_end < idx_start:
        idx_end = idx_start
    
    filtered_output  = ""
    for ix in range(idx_start, idx_end + 1):
        filtered_output += original_tweet[offsets[ix][0]: offsets[ix][1]]
        if (ix+1) < len(offsets) and offsets[ix][1] < offsets[ix+1][0]:
            filtered_output += " "

    # if sentiment_val == "neutral" or len(original_tweet.split()) < 2:
    #     filtered_output = original_tweet
    # elif sentiment_val == "positive" or sentiment_val == "negative":
    #     word_tokens = word_tokenize(filtered_output)  
    #     filtered_output = ' '.join([w for w in word_tokens if not w in stop_words]) 

    jac = jaccard(target_string.strip(), filtered_output.strip())
    jac_neu, jac_pos, jac_neg = None, None, None
    if sentiment_val == "neutral":
        jac_neu = jaccard(target_string.strip(), filtered_output.strip())
    elif sentiment_val == "positive":
        jac_pos = jaccard(target_string.strip(), filtered_output.strip())
    elif sentiment_val == "negative":
        jac_neg = jaccard(target_string.strip(), filtered_output.strip())

    return jac, jac_neu, jac_pos, jac_neg, filtered_output


def eval_model(model, data_loader, device):
    
    model.eval()
    losses = AverageMeter()
    jaccards = AverageMeter()
    jaccards_neu = AverageMeter()
    jaccards_pos = AverageMeter()
    jaccards_neg = AverageMeter()

    out_strings = []

    with torch.no_grad():
        tk0 = tqdm(data_loader, total=len(data_loader))
        for bi, d in enumerate(tk0):
            ids = d["ids"]
            token_type_ids = d["token_type_ids"]
            mask = d["mask"]
            sentiment = d["sentiment"]
            orig_selected = d["orig_selected"]
            orig_tweet = d["orig_tweet"]
            targets_start = d["targets_start"]
            targets_end = d["targets_end"]
            targets_start_index = d["targets_start_index"]
            targets_end_index = d["targets_end_index"]
            offsets = d["offsets"].numpy()
            targets = d["sentiment_vector"]
            span_targets = d["span_target"]
            
            ids = ids.to(device, dtype=torch.long)
            token_type_ids = token_type_ids.to(device, dtype=torch.long)
            mask = mask.to(device, dtype=torch.long)
            targets_start = targets_start.to(device, dtype=torch.float)
            targets_end = targets_end.to(device, dtype=torch.float)
            targets_start_index = targets_start_index.to(device, dtype=torch.long)
            targets_end_index = targets_end_index.to(device, dtype=torch.long)
            targets = targets.to(device, dtype=torch.float)
            span_targets = span_targets.to(device, dtype=torch.float)
            
            outputs_start, outputs_end = model(
                ids=ids,
                mask=mask,
                token_type_ids=token_type_ids
            )
            
            loss = loss_fn(outputs_start, outputs_end, targets_start, targets_end)   
            outputs_start = torch.softmax(outputs_start, dim = 1).cpu().detach().numpy()
            outputs_end = torch.softmax(outputs_end, dim = 1).cpu().detach().numpy()

            jaccard_scores = []
            jaccard_scores_neu = []
            jaccard_scores_pos = []
            jaccard_scores_neg = []
            
            for px, tweet in enumerate(orig_tweet):
                selected_tweet = orig_selected[px]
                tweet_sentiment = sentiment[px]
                jaccard_score, jaccard_neu_score, jaccard_pos_score, jaccard_neg_score, _ = calculate_jaccard_score(
                                                                                                              original_tweet=tweet,
                                                                                                              target_string=selected_tweet,
                                                                                                              sentiment_val=tweet_sentiment,
                                                                                                              # idx_start=np.argmax(outputs_start[px, :]),
                                                                                                              # idx_end=np.argmax(outputs_end[px, :]),
                                                                                                              idx_start=np.where(outputs_start[px, :].cumsum(axis = 0) > cdf_threshold)[0].min(),
                                                                                                              idx_end=np.where(outputs_end[px, :][::-1].cumsum(axis = 0)[::-1] > cdf_threshold)[0].max(),
                                                                                                              offsets=offsets[px]
                                                                                                          )
                jaccard_scores.append(jaccard_score)
                if jaccard_neu_score is not None:
                    jaccard_scores_neu.append(jaccard_neu_score)
                if jaccard_pos_score is not None:
                    jaccard_scores_pos.append(jaccard_pos_score)
                if jaccard_neg_score is not None:
                    jaccard_scores_neg.append(jaccard_neg_score)
                out_strings.append(_)

            jaccards.update(np.mean(jaccard_scores), ids.size(0))
            jaccards_neu.update(np.mean(jaccard_scores_neu), ids.size(0))
            jaccards_pos.update(np.mean(jaccard_scores_pos), ids.size(0))
            jaccards_neg.update(np.mean(jaccard_scores_neg), ids.size(0))
            losses.update(loss.item(), ids.size(0))
            tk0.set_postfix(loss=losses.avg, jaccard=jaccards.avg, jaccard_neu=jaccards_neu.avg, jaccard_pos=jaccards_pos.avg, jaccard_neg=jaccards_neg.avg)
                   
    print(f"Jaccard = {jaccards.avg}")
    print(f"Jaccard_neu = {jaccards_neu.avg}")
    print(f"Jaccard_pos = {jaccards_pos.avg}")
    print(f"Jaccard_neg = {jaccards_neg.avg}")
    return jaccards.avg, jaccards_neu.avg, jaccards_neg.avg, jaccards_pos.avg, out_strings

In [0]:
train = pd.read_csv(train_file).dropna().reset_index(drop=True)
# orig_columns = train.columns[1:]
# train['len_text'] = train.text.apply(lambda x: len(x.split()))
# train['len_sel'] = train.selected_text.apply(lambda x: len(x.split()))
# train['len_ratio'] = train['len_sel']/train['len_text']
# train['len_text_tokenize'] = train.text.apply(lambda x: len(sent_tokenize(x)))

# extended_train = train.loc[(train['len_text_tokenize'] > 1) & (train['len_ratio'] < 0.5)].reset_index(drop = True)
# extended_train['text'] = extended_train.text.apply(lambda x: transform(data=(x.strip(), 'en'))['data'][0])

# train['target'] = 0
# train.loc[train['text'].str.strip() == train['selected_text'].str.strip(), 'target'] = 1
# extended_train_1 = train.loc[(train['target'] == 1) & (train['len_text_tokenize'] > 1)].reset_index(drop = True)
# extended_train_1['text'] = extended_train_1.text.apply(lambda x: transform(data=(x.strip(), 'en'))['data'][0])
# extended_train_1['selected_text'] = extended_train_1['text']

In [0]:
# fail_index = []
# for i in range(len(extended_train)):
#     try:
#         process_data(extended_train.text[i], extended_train.selected_text[i], extended_train.sentiment[i], tokenizer, max_len)
#     except:
#         fail_index.append(i)
# extended_train = extended_train.drop(fail_index)

# train = pd.concat([train, extended_train_1], axis=0)
# train = train[orig_columns]
# train = train.sample(frac=1).reset_index(drop=True)

In [18]:
model_config = RobertaConfig.from_pretrained("roberta-base")
model_config.num_labels = 2
model_config.output_hidden_states = True

all_scores, score_neu, score_neg, score_pos = [], [], [], []
predictions = ['']*len(train)
    
kf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 42)
for fold, (tr_ind, val_ind) in enumerate(kf.split(train, train['sentiment'])):
    print(f'Fold no {fold+1}:')
        
    x_train = train.iloc[tr_ind].reset_index(drop=True)
    x_val = train.iloc[val_ind].reset_index(drop=True)        

    train_dataset = TweetDataset(tweet = x_train.text.values,
                                sentiment = x_train.sentiment.values,
                                selected_text = x_train.selected_text.values)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=train_batch_size,
                                              num_workers=4)

    valid_dataset = TweetDataset(tweet = x_val.text.values,
                                sentiment = x_val.sentiment.values,
                                selected_text = x_val.selected_text.values)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batch_size,
                                              num_workers=1)

    device = torch.device("cuda")
    model = TweetRobertaBase(conf=model_config)
    model.to(device)

    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
    ]

    num_train_steps = int(len(x_train) / train_batch_size * epochs)
    optimizer = AdamW(optimizer_parameters, lr=4e-5)
    
#     scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
#         optimizer,
#         num_warmup_steps=0,
#         num_training_steps=num_train_steps,
#     )

    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,
        num_training_steps=num_train_steps
    )

    model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity = 0)

    print("Training....")
    best_jaccard, best_neu, best_neg, best_pos = 0, 0, 0, 0
    for epoch in range(epochs):
        train_model(model, train_loader, optimizer, scheduler, device)
        jaccard_score, jaccard_neu, jaccard_neg, jaccard_pos, out_strings = eval_model(model, valid_loader, device)
              
        if jaccard_neu > best_neu:
            best_neu = jaccard_neu
            print(f'Saving neutral model with jaccard of {best_neu}!')
            torch.save(model.state_dict(), model_neu+str(fold)+'.bin')
        if jaccard_neg > best_neg:
            best_neg = jaccard_neg
            print(f'Saving negative model with jaccard of {best_neg}!')
            torch.save(model.state_dict(), model_neg+str(fold)+'.bin')
        if jaccard_pos > best_pos:
            best_pos = jaccard_pos
            print(f'Saving positive model with jaccard of {best_pos}!')
            torch.save(model.state_dict(), model_pos+str(fold)+'.bin')
        if jaccard_score > best_jaccard:
            best_jaccard = jaccard_score
            # torch.save(model.state_dict(),'model_'+str(fold)+'.bin')
            for i in range(len(val_ind)):
                predictions[val_ind[i]] = out_strings[i]
                
    all_scores.append(best_jaccard)
    score_neu.append(best_neu)
    score_neg.append(best_neg)
    score_pos.append(best_pos)

Fold no 1:


  0%|          | 0/344 [00:00<?, ?it/s]

Training....


100%|██████████| 344/344 [02:37<00:00,  2.19it/s, jaccard=0.485, jaccard_neg=0.325, jaccard_neu=0.723, jaccard_pos=0.319, loss=0.0678]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.677, jaccard_neg=0.483, jaccard_neu=0.973, jaccard_pos=0.475, loss=0.0303]


Jaccard = 0.6772624852257336
Jaccard_neu = 0.9727873033031826
Jaccard_pos = 0.4752663140076805
Jaccard_neg = 0.48295119097050887
Saving neutral model with jaccard of 0.9727873033031826!
Saving negative model with jaccard of 0.48295119097050887!
Saving positive model with jaccard of 0.4752663140076805!


100%|██████████| 344/344 [02:36<00:00,  2.20it/s, jaccard=0.695, jaccard_neg=0.498, jaccard_neu=0.976, jaccard_pos=0.51, loss=0.0277]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.695, jaccard_neg=0.513, jaccard_neu=0.973, jaccard_pos=0.501, loss=0.0281]


Jaccard = 0.6947501925821513
Jaccard_neu = 0.9727873033031826
Jaccard_pos = 0.5010411655326238
Jaccard_neg = 0.5130893372166574
Saving negative model with jaccard of 0.5130893372166574!
Saving positive model with jaccard of 0.5010411655326238!


100%|██████████| 344/344 [02:35<00:00,  2.21it/s, jaccard=0.717, jaccard_neg=0.544, jaccard_neu=0.976, jaccard_pos=0.538, loss=0.025]
100%|██████████| 55/55 [00:12<00:00,  4.38it/s, jaccard=0.7, jaccard_neg=0.52, jaccard_neu=0.973, jaccard_pos=0.513, loss=0.0283]


Jaccard = 0.7004638291577134
Jaccard_neu = 0.9728362099062778
Jaccard_pos = 0.5125705160799655
Jaccard_neg = 0.5199653571973446
Saving neutral model with jaccard of 0.9728362099062778!
Saving negative model with jaccard of 0.5199653571973446!
Saving positive model with jaccard of 0.5125705160799655!


100%|██████████| 344/344 [02:35<00:00,  2.21it/s, jaccard=0.738, jaccard_neg=0.58, jaccard_neu=0.977, jaccard_pos=0.574, loss=0.0231]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.702, jaccard_neg=0.523, jaccard_neu=0.973, jaccard_pos=0.517, loss=0.0286]


Jaccard = 0.7021373957937708
Jaccard_neu = 0.9728016350963907
Jaccard_pos = 0.5165439472309954
Jaccard_neg = 0.5229223684472031
Saving negative model with jaccard of 0.5229223684472031!
Saving positive model with jaccard of 0.5165439472309954!


100%|██████████| 344/344 [02:35<00:00,  2.21it/s, jaccard=0.758, jaccard_neg=0.611, jaccard_neu=0.977, jaccard_pos=0.606, loss=0.0213]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.704, jaccard_neg=0.53, jaccard_neu=0.973, jaccard_pos=0.516, loss=0.0291]


Jaccard = 0.7042680416445792
Jaccard_neu = 0.9731274241987712
Jaccard_pos = 0.5162507820721332
Jaccard_neg = 0.5295553918361091
Saving neutral model with jaccard of 0.9731274241987712!
Saving negative model with jaccard of 0.5295553918361091!


100%|██████████| 344/344 [02:35<00:00,  2.21it/s, jaccard=0.775, jaccard_neg=0.642, jaccard_neu=0.979, jaccard_pos=0.631, loss=0.0199]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.701, jaccard_neg=0.525, jaccard_neu=0.973, jaccard_pos=0.512, loss=0.0291]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.7011528421052452
Jaccard_neu = 0.9725648162609576
Jaccard_pos = 0.5120530254770603
Jaccard_neg = 0.5250801214846132


100%|██████████| 344/344 [02:35<00:00,  2.21it/s, jaccard=0.788, jaccard_neg=0.664, jaccard_neu=0.979, jaccard_pos=0.652, loss=0.0189]
100%|██████████| 55/55 [00:12<00:00,  4.36it/s, jaccard=0.701, jaccard_neg=0.521, jaccard_neu=0.972, jaccard_pos=0.513, loss=0.0301]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.7006374381349475
Jaccard_neu = 0.9723299881548013
Jaccard_pos = 0.512894486439081
Jaccard_neg = 0.5214325221896945


100%|██████████| 344/344 [02:35<00:00,  2.21it/s, jaccard=0.794, jaccard_neg=0.673, jaccard_neu=0.98, jaccard_pos=0.662, loss=0.0183]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.7, jaccard_neg=0.52, jaccard_neu=0.972, jaccard_pos=0.512, loss=0.0302]


Jaccard = 0.6997390208602278
Jaccard_neu = 0.9723146805041771
Jaccard_pos = 0.5118512143481494
Jaccard_neg = 0.5204496531887285
Fold no 2:


  0%|          | 0/344 [00:00<?, ?it/s]

Training....


100%|██████████| 344/344 [02:38<00:00,  2.18it/s, jaccard=0.494, jaccard_neg=0.324, jaccard_neu=0.746, jaccard_pos=0.319, loss=0.0661]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.684, jaccard_neg=0.491, jaccard_neu=0.975, jaccard_pos=0.482, loss=0.0289]


Jaccard = 0.6842724888393819
Jaccard_neu = 0.9750525690816386
Jaccard_pos = 0.4823591493958894
Jaccard_neg = 0.49137118204018077
Saving neutral model with jaccard of 0.9750525690816386!
Saving negative model with jaccard of 0.49137118204018077!
Saving positive model with jaccard of 0.4823591493958894!


100%|██████████| 344/344 [02:37<00:00,  2.18it/s, jaccard=0.687, jaccard_neg=0.49, jaccard_neu=0.975, jaccard_pos=0.493, loss=0.0282]
100%|██████████| 55/55 [00:12<00:00,  4.38it/s, jaccard=0.707, jaccard_neg=0.527, jaccard_neu=0.975, jaccard_pos=0.521, loss=0.0262]


Jaccard = 0.7066958475821361
Jaccard_neu = 0.9749351119704465
Jaccard_pos = 0.5211357349775777
Jaccard_neg = 0.5265451773207381
Saving negative model with jaccard of 0.5265451773207381!
Saving positive model with jaccard of 0.5211357349775777!


100%|██████████| 344/344 [02:37<00:00,  2.19it/s, jaccard=0.71, jaccard_neg=0.53, jaccard_neu=0.975, jaccard_pos=0.529, loss=0.0254]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.704, jaccard_neg=0.516, jaccard_neu=0.975, jaccard_pos=0.52, loss=0.0261]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.7038289383915258
Jaccard_neu = 0.9749276258593632
Jaccard_pos = 0.5198557982953738
Jaccard_neg = 0.5162687826093073


100%|██████████| 344/344 [02:36<00:00,  2.20it/s, jaccard=0.728, jaccard_neg=0.565, jaccard_neu=0.976, jaccard_pos=0.554, loss=0.0238]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.711, jaccard_neg=0.529, jaccard_neu=0.975, jaccard_pos=0.53, loss=0.0262]


Jaccard = 0.7106379187504039
Jaccard_neu = 0.9746668444570578
Jaccard_pos = 0.53024721852384
Jaccard_neg = 0.5294075007079857
Saving negative model with jaccard of 0.5294075007079857!
Saving positive model with jaccard of 0.53024721852384!


100%|██████████| 344/344 [02:36<00:00,  2.20it/s, jaccard=0.742, jaccard_neg=0.585, jaccard_neu=0.976, jaccard_pos=0.581, loss=0.0226]
100%|██████████| 55/55 [00:12<00:00,  4.36it/s, jaccard=0.71, jaccard_neg=0.531, jaccard_neu=0.974, jaccard_pos=0.525, loss=0.0265]


Jaccard = 0.709558174009492
Jaccard_neu = 0.9744227007925258
Jaccard_pos = 0.5248820883426603
Jaccard_neg = 0.5311770650198643
Saving negative model with jaccard of 0.5311770650198643!


100%|██████████| 344/344 [02:36<00:00,  2.20it/s, jaccard=0.759, jaccard_neg=0.619, jaccard_neu=0.978, jaccard_pos=0.602, loss=0.021]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.709, jaccard_neg=0.526, jaccard_neu=0.975, jaccard_pos=0.528, loss=0.0269]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.709219301292692
Jaccard_neu = 0.974537050269313
Jaccard_pos = 0.52823111786608
Jaccard_neg = 0.5259231713911844


100%|██████████| 344/344 [02:35<00:00,  2.21it/s, jaccard=0.77, jaccard_neg=0.637, jaccard_neu=0.978, jaccard_pos=0.62, loss=0.0201]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.708, jaccard_neg=0.524, jaccard_neu=0.974, jaccard_pos=0.528, loss=0.0274]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.7083778751226549
Jaccard_neu = 0.9740514333652447
Jaccard_pos = 0.5283860367832218
Jaccard_neg = 0.5244955445955869


100%|██████████| 344/344 [02:35<00:00,  2.21it/s, jaccard=0.774, jaccard_neg=0.647, jaccard_neu=0.979, jaccard_pos=0.624, loss=0.0196]
100%|██████████| 55/55 [00:12<00:00,  4.39it/s, jaccard=0.71, jaccard_neg=0.525, jaccard_neu=0.974, jaccard_pos=0.532, loss=0.0276]


Jaccard = 0.7096158388346528
Jaccard_neu = 0.9741914371586676
Jaccard_pos = 0.5315587470878916
Jaccard_neg = 0.525147592296121
Saving positive model with jaccard of 0.5315587470878916!
Fold no 3:


  0%|          | 0/344 [00:00<?, ?it/s]

Training....


100%|██████████| 344/344 [02:39<00:00,  2.16it/s, jaccard=0.476, jaccard_neg=0.316, jaccard_neu=0.715, jaccard_pos=0.31, loss=0.0672]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.687, jaccard_neg=0.498, jaccard_neu=0.975, jaccard_pos=0.485, loss=0.0284]


Jaccard = 0.6866634734765023
Jaccard_neu = 0.9754979523559492
Jaccard_pos = 0.4847853268406725
Jaccard_neg = 0.4979796087823672
Saving neutral model with jaccard of 0.9754979523559492!
Saving negative model with jaccard of 0.4979796087823672!
Saving positive model with jaccard of 0.4847853268406725!


100%|██████████| 344/344 [02:38<00:00,  2.18it/s, jaccard=0.693, jaccard_neg=0.496, jaccard_neu=0.976, jaccard_pos=0.505, loss=0.0275]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.702, jaccard_neg=0.519, jaccard_neu=0.976, jaccard_pos=0.514, loss=0.0263]


Jaccard = 0.7017159779191593
Jaccard_neu = 0.9755996739640089
Jaccard_pos = 0.514419823809611
Jaccard_neg = 0.5187765722369276
Saving neutral model with jaccard of 0.9755996739640089!
Saving negative model with jaccard of 0.5187765722369276!
Saving positive model with jaccard of 0.514419823809611!


100%|██████████| 344/344 [02:37<00:00,  2.18it/s, jaccard=0.714, jaccard_neg=0.542, jaccard_neu=0.976, jaccard_pos=0.53, loss=0.0251]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.704, jaccard_neg=0.528, jaccard_neu=0.976, jaccard_pos=0.511, loss=0.0267]


Jaccard = 0.7039187304364545
Jaccard_neu = 0.9761117968191045
Jaccard_pos = 0.5111613800747574
Jaccard_neg = 0.52753671077199
Saving neutral model with jaccard of 0.9761117968191045!
Saving negative model with jaccard of 0.52753671077199!


100%|██████████| 344/344 [02:37<00:00,  2.19it/s, jaccard=0.732, jaccard_neg=0.569, jaccard_neu=0.976, jaccard_pos=0.562, loss=0.0234]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.706, jaccard_neg=0.531, jaccard_neu=0.976, jaccard_pos=0.515, loss=0.0263]


Jaccard = 0.7062949624224804
Jaccard_neu = 0.9759769691924692
Jaccard_pos = 0.5149157673868623
Jaccard_neg = 0.5311076150982212
Saving negative model with jaccard of 0.5311076150982212!
Saving positive model with jaccard of 0.5149157673868623!


100%|██████████| 344/344 [02:37<00:00,  2.19it/s, jaccard=0.748, jaccard_neg=0.6, jaccard_neu=0.976, jaccard_pos=0.585, loss=0.022]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.706, jaccard_neg=0.532, jaccard_neu=0.975, jaccard_pos=0.516, loss=0.0267]


Jaccard = 0.705918499316856
Jaccard_neu = 0.974770108637632
Jaccard_pos = 0.5156373421832081
Jaccard_neg = 0.5321096654464132
Saving negative model with jaccard of 0.5321096654464132!
Saving positive model with jaccard of 0.5156373421832081!


100%|██████████| 344/344 [02:36<00:00,  2.19it/s, jaccard=0.766, jaccard_neg=0.636, jaccard_neu=0.977, jaccard_pos=0.612, loss=0.0205]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.704, jaccard_neg=0.527, jaccard_neu=0.974, jaccard_pos=0.514, loss=0.0277]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.7038889080506824
Jaccard_neu = 0.9739378094193774
Jaccard_pos = 0.5142340144873985
Jaccard_neg = 0.5273991270330532


100%|██████████| 344/344 [02:37<00:00,  2.19it/s, jaccard=0.78, jaccard_neg=0.657, jaccard_neu=0.978, jaccard_pos=0.634, loss=0.0195]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.704, jaccard_neg=0.528, jaccard_neu=0.974, jaccard_pos=0.514, loss=0.0283]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.704193313653621
Jaccard_neu = 0.9742242602045511
Jaccard_pos = 0.5135798805519286
Jaccard_neg = 0.5282280424961713


100%|██████████| 344/344 [02:37<00:00,  2.19it/s, jaccard=0.784, jaccard_neg=0.667, jaccard_neu=0.978, jaccard_pos=0.64, loss=0.019]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.706, jaccard_neg=0.528, jaccard_neu=0.974, jaccard_pos=0.519, loss=0.0284]


Jaccard = 0.7060281056110576
Jaccard_neu = 0.974282053041111
Jaccard_pos = 0.5190589406340586
Jaccard_neg = 0.5284579615610224
Saving positive model with jaccard of 0.5190589406340586!
Fold no 4:


  0%|          | 0/344 [00:00<?, ?it/s]

Training....


100%|██████████| 344/344 [02:40<00:00,  2.14it/s, jaccard=0.492, jaccard_neg=0.318, jaccard_neu=0.75, jaccard_pos=0.315, loss=0.0682]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.68, jaccard_neg=0.475, jaccard_neu=0.974, jaccard_pos=0.485, loss=0.0301]


Jaccard = 0.6802027589875906
Jaccard_neu = 0.9736319590569253
Jaccard_pos = 0.48524784034534124
Jaccard_neg = 0.4752741929558008
Saving neutral model with jaccard of 0.9736319590569253!
Saving negative model with jaccard of 0.4752741929558008!
Saving positive model with jaccard of 0.48524784034534124!


100%|██████████| 344/344 [02:39<00:00,  2.16it/s, jaccard=0.685, jaccard_neg=0.487, jaccard_neu=0.975, jaccard_pos=0.488, loss=0.0287]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.7, jaccard_neg=0.512, jaccard_neu=0.974, jaccard_pos=0.517, loss=0.0265]


Jaccard = 0.7003899320703747
Jaccard_neu = 0.9736724228248194
Jaccard_pos = 0.5170636169274839
Jaccard_neg = 0.5119125103385895
Saving neutral model with jaccard of 0.9736724228248194!
Saving negative model with jaccard of 0.5119125103385895!
Saving positive model with jaccard of 0.5170636169274839!


100%|██████████| 344/344 [02:38<00:00,  2.17it/s, jaccard=0.712, jaccard_neg=0.536, jaccard_neu=0.976, jaccard_pos=0.528, loss=0.0254]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.705, jaccard_neg=0.517, jaccard_neu=0.974, jaccard_pos=0.526, loss=0.0262]


Jaccard = 0.7047140767940149
Jaccard_neu = 0.9736904160692962
Jaccard_pos = 0.5260057115296971
Jaccard_neg = 0.5170247034444109
Saving neutral model with jaccard of 0.9736904160692962!
Saving negative model with jaccard of 0.5170247034444109!
Saving positive model with jaccard of 0.5260057115296971!


100%|██████████| 344/344 [02:38<00:00,  2.17it/s, jaccard=0.729, jaccard_neg=0.569, jaccard_neu=0.976, jaccard_pos=0.553, loss=0.0237]
100%|██████████| 55/55 [00:12<00:00,  4.39it/s, jaccard=0.706, jaccard_neg=0.52, jaccard_neu=0.973, jaccard_pos=0.53, loss=0.0268]


Jaccard = 0.7062618813928739
Jaccard_neu = 0.973438419536712
Jaccard_pos = 0.5297348701127477
Jaccard_neg = 0.5201616433832068
Saving negative model with jaccard of 0.5201616433832068!
Saving positive model with jaccard of 0.5297348701127477!


100%|██████████| 344/344 [02:37<00:00,  2.18it/s, jaccard=0.749, jaccard_neg=0.605, jaccard_neu=0.977, jaccard_pos=0.582, loss=0.0221]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.707, jaccard_neg=0.521, jaccard_neu=0.973, jaccard_pos=0.534, loss=0.0268]


Jaccard = 0.7071444269890356
Jaccard_neu = 0.9726176043123104
Jaccard_pos = 0.5340364816074451
Jaccard_neg = 0.5206361762231896
Saving negative model with jaccard of 0.5206361762231896!
Saving positive model with jaccard of 0.5340364816074451!


100%|██████████| 344/344 [02:38<00:00,  2.17it/s, jaccard=0.763, jaccard_neg=0.628, jaccard_neu=0.977, jaccard_pos=0.605, loss=0.0208]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.707, jaccard_neg=0.519, jaccard_neu=0.973, jaccard_pos=0.537, loss=0.0279]


Jaccard = 0.7070066886731922
Jaccard_neu = 0.972572384220604
Jaccard_pos = 0.5366063093804964
Jaccard_neg = 0.5192582578194073
Saving positive model with jaccard of 0.5366063093804964!


100%|██████████| 344/344 [02:38<00:00,  2.17it/s, jaccard=0.776, jaccard_neg=0.653, jaccard_neu=0.978, jaccard_pos=0.623, loss=0.0198]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.706, jaccard_neg=0.52, jaccard_neu=0.972, jaccard_pos=0.533, loss=0.0281]
  0%|          | 0/344 [00:00<?, ?it/s]

Jaccard = 0.706112960799753
Jaccard_neu = 0.9721951242274448
Jaccard_pos = 0.5327678310707157
Jaccard_neg = 0.5195043298178196


100%|██████████| 344/344 [02:38<00:00,  2.17it/s, jaccard=0.784, jaccard_neg=0.671, jaccard_neu=0.978, jaccard_pos=0.632, loss=0.0193]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.704, jaccard_neg=0.517, jaccard_neu=0.972, jaccard_pos=0.529, loss=0.0282]


Jaccard = 0.7040751938954679
Jaccard_neu = 0.9723087815791198
Jaccard_pos = 0.5288570218541659
Jaccard_neg = 0.5166012077033167
Fold no 5:


  0%|          | 0/344 [00:00<?, ?it/s]

Training....


100%|██████████| 344/344 [02:41<00:00,  2.14it/s, jaccard=0.492, jaccard_neg=0.33, jaccard_neu=0.733, jaccard_pos=0.325, loss=0.0675]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.684, jaccard_neg=0.477, jaccard_neu=0.979, jaccard_pos=0.491, loss=0.0294]


Jaccard = 0.6844545700464836
Jaccard_neu = 0.97887999329448
Jaccard_pos = 0.4914839337375082
Jaccard_neg = 0.47724880188889396
Saving neutral model with jaccard of 0.97887999329448!
Saving negative model with jaccard of 0.47724880188889396!
Saving positive model with jaccard of 0.4914839337375082!


100%|██████████| 344/344 [02:39<00:00,  2.15it/s, jaccard=0.693, jaccard_neg=0.501, jaccard_neu=0.974, jaccard_pos=0.501, loss=0.0278]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.694, jaccard_neg=0.504, jaccard_neu=0.978, jaccard_pos=0.497, loss=0.0272]


Jaccard = 0.6936449243888441
Jaccard_neu = 0.9784227210227453
Jaccard_pos = 0.4973639492502335
Jaccard_neg = 0.5043973023204681
Saving negative model with jaccard of 0.5043973023204681!
Saving positive model with jaccard of 0.4973639492502335!


100%|██████████| 344/344 [02:38<00:00,  2.17it/s, jaccard=0.713, jaccard_neg=0.539, jaccard_neu=0.975, jaccard_pos=0.53, loss=0.0254]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.696, jaccard_neg=0.506, jaccard_neu=0.978, jaccard_pos=0.505, loss=0.0265]


Jaccard = 0.6960799682738757
Jaccard_neu = 0.9784340280499983
Jaccard_pos = 0.5052971552762118
Jaccard_neg = 0.5055346725447372
Saving negative model with jaccard of 0.5055346725447372!
Saving positive model with jaccard of 0.5052971552762118!


100%|██████████| 344/344 [02:39<00:00,  2.16it/s, jaccard=0.73, jaccard_neg=0.571, jaccard_neu=0.975, jaccard_pos=0.556, loss=0.0237]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.697, jaccard_neg=0.506, jaccard_neu=0.978, jaccard_pos=0.511, loss=0.0269]


Jaccard = 0.6973962330389161
Jaccard_neu = 0.9781118226972627
Jaccard_pos = 0.5105310260249287
Jaccard_neg = 0.5060971639614384
Saving negative model with jaccard of 0.5060971639614384!
Saving positive model with jaccard of 0.5105310260249287!


100%|██████████| 344/344 [02:38<00:00,  2.16it/s, jaccard=0.743, jaccard_neg=0.597, jaccard_neu=0.975, jaccard_pos=0.574, loss=0.0224]
100%|██████████| 55/55 [00:12<00:00,  4.41it/s, jaccard=0.699, jaccard_neg=0.515, jaccard_neu=0.978, jaccard_pos=0.511, loss=0.0269]


Jaccard = 0.6993394674349536
Jaccard_neu = 0.9776489419524148
Jaccard_pos = 0.5105842045043012
Jaccard_neg = 0.5147076078753612
Saving negative model with jaccard of 0.5147076078753612!
Saving positive model with jaccard of 0.5105842045043012!


100%|██████████| 344/344 [02:38<00:00,  2.17it/s, jaccard=0.756, jaccard_neg=0.62, jaccard_neu=0.976, jaccard_pos=0.592, loss=0.0213]
100%|██████████| 55/55 [00:12<00:00,  4.43it/s, jaccard=0.7, jaccard_neg=0.514, jaccard_neu=0.978, jaccard_pos=0.513, loss=0.0271]


Jaccard = 0.7003293967119162
Jaccard_neu = 0.9777314119479562
Jaccard_pos = 0.5128542219986277
Jaccard_neg = 0.5140033996073727
Saving positive model with jaccard of 0.5128542219986277!


100%|██████████| 344/344 [02:39<00:00,  2.16it/s, jaccard=0.768, jaccard_neg=0.641, jaccard_neu=0.976, jaccard_pos=0.612, loss=0.0204]
100%|██████████| 55/55 [00:12<00:00,  4.40it/s, jaccard=0.701, jaccard_neg=0.51, jaccard_neu=0.977, jaccard_pos=0.517, loss=0.0274]


Jaccard = 0.7006424216624577
Jaccard_neu = 0.9773548452642384
Jaccard_pos = 0.516963494204593
Jaccard_neg = 0.5097471047929736
Saving positive model with jaccard of 0.516963494204593!


100%|██████████| 344/344 [02:39<00:00,  2.16it/s, jaccard=0.773, jaccard_neg=0.65, jaccard_neu=0.977, jaccard_pos=0.62, loss=0.02]
100%|██████████| 55/55 [00:12<00:00,  4.42it/s, jaccard=0.702, jaccard_neg=0.512, jaccard_neu=0.977, jaccard_pos=0.52, loss=0.0277]


Jaccard = 0.7024671046085513
Jaccard_neu = 0.9772288665713563
Jaccard_pos = 0.5204993908765064
Jaccard_neg = 0.512442814596404
Saving positive model with jaccard of 0.5204993908765064!


In [19]:
print('Jaccard score')
print(all_scores, np.mean(all_scores))
print('Neutral score')
print(score_neu, np.mean(score_neu))
print('Negative score')
print(score_neg, np.mean(score_neg))
print('Positive score')
print(score_pos, np.mean(score_pos)) 

Jaccard score
[0.7042680416445792, 0.7106379187504039, 0.7062949624224804, 0.7071444269890356, 0.7024671046085513] 0.7061624908830101
Neutral score
[0.9731274241987712, 0.9750525690816386, 0.9761117968191045, 0.9736904160692962, 0.97887999329448] 0.9753724398926582
Negative score
[0.5295553918361091, 0.5311770650198643, 0.5321096654464132, 0.5206361762231896, 0.5147076078753612] 0.5256371812801874
Positive score
[0.5165439472309954, 0.5315587470878916, 0.5190589406340586, 0.5366063093804964, 0.5204993908765064] 0.5248534670419895


In [20]:
train['predictions'] = predictions
train.loc[(train.sentiment == 'negative') | (train.sentiment=='positive')][:30]

Unnamed: 0,textID,text,selected_text,sentiment,predictions
1,549e992a42,Sooo SAD I will miss you here in San Diego!!!,Sooo SAD,negative,Sooo SAD
2,088c60f138,my boss is bullying me...,bullying me,negative,bullying me...
3,9642c003ef,what interview! leave me alone,leave me alone,negative,leave me alone
4,358bd9e861,"Sons of ****, why couldn`t they put them on t...","Sons of ****,",negative,"****,"
6,6e0c6d75b1,2am feedings for the baby are fun when he is a...,fun,positive,fun
9,fc2cbefa9d,Journey!? Wow... u just became cooler. hehe....,Wow... u just became cooler.,positive,Wow...
11,16fab9f95b,I really really like the song Love Story by Ta...,like,positive,I really really like
12,74a76f6e0a,My Sharpie is running DANGERously low on ink,DANGERously,negative,running DANGERously low on ink
13,04dd1d2e34,i want to go to music tonight but i lost my vo...,lost,negative,lost
15,8a939bfb59,"Uh oh, I am sunburned","Uh oh, I am sunburned",negative,sunburned
