In [14]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
!pip install -q transformers
!pip install -q simpletransformers
!pip install -q gputil
!pip install -q psutil
!pip install -q humanize

In [16]:
import psutil
import humanize
import os, gc
import random, re
import numpy as np
import pandas as pd
import textblob
from nltk import sent_tokenize
from scipy.special import softmax
from sklearn.metrics import log_loss
from sklearn.model_selection import *
from sklearn.preprocessing import LabelEncoder

#from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from keras.utils import to_categorical

from transformers import PreTrainedModel, PreTrainedTokenizer, PretrainedConfig
from transformers import RobertaForSequenceClassification, RobertaTokenizer, RobertaConfig
from albumentations.core.transforms_interface import DualTransform, BasicTransform
from simpletransformers.classification import ClassificationModel

def seed_all(seed_value):
    random.seed(seed_value) 
    np.random.seed(seed_value) 
    torch.manual_seed(seed_value)
    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True 
        torch.backends.cudnn.benchmark = False

seed = 2
seed_all(seed)

## LOAD COMPETITION DATA

In [17]:
path = '/content/drive/My Drive/'
comp = 'Tech4MentalHealth/'
translated = 'translated/'

TRAIN_DATA_FILE = f'{path}{comp}Train.csv'
TEST_DATA_FILE = f'{path}{comp}Test.csv'
SAMPLE_SUB_FILE = f'{path}{comp}SampleSubmission.csv'
PSEUDO_LABEL = f'{path}pseudolabeldata/data_depression_pseudo_label.csv'

train = pd.read_csv(TRAIN_DATA_FILE)
test = pd.read_csv(TEST_DATA_FILE)

feat_cols = "text"
labels_ord = ['Alcohol','Depression', 'Drugs', 'Suicide']
train['label'] = train.label.astype('category').cat.codes

Y = to_categorical(train['label'])

for i in range(len(labels_ord)) :     
     train[labels_ord[i]] = Y[:,i]

In [18]:
train.head(3)

Unnamed: 0,ID,text,label,Alcohol,Depression,Drugs,Suicide
0,SUAVK39Z,I feel that it was better I dieAm happy,1,0.0,1.0,0.0,0.0
1,9JDAGUV3,Why do I get hallucinations?,2,0.0,0.0,1.0,0.0
2,419WR1LQ,I am stresseed due to lack of financial suppor...,1,0.0,1.0,0.0,0.0


## DATA AUGMENTATION

In [19]:
class NLPTransform(BasicTransform):
    """ Transform for nlp task."""
    LANGS = {
        'en': 'english'
    }

    @property
    def targets(self):
        return {"data": self.apply}
    
    def update_params(self, params, **kwargs):
        if hasattr(self, "interpolation"):
            params["interpolation"] = self.interpolation
        if hasattr(self, "fill_value"):
            params["fill_value"] = self.fill_value
        return params

    def get_sentences(self, text, lang='en'):
        return sent_tokenize(text, self.LANGS.get(lang, 'english'))


class SwapWordsTransform(NLPTransform):
    """ Swap words next to each other """
    def __init__(self, swap_distance=1, swap_probability=0.1, always_apply=False, p=0.5):
        """  
        swap_distance - distance for swapping words
        swap_probability - probability of swapping for one word
        """
        super(SwapWordsTransform, self).__init__(always_apply, p)
        self.swap_distance = swap_distance
        self.swap_probability = swap_probability
        self.swap_range_list = list(range(1, swap_distance+1))

    def apply(self, data, **params):
        text, lang = data
        words = text.split()
        words_count = len(words)
        if words_count <= 1:
            return text, lang

        new_words = {}
        for i in range(words_count):
            if random.random() > self.swap_probability:
                new_words[i] = words[i]
                continue
    
            if i < self.swap_distance:
                new_words[i] = words[i]
                continue
    
            swap_idx = i - random.choice(self.swap_range_list)
            new_words[i] = new_words[swap_idx]
            new_words[swap_idx] = words[i]

        return ' '.join([v for k, v in sorted(new_words.items(), key=lambda x: x[0])]), lang

In [20]:
class CutOutWordsTransform(NLPTransform):
    """ Remove random words """
    def __init__(self, cutout_probability=0.05, always_apply=False, p=0.5):
        super(CutOutWordsTransform, self).__init__(always_apply, p)
        self.cutout_probability = cutout_probability

    def apply(self, data, **params):
        text, lang = data
        words = text.split()
        words_count = len(words)
        if words_count <= 1:
            return text, lang
        
        new_words = []
        for i in range(words_count):
            if random.random() < self.cutout_probability:
                continue
            new_words.append(words[i])

        if len(new_words) == 0:
            return words[random.randint(0, words_count-1)], lang

        return ' '.join(new_words), lang

In [21]:
swap_transform = SwapWordsTransform(p=1.0, swap_distance=1, swap_probability=0.2)
cutout_transform = CutOutWordsTransform(p=1.0, cutout_probability=0.2)

lang = 'en'
train_copy = train.copy()

for i in range(len(train_copy)):
        text = train_copy['text'][i]
        text = swap_transform(data=(text, lang))['data'][0]
        train_copy['text'][i] = cutout_transform(data=(text, lang))['data'][0]

In [22]:
train_copy.head(5)

Unnamed: 0,ID,text,label,Alcohol,Depression,Drugs,Suicide
0,SUAVK39Z,feel I it that was I better dieAm happy,1,0.0,1.0,0.0,0.0
1,9JDAGUV3,Why I do hallucinations? get,2,0.0,0.0,1.0,0.0
2,419WR1LQ,I am stresseed due to lack of financial suppor...,1,0.0,1.0,0.0,0.0
3,6UY7DX6Q,Why is life important?,3,0.0,0.0,0.0,1.0
4,FYC0FTFB,How I be to through the depression?,1,0.0,1.0,0.0,0.0


### ADD AUGMENTED DRUG AND SUICIDE STATEMENTS TO TRAIN

In [23]:
df_drug_transform = train_copy[train_copy['label']==2]
df_suicide_transform = train_copy[train_copy['label']==3]

train = pd.concat([train, df_drug_transform, df_suicide_transform], axis=0).sample(frac=1).reset_index(drop=True)

train_df = train.drop(columns=['ID'])
test_df = test.drop(columns=['ID'])

for col in labels_ord:
    test_df[col] = 0

## REMOVE DUPLICATES AND CORRECT MISPELLED WORDS

In [24]:
train_df.drop_duplicates(subset ="text", keep = False, inplace = True) 

train_df['text'] = train_df.text.apply(lambda txt: ''.join(textblob.TextBlob(txt).correct()))
test_df['text'] = test_df.text.apply(lambda txt: ''.join(textblob.TextBlob(txt).correct()))

## DEFINE THE MODEL

In [25]:
def get_model(train, model_name, model_config, epochs, max_seq_length, train_batch_size, seed, learning_rate, n_splits):
    
    loss=[]
    y_pred_test=[]

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    
    print(f"Training {n_splits} folds  with --{model_config} --{epochs} epochs")
    
    for fold_, (train_index, test_index) in enumerate(skf.split(train, train['label'])):
        train1_trn, train1_val = train.iloc[train_index], train.iloc[test_index]
        model = ClassificationModel(model_name, model_config, use_cuda=True, num_labels=4, args={
                                                                             'train_batch_size':train_batch_size,
                                                                             'reprocess_input_data': True,
                                                                             'overwrite_output_dir': True,
                                                                             'fp16': False,
                                                                             'do_lower_case': False,
                                                                             'num_train_epochs': epochs,
                                                                             'max_seq_length': max_seq_length,
                                                                             'regression': False,
                                                                             'manual_seed': seed,
                                                                             "learning_rate":learning_rate,
                                                                             'weight_decay':0,
                                                                             "save_eval_checkpoints": False,
                                                                             "save_model_every_epoch": False,
                                                                             "silent": True})
        model.train_model(train1_trn)
        raw_outputs_val = model.eval_model(train1_val)[1]
        raw_outputs_vals = softmax(raw_outputs_val,axis=1)
        print(f"Fold n°{fold_+1} LogLoss : {round(log_loss(train1_val['label'], raw_outputs_vals), 6)}")
        loss.append(log_loss(train1_val['label'], raw_outputs_vals))
        raw_outputs_test = model.eval_model(test_df)[1]
        raw_outputs_tests = softmax(raw_outputs_test,axis=1)
        y_pred_test.append(raw_outputs_tests)
        
    print(f"\nOverall CV LogLoss : {round(np.mean(loss), 6)}")
    
    return y_pred_test, loss

## Model 1 : RoBERTa base

In [26]:
preds_roberta_base_1997, loss_roberta_base_1997 = get_model(train_df, 'roberta', 'roberta-base', 4, 128, 16, 1997, 2e-05, 10)

sub_model1 = pd.read_csv(SAMPLE_SUB_FILE)
preds_model1 = np.mean(preds_roberta_base_1997, 0)

for i in range(len(labels_ord)): 
    sub_model1[labels_ord[i]] = preds_model1[:,i]

sub_model1.to_csv('sub_model1_1997.csv',index=False)
sub_model1.head()

Training 10 folds  with --roberta-base --4 epochs


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°1 LogLoss : 0.211707


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°2 LogLoss : 0.396107


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°3 LogLoss : 0.274473


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°4 LogLoss : 0.352644


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°5 LogLoss : 0.401113


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°6 LogLoss : 0.59562


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°7 LogLoss : 0.504781


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°8 LogLoss : 0.313553


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°9 LogLoss : 0.324909


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°10 LogLoss : 0.384533

Overall CV LogLoss : 0.375944


Unnamed: 0,ID,Depression,Alcohol,Suicide,Drugs
0,02V56KMO,0.824155,0.021605,0.140665,0.013574
1,03BMGTOK,0.98992,0.002607,0.005385,0.002088
2,03LZVFM6,0.989009,0.002708,0.006034,0.002248
3,0EPULUM5,0.989099,0.002666,0.00622,0.002015
4,0GM4C5GD,0.013568,0.116576,0.035914,0.833942


## Model 2 : RoBERTa base

In [27]:
preds_roberta_base_789, loss_roberta_base_789 = get_model(train_df, 'roberta', 'roberta-base', 4, 128, 16, 789, 2e-05, 10)

sub_model2 = pd.read_csv(SAMPLE_SUB_FILE)
preds_model2 = np.mean(preds_roberta_base_789, 0)

for i in range(len(labels_ord)): 
    sub_model2[labels_ord[i]] = preds_model2[:,i]

sub_model2.to_csv('sub_model2_789.csv',index=False)
sub_model2.head()

Training 10 folds  with --roberta-base --4 epochs


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°1 LogLoss : 0.515488


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°2 LogLoss : 0.317253


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°3 LogLoss : 0.264156


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°4 LogLoss : 0.350653


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°5 LogLoss : 0.461657


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°6 LogLoss : 0.321061


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°7 LogLoss : 0.265255


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°8 LogLoss : 0.354428


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°9 LogLoss : 0.562856


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°10 LogLoss : 0.37894

Overall CV LogLoss : 0.379175


Unnamed: 0,ID,Depression,Alcohol,Suicide,Drugs
0,02V56KMO,0.869605,0.010437,0.114883,0.005075
1,03BMGTOK,0.988861,0.003188,0.006091,0.001859
2,03LZVFM6,0.989934,0.002909,0.005403,0.001754
3,0EPULUM5,0.988795,0.002959,0.006579,0.001666
4,0GM4C5GD,0.010971,0.144913,0.02148,0.822636


## Model 3 : RoBERTa base 5 folds

In [28]:
preds_roberta_base_5folds, loss_roberta_base_5folds = get_model(train_df, 'roberta', 'roberta-base', 4, 128, 16, 1997, 2e-05, 5)

sub_model3 = pd.read_csv(SAMPLE_SUB_FILE)
preds_model3 = np.mean(preds_roberta_base_5folds, 0)

for i in range(len(labels_ord)): 
    sub_model3[labels_ord[i]] = preds_model3[:,i]

sub_model3.to_csv('sub_model3_5folds.csv',index=False)
sub_model3.head()

Training 5 folds  with --roberta-base --4 epochs


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°1 LogLoss : 0.329555


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°2 LogLoss : 0.356188


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°3 LogLoss : 0.422912


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°4 LogLoss : 0.439481


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Fold n°5 LogLoss : 0.336106

Overall CV LogLoss : 0.376848


Unnamed: 0,ID,Depression,Alcohol,Suicide,Drugs
0,02V56KMO,0.777048,0.027434,0.168602,0.026915
1,03BMGTOK,0.986406,0.003262,0.007469,0.002863
2,03LZVFM6,0.985281,0.003372,0.008298,0.00305
3,0EPULUM5,0.985636,0.003233,0.008452,0.002678
4,0GM4C5GD,0.021768,0.20601,0.051795,0.720427


## ENSEMBLE

In [29]:
# sub_model1 = pd.read_csv('./sub_model1_1997.csv')
# sub_model2 = pd.read_csv('./sub_model2_789.csv')
# sub_model3 = pd.read_csv('./sub_model3_5folds.csv')

In [30]:
ensemble_1_2 = sub_model1.copy()

for col in ensemble_1_2.columns[1:]:
    ensemble_1_2[col] = sub_model1[col]*0.68 + sub_model2[col]*0.32

ensemble_1_2.to_csv('ensemble_789_1997.csv',index=False)

## POST-PROCESSING

In [31]:
ensemble_1_2_3 = ensemble_1_2.copy()
ensemble_1_2_3['Drugs'] = sub_model3['Drugs']*0.8 + ensemble_1_2['Drugs']*0.2
ensemble_1_2_3['Suicide'] = sub_model3['Suicide']

In [36]:
final_sub = ensemble_1_2_3.copy()

threshold = 0.025

for i,e in enumerate(final_sub['Suicide']):
    if e<threshold:
        final_sub['Suicide'][i]=0
        
for i,e in enumerate(final_sub['Drugs']):
    if e<threshold:
        final_sub['Drugs'][i]=0
        
for i,e in enumerate(final_sub['Alcohol']):
    if e<threshold:
        final_sub['Alcohol'][i]=0
        
for i,e in enumerate(final_sub['Depression']):
    if e<threshold:
        final_sub['Depression'][i]=0


#final_sub.to_csv('final_sub.csv',index=False)

final_sub.head(5)

Unnamed: 0,ID,Depression,Alcohol,Suicide,Drugs
0,02V56KMO,0.838699,0.0,0.168602,0.0
1,03BMGTOK,0.989582,0.0,0.0,0.0
2,03LZVFM6,0.989305,0.0,0.0,0.0
3,0EPULUM5,0.989002,0.0,0.0,0.0
4,0GM4C5GD,0.0,0.125644,0.051795,0.742406


In [37]:
test_df.head(5)

Unnamed: 0,text,Alcohol,Depression,Drugs,Suicide
0,Now to overcome bad feelings and emotions,0,0,0,0
1,I feel like giving up in life,0,0,0,0
2,I was so depressed feel like got no strength t...,0,0,0,0
3,I feel so low especially since I had no one to...,0,0,0,0
4,can i be successful when I am a drug adduct?,0,0,0,0
