In [1]:
# Import the necessary libraries

import os

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from transformers import RobertaModel, RobertaTokenizer

In [2]:
# Initialize the RoBERTa tokenizer and encoder from the pretrained 'roberta-base' checkpoint.
# `tokenizer` turns raw tweets into token ids; `roberta_model` is later wrapped by the classifier.

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
roberta_model = RobertaModel.from_pretrained('roberta-base')

In [3]:
# Prefer the first GPU, but fall back to the CPU so the notebook still runs on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
roberta_model = roberta_model.to(device)

In [4]:
# Load input data
# Three CSV splits are read from the working directory. Note that `df_valid` is loaded from the
# *test* file (test_4k.csv), so every "valid" metric reported later is computed on that split.


df_train = pd.read_csv("train_8k.csv")
df_valid = pd.read_csv("test_4k.csv")
df_trial = pd.read_csv("trial_1k.csv")


In [5]:
df_train.head()

Unnamed: 0,old id,new id,label,text,int_label
0,472189928340606976,519632796449378304,-3.99,I just love working for 6.5 hours without a br...,-4
1,472440774785650688,519632825167773696,-3.92,The happy song does not invoke good feelings. ...,-4
2,473085653454827520,519632853982650370,-2.22,Having to run to the train first thing in the ...,-2
3,463445012374499328,519632882940129280,-0.56,@OmniJerBear haha should have had #sarcasm at ...,-1
4,463501257110724610,519632911473987584,-1.27,Really excited for these last few days of scho...,-1


In [6]:
df_train.shape

(7985, 5)

In [7]:
# Drop the two id columns and the float-valued `label` score; the integer `int_label` column is kept.
# Not idempotent: re-running this cell after the columns are gone raises KeyError.
df_train = df_train.drop(columns = ['old id', 'new id', 'label'])

In [8]:
df_train['int_label'].value_counts()

-3    2966
-2    2931
-1     860
-4     363
 0     344
 2     195
 1     163
 3     106
 4      49
-5       6
 5       2
Name: int_label, dtype: int64

In [9]:
# Expose the integer label under the name `label` and fix the column order to (label, text).
df_train = df_train.rename(columns={'int_label': 'label'})
df_train = df_train.loc[:, ["label", "text"]]

In [10]:
df_train.head()

Unnamed: 0,label,text
0,-4,I just love working for 6.5 hours without a br...
1,-4,The happy song does not invoke good feelings. ...
2,-2,Having to run to the train first thing in the ...
3,-1,@OmniJerBear haha should have had #sarcasm at ...
4,-1,Really excited for these last few days of scho...


In [11]:
df_valid.head()

Unnamed: 0,id,label,category,text
0,5.376513e+17,-3,sarcasm,So great to come back to my dorm and find that...
1,5.383325e+17,-2,sarcasm,If jean howie my neighbour is at my mums weddi...
2,5.380508e+17,-3,sarcasm,@KTHopkins @MissKatiePrice LOL@ katie hopkins ...
3,5.380175e+17,-3,sarcasm,@stuarteagle QPR? They looked terrible yesterd...
4,5.379588e+17,0,sarcasm,"Next! Jamie Foxx ft. 2 Chainz ""Party Ain't a P..."


In [12]:
# Keep only `label` and `text` so the frame has the same two columns as the training frame.
df_valid = df_valid.drop(columns = ['id', 'category'])

In [13]:
df_trial.head()

Unnamed: 0,id,label,text,int_label
0,465424601124974592,-3.0,RT @BeckyMyers3: General studies exam tomorrow...,-3
1,465422141643845632,-3.4,RT @TheTweetOfGod: A racist NBA owner makes ab...,-3
2,465420676590231552,-2.8,Bit ironic Mo Farrah stars in the Weetabix adv...,-3
3,465420343344394240,-2.8,@JoshFreedman_ It is about as much an election...,-3
4,465414678978756609,-2.4,Just looked out the window. About as inviting ...,-2


In [14]:
# Drop the id and the float-valued `label` score; the integer `int_label` column is kept.
df_trial = df_trial.drop(columns = ['id', 'label'])

In [15]:
# Expose the integer label under the name `label` and fix the column order to (label, text).
df_trial = df_trial.rename(columns={'int_label': 'label'})
df_trial = df_trial.loc[:, ["label", "text"]]

In [16]:
df_train.shape, df_trial.shape, df_valid.shape

((7985, 2), (592, 2), (3957, 2))

In [17]:
# Append the trial rows to the training rows; `ignore_index=True` renumbers the combined frame from 0.
# Not idempotent: re-running this cell appends the trial rows again.
df_train = pd.concat([df_train, df_trial], ignore_index=True)

In [18]:
df_train.shape, df_valid.shape

((8577, 2), (3957, 2))

In [19]:
df_train.head()

Unnamed: 0,label,text
0,-4,I just love working for 6.5 hours without a br...
1,-4,The happy song does not invoke good feelings. ...
2,-2,Having to run to the train first thing in the ...
3,-1,@OmniJerBear haha should have had #sarcasm at ...
4,-1,Really excited for these last few days of scho...


In [20]:
df_valid.head()

Unnamed: 0,label,text
0,-3,So great to come back to my dorm and find that...
1,-2,If jean howie my neighbour is at my mums weddi...
2,-3,@KTHopkins @MissKatiePrice LOL@ katie hopkins ...
3,-3,@stuarteagle QPR? They looked terrible yesterd...
4,0,"Next! Jamie Foxx ft. 2 Chainz ""Party Ain't a P..."


In [21]:
from sklearn.preprocessing import LabelEncoder

In [22]:
# Encoder used to map the raw integer labels onto contiguous class indices starting at 0.
myle = LabelEncoder()

In [23]:
# Learn the label -> class-index mapping from the training labels only ...
df_train['label'] = myle.fit_transform(df_train['label'])

# ... and reuse that same mapping for the validation labels. Re-fitting here would build a
# second, independent mapping that only agrees with the training one when both splits happen
# to contain exactly the same label set. `transform` raises ValueError on a label it has not
# seen during fitting, which surfaces such a mismatch instead of hiding it.
df_valid['label'] = myle.transform(df_valid['label'])

In [24]:
df_train['label'].value_counts()

2     3191
3     3067
4      925
1      410
5      377
7      218
6      196
8      126
9       56
0        8
10       3
Name: label, dtype: int64

In [25]:
df_valid['label'].value_counts() 

3     1530
2      730
4      671
5      293
8      201
6      164
7      150
9      111
1       99
10       4
0        4
Name: label, dtype: int64

In [26]:
# Pull the raw tweet strings out of each frame as plain Python lists.
tweets_train = df_train['text'].tolist()
tweets_valid = df_valid['text'].tolist()

In [27]:
# Tokenize every tweet. No padding is requested here, so sequences keep their own lengths;
# padding to a fixed length happens later in `ds_sentiment.pad`.
# NOTE(review): `truncation=True` is given without `max_length`; the longest sequence reported
# below is 512 tokens, presumably the tokenizer's model maximum — confirm.
tokens_train = tokenizer(tweets_train, truncation=True)
tokens_valid = tokenizer(tweets_valid, truncation=True)

In [28]:
# Store each tweet's list of token ids next to its text and label.
df_train['tokens'] = tokens_train['input_ids']
df_valid['tokens'] = tokens_valid['input_ids']

In [29]:
# Record the number of token ids per tweet (special tokens included, since they are part of `tokens`).
df_train['n_tok'] = df_train['tokens'].apply(len)
df_valid['n_tok'] = df_valid['tokens'].apply(len)

# Length distribution of the training tweets; used to pick a padding length.
df_train['n_tok'].describe()

count    8577.000000
mean       29.213361
std        10.932552
min         8.000000
25%        22.000000
50%        28.000000
75%        35.000000
max       512.000000
Name: n_tok, dtype: float64

In [30]:
df_train.head()

Unnamed: 0,label,text,tokens,n_tok
0,1,I just love working for 6.5 hours without a br...,"[0, 100, 95, 657, 447, 13, 231, 4, 245, 722, 3...",32
1,1,The happy song does not invoke good feelings. ...,"[0, 133, 1372, 2214, 473, 45, 32550, 205, 6453...",21
2,3,Having to run to the train first thing in the ...,"[0, 15852, 7, 422, 7, 5, 2341, 78, 631, 11, 5,...",23
3,4,@OmniJerBear haha should have had #sarcasm at ...,"[0, 1039, 673, 119, 5107, 25786, 40237, 46116,...",19
4,4,Really excited for these last few days of scho...,"[0, 30327, 2283, 13, 209, 94, 367, 360, 9, 334...",24


In [31]:
df_train['text'][0]

"I just love working for 6.5 hours without a break or anything. Especially when I'm on my period and have awful cramps. #NOT"

In [32]:
df_valid.head()

Unnamed: 0,label,text,tokens,n_tok
0,2,So great to come back to my dorm and find that...,"[0, 2847, 372, 7, 283, 124, 7, 127, 18344, 8, ...",33
1,3,If jean howie my neighbour is at my mums weddi...,"[0, 1106, 1236, 12001, 141, 324, 127, 14915, 1...",31
2,2,@KTHopkins @MissKatiePrice LOL@ katie hopkins ...,"[0, 1039, 530, 3732, 1517, 7327, 787, 22885, 2...",43
3,2,@stuarteagle QPR? They looked terrible yesterd...,"[0, 1039, 620, 41962, 242, 21851, 1209, 4454, ...",22
4,5,"Next! Jamie Foxx ft. 2 Chainz ""Party Ain't a P...","[0, 19192, 328, 6541, 2063, 1178, 16935, 4, 13...",32


In [33]:
class ds_sentiment:
    """Indexable dataset over a DataFrame of tokenized texts.

    Each item is a tuple ``(padded_token_ids, label, length)`` where the token ids are
    padded or truncated to exactly ``padlen`` entries and ``length`` is the stored
    length clipped to the range ``[0, padlen]``.
    """

    def __init__ (self,df,bs,padlen=64,xvar='tokens',yvar='label',len_var='n_tok',padding_idx=1):
        """Keep references to the relevant columns of ``df``.

        Args:
            df: DataFrame holding the columns named by ``xvar``, ``yvar`` and ``len_var``.
            bs: batch size; stored on the instance but not used by this class.
            padlen: fixed length every token sequence is padded / truncated to.
            xvar: name of the column with the token-id sequences.
            yvar: name of the label column.
            len_var: name of the column with the sequence lengths.
            padding_idx: value used to fill positions past the end of a sequence.
        """
        self.bs=bs
        self.padlen=padlen
        self.padding_idx=padding_idx
        self.x=df[xvar]
        self.y=df[yvar]
        # Lengths can never exceed the padded length, so cap them up front.
        self.len_var=df[len_var].clip(0,padlen)

    def pad (self,x):
        """Return ``x`` as an int64 array of exactly ``padlen`` entries.

        Sequences longer than ``padlen`` are cut off at ``padlen``; shorter ones are
        right-filled with ``padding_idx``.
        """
        keep=min(len(x),self.padlen)
        out=(np.ones(self.padlen)*self.padding_idx).astype(np.int64)
        out[:keep]=x[:keep]
        return out

    def __getitem__(self,idx):
        """Return ``(padded_tokens, label, clipped_length)`` for positional index ``idx``."""
        tokens=self.pad(self.x.iloc[idx])
        label=self.y.iloc[idx]
        length=self.len_var.iloc[idx]
        return tokens,label,length

    def __len__(self):
        """Number of rows in the underlying token column."""
        return len(self.x)

In [34]:
bs = 10       # batch size handed to the datasets and DataLoaders
bptt = 70     # forwarded to Learner, which stores it but does not otherwise read it
padlen = 50   # every token sequence is padded / truncated to this many ids

# Cap the recorded lengths at the padded length so they never point past the padded tensor.
df_train['n_tok'] = df_train['n_tok'].clip(upper=padlen)
df_valid['n_tok'] = df_valid['n_tok'].clip(upper=padlen)

df_train['n_tok'].describe()

count    8577.000000
mean       29.039408
std         9.143004
min         8.000000
25%        22.000000
50%        28.000000
75%        35.000000
max        50.000000
Name: n_tok, dtype: float64

In [35]:
# Wrap both frames as datasets that yield (padded token ids, label, length) with sequences fixed to `padlen`.
dstrain=ds_sentiment(df_train,bs,padlen)
dsvalid=ds_sentiment(df_valid,bs,padlen)

In [36]:
# Training batches are reshuffled every epoch; validation batches keep the frame's row order,
# which the later id/prediction alignment relies on.
dltrain = DataLoader(dstrain, batch_size=bs, shuffle=True)
dlvalid = DataLoader(dsvalid, batch_size=bs, shuffle=False)

In [37]:
# Grab a single training batch for inspection. `next(iter(...))` fails loudly (StopIteration)
# on an empty loader instead of silently leaving xb / yb / xlen undefined.
xb, yb, xlen = next(iter(dltrain))

In [38]:
xb, yb, xlen

(tensor([[    0, 40860,    38,   437,    10,  1508, 10746,   122,     6,  1183,
             66,     4,   849,    29,  9636, 16836,     2,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1],
         [    0,  5625,    15,   127,   422,    38,    21, 15378,    30,   132,
           8516, 17559,    29,     4,    38,  1467,    38,    21,    11,    13,
             24,    77,    51,  1340,   162,     8,   554, 16022,  1975,  9997,
          41167,   162,     4, 41926,   328,   849,  3654, 17841, 27969,     2,
              1,     1,     1,     1,     1,     1,     1,     1,     1,     1],
         [    0,   510, 14851,    10,   372,   891,     9,   360,     7,   213,
              7,     5,  4105,     4,   849,  3654,   849,  9946,   219, 17841,
          10674, 31193, 12736,     2, 

In [39]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

In [40]:
# Model / optimisation hyper-parameters. Trailing "was ..." notes record values the author tried earlier.
n_emb=768 # width of the encoder output fed to the LSTM (was 650)
n_hidden=64 # LSTM hidden size per direction (was 400)
n_layers= 2 # number of stacked LSTM layers (was 2)
dropout=0.1 # dropout between LSTM layers (was 0.5)
wd=1e-5 # weight decay passed to the optimizer
bidirectional=True # run the LSTM in both directions
dropout_e=0.2 # was 0.5 - changing to 0.4, 0.3 or any dropout value did not make much difference (no dropout layer currently reads it)
dropout_o=0.1 # was 0.5 (stored on the model; no dropout layer currently reads it)
n_out=11 # number of output classes (label-encoded indices 0..10)

In [41]:
class sentiment_classifier (nn.Module):
    """Encoder + (bi)LSTM classifier with max-over-time pooling.

    Pipeline of ``forward``: token ids -> encoder ``last_hidden_state`` -> LSTM ->
    concatenate encoder and LSTM features per position -> max over the sequence axis ->
    linear projection to 64 units -> linear layer to ``n_out`` logits.

    Args:
        roberta_model: encoder module; called as ``encoder(Xb)`` and expected to return an
            object with a ``last_hidden_state`` attribute.
        n_emb: feature width of the encoder output (input size of the LSTM).
        n_hidden: LSTM hidden size per direction.
        n_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM runs in both directions.
        bs: batch size, used only to size the tensors created by ``init_hidden``.
        device: device on which ``init_hidden`` allocates its tensors.
        dropout_e, dropout_o: stored on the instance; no layer in this class reads them.
        dropout: dropout applied between LSTM layers.
        n_out: number of output classes.
        n_filters, filter_sizes: stored on the instance; not used by the architecture.
    """
    def __init__(self,roberta_model,n_emb,n_hidden,n_layers,bidirectional,bs,device,dropout_e=0.05,dropout=0.5,\
                 dropout_o=0.5,n_out=11,n_filters=100,filter_sizes=[3,4,5]):
        super().__init__()
        self.roberta_model,self.n_emb,self.n_hidden,self.n_layers,self.bidirectional,self.bs,self.device=\
                            roberta_model,n_emb,n_hidden,n_layers,bidirectional,bs,device
        # Copy the list so instances never share (or mutate) the default argument object.
        self.n_out,self.n_filters,self.filter_sizes=n_out,n_filters,list(filter_sizes)
        self.dropout_e,self.dropout,self.dropout_o=dropout_e,dropout,dropout_o

        self.create_architecture()
        self.init_hidden()
        self.criterion=nn.CrossEntropyLoss()

    def set_dropouts(self, dropout, dropout_o, dropout_e):
        """Overwrite the stored dropout values.

        Only the attributes are updated; the LSTM built in ``create_architecture`` is not rebuilt.
        """
        self.dropout, self.dropout_o, self.dropout_e = dropout, dropout_o, dropout_e

    def freeze_embedding(self):
        """Exclude every encoder parameter from gradient updates."""
        for param in self.encoder.parameters():
            param.requires_grad = False

    def unfreeze_embedding(self):
        """Make every encoder parameter trainable again."""
        for param in self.encoder.parameters():
            param.requires_grad = True

    def _num_directions(self):
        """Return 2 for a bidirectional LSTM, 1 otherwise."""
        return 2 if self.bidirectional else 1

    def init_hidden(self):
        """Create zero-filled (h, c) tensors and store them on ``self.hidden``.

        The leading dimension is ``n_layers * num_directions``, the layout ``nn.LSTM``
        uses for its state. ``forward`` does not pass ``self.hidden`` to the LSTM, so the
        LSTM always starts from its default zero state; this method is kept because the
        training loop calls it at the start of every epoch.
        """
        shape=(self.n_layers*self._num_directions(),self.bs,self.n_hidden)
        self.hidden=(torch.zeros(shape,device=self.device),
                     torch.zeros(shape,device=self.device))

    def create_architecture(self):
        """Build the layers. Attribute names are unchanged so existing state dicts still load."""
        self.encoder = self.roberta_model

        # LSTM over the encoder outputs (batch-first).
        self.lstm = nn.LSTM(self.n_emb,self.n_hidden,self.n_layers,batch_first=True,dropout=self.dropout,\
                          bidirectional=self.bidirectional)

        # Max over the whole sequence axis. For a length-50 input this equals the previous
        # MaxPool1d(50, stride=1), but it also works for any other sequence length.
        self.max_pool1d = nn.AdaptiveMaxPool1d(1)

        self.flat = nn.Flatten()

        # Pooled feature width = encoder width + LSTM output width
        # (768 + 2*64 = 896 with the notebook's settings).
        pooled_width = self.n_emb + self._num_directions()*self.n_hidden
        self.project = nn.Linear(pooled_width,64)

        self.fc = nn.Linear(64,self.n_out)

    def forward (self,Xb,Yb,Xb_lengths):
        """Return ``(logits, loss)`` for one batch.

        Args:
            Xb: batch of token ids, passed straight to the encoder.
            Yb: class indices; flattened and cast to long for the cross-entropy loss.
            Xb_lengths: accepted for interface compatibility; not used.

        NOTE(review): the encoder is called without an attention mask, so padded positions
        take part in attention and in the max-pooling below — confirm this is intended.
        """
        # Encoder features, batch x sequence x n_emb.
        embs = self.encoder(Xb).last_hidden_state

        # The LSTM runs on the full padded sequence (no packing).
        lstm_out,(hidden,cell)=self.lstm(embs)

        # Stack encoder and LSTM features along the channel axis: batch x channels x sequence.
        catted = torch.cat([embs.permute(0,2,1),lstm_out.permute(0,2,1)],dim=1)

        # Max over the sequence axis, then flatten to batch x channels.
        pooled = self.flat(self.max_pool1d(catted))

        latent = self.project(pooled)
        preds = self.fc(latent)

        loss = self.criterion(preds,Yb.contiguous().long().view(-1))

        return preds,loss

In [42]:
def accuracy_multinomial(preds, actual, device="cpu", cutoff=0.5):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8

    Args:
        preds: 2-D tensor of per-class scores, one row per example.
        actual: 1-D tensor of true class indices, one per row of ``preds``.
        device: unused; kept so existing callers keep working.
        cutoff: unused; kept so existing callers keep working.

    Returns:
        0-dim float tensor holding the fraction of rows whose highest-scoring class
        equals ``actual``.
    """
    # The predicted class is the index of the largest score in each row. Softmax is
    # monotonic, so applying it first would not change which index is largest.
    predicted = preds.argmax(dim=1)
    correct = predicted == actual
    acc = correct.float().sum()/len(correct)
    return acc


In [43]:
class Learner():
    """Minimal training / evaluation loop around a model, an optimizer and a metric.

    The model is called as ``model(Xb, Yb, Xlen)`` and must return ``(preds, loss)``; it must
    also expose ``init_hidden()``. The metric is called as ``metric_fn(preds, targets, device)``
    and must return a 0-dim tensor.

    Attributes filled while running:
        preds / trainY: per-batch predictions and targets seen in training mode.
        preds_valid / actual: the same for evaluation mode.
        losses / lrs: per-batch losses and learning rates recorded by ``lr_find``.
    All four prediction/target lists grow across epochs and are never cleared here.
    """
    def __init__(self,model,optimizer,metric_fn,device,bptt=12,print_every=5,clip_val=None,\
                 cycle_mult=0,lr_decay=1,wd_mult=1):
        self.model,self.optimizer,self.metric_fn,self.device,self.print_every,self.bptt,self.losses,self.clip_val=\
            model,optimizer,metric_fn,device,print_every,bptt,[],clip_val
        self.n_epochs=1
        self.cycle_mult,self.lr_decay=cycle_mult,lr_decay
        self.wd_mult=wd_mult
        # With several parameter groups the values of the last group are the ones remembered.
        for param_group in self.optimizer.param_groups:
            self.start_lr=param_group['lr']
            self.start_wd=param_group['weight_decay']
        self.wd=self.start_wd
        self.lr=self.start_lr
        self.n_epoch=0
        self.lrs=[1e-2,5e-3,1e-4,5e-4]
        self.preds,self.preds_valid,self.trainY,self.actual=[],[],[],[]

    def fit (self,Xb,Yb,Xlen,mode_train=True):
        """Run one batch and return ``(loss, metric)`` as Python floats.

        In training mode the gradients are computed, clipped to ``clip_val`` (when set) and
        then applied. In evaluation mode the forward pass runs without gradient tracking.
        Predictions and targets are appended, detached and moved to the CPU, to the lists
        described on the class.
        """
        if mode_train:
            self.model.train()
        else:
            self.model.eval()

        # Only build the autograd graph when a backward pass will follow.
        with torch.set_grad_enabled(mode_train):
            preds,loss=self.model(Xb,Yb,Xlen)

        with torch.no_grad():
            targets=Yb.view(-1)
            acc=self.metric_fn(preds,targets,self.device).item()

            # Keep the history off the accelerator so it does not accumulate device memory.
            if mode_train:
                self.trainY.append(targets.detach().cpu())
                self.preds.append(preds.detach().cpu())
            else:
                self.actual.append(targets.detach().cpu())
                self.preds_valid.append(preds.detach().cpu())

        if mode_train:
            self.optimizer.zero_grad()
            loss.backward()
            # Clipping has to happen between backward() and step(); clipping after the
            # step would leave the applied update unclipped.
            if self.clip_val is not None:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip_val)
            self.optimizer.step()

        return loss.item(), acc

    def lr_find (self,start_lr,end_lr,iterator,n_batch):
        """Train over ``iterator`` while raising the learning rate geometrically.

        Builds ``n_batch`` learning rates growing by a constant factor up to ``end_lr``,
        stores them on ``self.lrs``, clears ``self.losses`` and records one loss per batch
        so ``plot_lrs`` can plot loss against learning rate.
        """
        num_steps=n_batch
        lrs=[]
        lr=start_lr
        for _ in range(num_steps):
            lr=lr*(end_lr/start_lr)**(1/num_steps)
            lrs.append(lr)
        self.lrs=lrs
        # Start from a clean slate so losses line up one-to-one with this run's rates.
        self.losses=[]
        self.run_epoch(iterator,mode_train=True,lrs=lrs)

    def run_epoch(self,iterator,mode_train,lrs=None):
        """Run every batch of ``iterator`` once and return ``(mean_loss, mean_metric)``.

        When ``lrs`` is given, batch ``k`` runs with learning rate ``lrs[k]``, its loss is
        appended to ``self.losses``, and the epoch stops once the schedule is exhausted.
        Means are taken over the batches actually processed; an empty iterator yields zeros.
        """
        epoch_loss,epoch_acc,k=0,0,0
        self.model.init_hidden()
        for Xb,Yb,Xlen in iterator:
            if lrs is not None:
                if k>=len(lrs):
                    # Fewer scheduled rates than batches: stop rather than index past the end.
                    break
                for param_group in self.optimizer.param_groups:
                    param_group['lr']=lrs[k]

            Xb=Xb.to(self.device)
            Yb=Yb.to(self.device)
            Xlen=Xlen.to(self.device)

            loss,acc=self.fit(Xb,Yb,Xlen,mode_train)

            if lrs is not None:
                self.losses.append(loss)

            epoch_loss+=loss
            epoch_acc+=acc

            k=k+1
            if k%self.print_every == 0:
                print (f'Batch:{k} {epoch_loss/(k)}  {epoch_acc/(k)}')
                torch.cuda.empty_cache()

        n_batches=max(k,1)
        return epoch_loss/n_batches,epoch_acc/n_batches

    def plot_lrs(self, n_roll=1):
        """Plot the rolling-mean loss recorded by ``lr_find`` against learning rate (log x-axis)."""
        import seaborn as sns
        losses=pd.Series(self.losses).rolling(n_roll).mean()
        # Trim the schedule to the recorded losses in case the run ended early.
        ax=sns.lineplot(x=self.lrs[:len(losses)],y=losses)
        ax.set_xscale('log')
        ax.set_ylabel('Loss')
        ax.set_xlabel('Learning Rate')

    def run_epochs(self,dltrain,dlvalid,n_epochs=1):
        """Alternate one training and one evaluation pass for ``n_epochs`` epochs.

        After every epoch a summary line is printed and, when ``cycle_mult`` is positive, the
        learning-rate cycle is advanced. ``self.lr`` is then written back to the optimizer;
        ``self.wd`` is tracked but not written back.
        """
        use_cycles=self.cycle_mult > 0
        reset_cycle=self.cycle_mult if use_cycles else None

        for epoch in range(n_epochs):
            loss,acc=self.run_epoch(dltrain,True)
            lossv,accv=self.run_epoch(dlvalid,mode_train=False)
            print (f'Epoch:{epoch} Learning rate {self.lr} Weight Decay {self.wd} Train Loss:{loss} Train Accuracy:{acc} Valid Loss:{lossv} Valid Accuracy:{accv}')

            if use_cycles:
                if self.n_epoch==reset_cycle:
                    # End of a cycle: restart from the initial rate and schedule the next reset.
                    self.lr=self.start_lr
                    reset_cycle=self.n_epoch+reset_cycle
                else:
                    self.lr*=(self.lr_decay**self.n_epoch)
                    if self.n_epoch>1:
                        self.wd*=self.wd_mult
            self.n_epoch+=1

            for param_group in self.optimizer.param_groups:
                param_group['lr']=self.lr

In [44]:
# Prefer the first GPU, but fall back to the CPU so the notebook still runs on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [45]:
# Build the classifier around the pretrained encoder using the hyper-parameters defined above,
# then move all of its parameters to `device`.
model_sentiment=sentiment_classifier (roberta_model,n_emb,n_hidden,n_layers,bidirectional,bs,device,dropout_e,dropout,\
                 dropout_o,n_out=11)
model_sentiment=model_sentiment.to(device)

In [46]:
def count_parameters(model):
    """Return the total element count of the model's parameters that have ``requires_grad`` set."""
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total
print(f'The model has {count_parameters(model_sentiment):,} trainable parameters')

The model has 125,230,091 trainable parameters


In [47]:
# A single Adam optimizer over every model parameter (encoder included), with one shared
# learning rate and L2 weight decay `wd`.
optimizer = torch.optim.Adam(model_sentiment.parameters(),lr=2e-5, eps=1e-6, weight_decay=wd)

In [48]:
# Progress is printed every 100 batches, gradients are clipped to a norm of 0.25, and the
# learning-rate cycle length is 10 epochs.
learner = Learner(
    model_sentiment,
    optimizer,
    accuracy_multinomial,
    device,
    bptt=bptt,
    print_every=100,
    clip_val=0.25,
    cycle_mult=10,
)

In [49]:
learner.lr_decay, learner.wd_mult

(1, 1)

In [50]:
#model_sentiment.freeze_embedding()

In [51]:
#model_sentiment.unfreeze_embedding()

In [134]:
# One more epoch of training followed by a full evaluation pass.
# NOTE(review): the execution counts of this cell and the next are not sequential, so the
# logged numbers come from an out-of-order session — re-run from a fresh kernel before trusting them.
learner.run_epochs(dltrain,dlvalid,1)

Batch:100 0.9794672471284867  0.6000000071525574
Batch:200 0.9657608819752932  0.6100000075250864
Batch:300 0.9898218957086404  0.6000000072767337
Batch:400 0.9860010497644544  0.6015000076033175
Batch:500 0.984073109537363  0.5990000077039004
Batch:600 0.9919110686331988  0.5965000075722734
Batch:700 0.9990650079718658  0.5940000073931047
Batch:800 0.9976757811196149  0.5961250073555857
Batch:100 1.5071546304225922  0.37300000615417955
Batch:200 1.9387075287103652  0.30800000552088025
Batch:300 1.9978224752346674  0.27200000517070294
Epoch:0 Learning rate 2e-05 Weight Decay 1e-05 Train Loss:0.9987884940696763 Train Accuracy:0.5963536537836343 Valid Loss:1.9846045870070506 Valid Accuracy:0.27673160706204597


In [102]:
# One epoch of training followed by a full evaluation pass.
learner.run_epochs(dltrain,dlvalid,1) # author's note: the checkpoint was last saved after this run

Batch:100 0.939082071185112  0.6360000079870224
Batch:200 0.9373407655954361  0.6365000091493129
Batch:300 0.9332508767644564  0.6363333415488402
Batch:400 0.9319104117900133  0.6352500082924962
Batch:500 0.9410927782654762  0.6278000077903271
Batch:600 0.9375270954767863  0.6311666745692491
Batch:700 0.9410064971446991  0.6311428649084908
Batch:800 0.9434232905879617  0.6297500080801547
Batch:100 1.2345005133748055  0.5600000098347664
Batch:200 1.6866506744921208  0.4300000075623393
Batch:300 1.7293464050690333  0.41866667379935585
Epoch:0 Learning rate 2e-05 Weight Decay 1e-05 Train Loss:0.9384517605418647 Train Accuracy:0.6323010404105787 Valid Loss:1.7719357502881927 Valid Accuracy:0.3979798049261474


## Compute the cosine-similarity metric with the SemEval scoring script

In [135]:
# Tab-separated copy of the test split; it supplies the tweet ids (dropped from `df_valid` earlier)
# and the raw labels used below.
df_test = pd.read_csv("test_3957_preprocessed.tsv", sep='\t')

In [136]:
df_test.head()

Unnamed: 0,id,label,category,text
0,537651335752323073,-3,sarcasm,great come back dorm find roommate rearranged ...
1,538332513408937986,-2,sarcasm,jean howie neighbour mum wedding make whole da...
2,538050779824025600,-3,sarcasm,lol katie hopkins u talk shagging married men ...
3,538017499724279809,-3,sarcasm,qpr looked terrible yesterday ferdinand player
4,537958766910926848,0,sarcasm,next jamie foxx ft 2 chainz party party tune t...


In [137]:
df_valid.head()

Unnamed: 0,label,text,tokens,n_tok
0,2,So great to come back to my dorm and find that...,"[0, 2847, 372, 7, 283, 124, 7, 127, 18344, 8, ...",33
1,3,If jean howie my neighbour is at my mums weddi...,"[0, 1106, 1236, 12001, 141, 324, 127, 14915, 1...",31
2,2,@KTHopkins @MissKatiePrice LOL@ katie hopkins ...,"[0, 1039, 530, 3732, 1517, 7327, 787, 22885, 2...",43
3,2,@stuarteagle QPR? They looked terrible yesterd...,"[0, 1039, 620, 41962, 242, 21851, 1209, 4454, ...",22
4,5,"Next! Jamie Foxx ft. 2 Chainz ""Party Ain't a P...","[0, 19192, 328, 6541, 2063, 1178, 16935, 4, 13...",32


In [138]:
# Shift the raw labels by +5 so they can be compared with the encoded labels in `df_valid`.
# Not idempotent: re-running this cell shifts the labels again.
df_test['label'] = df_test['label']+5

In [139]:
(df_test['label'] == df_valid['label']).value_counts()

True    3957
Name: label, dtype: int64

In [140]:
df_test.shape

(3957, 4)

In [141]:
# Tweet ids, in file order, for the submission file.
ids = df_test['id']

In [142]:
# For every stored evaluation batch, take the index of the highest-scoring class in each row.
proper_preds_valid = [item.max(1)[1] for item in learner.preds_valid]

In [143]:
# Flatten the per-batch evaluation tensors into flat Python lists.
# `learner.preds_valid` and `learner.actual` keep growing with every evaluation pass, so only
# the trailing `len(df_valid)` entries — the most recent pass — are kept.

from itertools import chain  # no longer needed here; retained in case other cells rely on the name

n_valid = df_valid.shape[0]

# One concatenation plus one `tolist()` replaces iterating every element as a 0-dim tensor
# and calling `.item()` on each of them.
preds_valid = torch.cat(proper_preds_valid)[-n_valid:].tolist()
actual = torch.cat(learner.actual)[-n_valid:].tolist()

In [144]:
df_valid.head()

Unnamed: 0,label,text,tokens,n_tok
0,2,So great to come back to my dorm and find that...,"[0, 2847, 372, 7, 283, 124, 7, 127, 18344, 8, ...",33
1,3,If jean howie my neighbour is at my mums weddi...,"[0, 1106, 1236, 12001, 141, 324, 127, 14915, 1...",31
2,2,@KTHopkins @MissKatiePrice LOL@ katie hopkins ...,"[0, 1039, 530, 3732, 1517, 7327, 787, 22885, 2...",43
3,2,@stuarteagle QPR? They looked terrible yesterd...,"[0, 1039, 620, 41962, 242, 21851, 1209, 4454, ...",22
4,5,"Next! Jamie Foxx ft. 2 Chainz ""Party Ain't a P...","[0, 19192, 328, 6541, 2063, 1178, 16935, 4, 13...",32


In [145]:
import numpy as np
# Turn the predicted class indices back into raw labels by undoing the +5 shift.
# Not idempotent: re-running this cell subtracts 5 again.
preds_valid = np.array(preds_valid)
preds_valid = preds_valid - 5

In [146]:

# Two-column submission frame: tweet id and predicted label, aligned by position.
my_model_test_outputs = pd.DataFrame({'id': ids, 'output': preds_valid})

In [147]:
my_model_test_outputs.head()

Unnamed: 0,id,output
0,537651335752323073,-3
1,538332513408937986,-3
2,538050779824025600,-3
3,538017499724279809,-3
4,537958766910926848,-1


In [148]:
# Write the submission as a tab-separated file without the index column.
my_model_test_outputs.to_csv("exact_try.tsv", sep="\t", index=False)    # author's note: scored cos = 0.82

## Mean squared error (MSE)

In [149]:
# Mean squared error between the predicted labels and the true labels, both with the +5
# shift removed. The predictions are hard class decisions, not continuous scores.
# NOTE(review): `input` shadows the Python builtin of the same name from here on.
mse = nn.MSELoss()
input = torch.tensor(preds_valid)
target = torch.tensor((df_test['label']-5).to_numpy())
output = mse(input.float(), target.float())
output

tensor(2.6831)

In [101]:
# Checkpoint directory. The original location stays the default so existing runs are unaffected,
# but it can be redirected without editing the notebook by setting the COMBO_PATH environment variable.
COMBO_PATH = os.environ.get(
    "COMBO_PATH",
    "C:/Users/Dennis/Desktop/4 trainings/potamias - S15-T11/3. ROBERTA - Potamias/POTAMIAS LATEST EFFORTS/exact_saves",
)
# torch.save does not create missing directories, so make sure the target exists first.
os.makedirs(COMBO_PATH, exist_ok=True)

# State dicts hold tensors only and are the portable checkpoints.
torch.save(model_sentiment.state_dict(),f'{COMBO_PATH}/model_sentiment_state_dict')
torch.save(optimizer.state_dict(),f'{COMBO_PATH}/optimizer_state_dict')

# Whole-object pickles: loading them requires the same class definitions to be importable,
# and `learner` carries every stored prediction batch with it.
torch.save (roberta_model,f'{COMBO_PATH}/roberta_model')
torch.save (model_sentiment,f'{COMBO_PATH}/model_sentiment')
torch.save (optimizer,f'{COMBO_PATH}/optimizer')
torch.save (learner,f'{COMBO_PATH}/learner')
torch.save (learner,f'{COMBO_PATH}/learner')