# Fine tuning version

based on: https://github.com/patil-suraj/exploring-T5/blob/master/t5_fine_tuning.ipynb (IMDB sentiment analysis) | https://towardsdatascience.com/the-guide-to-multi-tasking-with-the-t5-transformer-90c70a08837b (The Guide to Multi-Tasking with the T5 Transformer) | https://discuss.huggingface.co/t/t5-classification-using-text2text/504/8 (SST2 sentiment analysis classification)

# Imports

In [11]:
import sys
!{sys.executable} -m pip install --user pytorch_lightning

[33mThe directory '/home/atroska/.cache/pip/http' or its parent directory is not owned by the current user and the cache has been disabled. Please check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.[0m
[33mThe directory '/home/atroska/.cache/pip' or its parent directory is not owned by the current user and caching wheels has been disabled. check the permissions and owner of that directory. If executing pip with sudo, you may want sudo's -H flag.[0m
Collecting pytorch_lightning
[?25l  Downloading https://files.pythonhosted.org/packages/07/0c/e2d52147ac12a77ee4e7fd7deb4b5f334cfb335af9133a0f2780c8bb9a2c/pytorch_lightning-1.2.10-py3-none-any.whl (841kB)
[K    100% |████████████████████████████████| 849kB 5.2MB/s ta 0:00:01
[?25hCollecting PyYAML!=5.4.*,>=5.1 (from pytorch_lightning)
Collecting torchmetrics==0.2.0 (from pytorch_lightning)
[?25l  Downloading https://files.pythonhosted.org/packages/3a/42/d984612cabf005a265aa99c8d

In [1]:
import argparse
import glob
import os
import json
import time
import logging
import random
import re
from itertools import chain
from string import punctuation
from transformers import pipeline

import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl


from transformers import (
    AdamW,
    T5ForConditionalGeneration,
    T5Tokenizer,
    get_linear_schedule_with_warmup
)

DEBUG:tensorflow:Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.
[nltk_data] Downloading package punkt to /home/atroska/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


# Methods

In [2]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
      torch.cuda.manual_seed_all(seed)

In [3]:
def load_tweet(path: str, label: int):
    data = []
    with open(path) as file:
        for line in file:
            data.append(line)
    data_df = pd.DataFrame(data, columns = {'tweet'})
    data_df['label'] = label
    print('Loaded data: ' + path)
    return data_df

In [4]:
def concat_data(dset1, dset2):
    df = pd.concat([dset1, dset2])
    print('Concatenated data')
    return df

In [5]:
def preprocess_tweet(tweet):
    '''
    :param tweet: tweet as a string
    :return: string s which is preprocessed tweet
    '''
    tweet = re.sub('<user>', '', tweet) # remove user tags
    tweet = re.sub('<url>', '', tweet) # remove url tags
    tweet = re.sub('#\w*', '', tweet) # remove hashtags
    tweet = re.sub('[0-9]', '', tweet) # remove numbers
    tweet = re.sub('[^\w\s]', '', tweet) # remove punctuation
    tweet = re.sub('\s+', ' ', tweet) # remove excess whitespace
    tweet = re.sub('^\s', '', tweet) # remove excess whitespace
    tweet = re.sub('\s$', '', tweet) # remove excess whitespace
    #tweet += '\n'
    #tweet = ' '.join(ws.segment(tweet)) # segment words
    #tweet = ' '.join([speller(w) for w in tweet.split()]) # spell checking
    return tweet

In [6]:
def sst2_text_processing(tweet):
    #add the correct prefix in order to use pretrained t5 model
    new_tweet = "sst2 sentence: " + tweet
    return new_tweet
    

In [7]:
def accuracy(true, preds):
    from sklearn.metrics import accuracy_score
    if len(preds) != len(true):
        print("ERROR: preds and true don't have the same length")
        return 0
    else:
        return accuracy_score(true, preds, normalize=False)

# Main

In [8]:
#POSITIVE_PATH = '/cluster/home/alefevre/data/twitter/Datas/train_pos.txt'
#NEGATIVE_PATH  = '/cluster/home/alefevre/data/twitter/Datas/train_neg.txt'
#OUTPUT_PREDS_PATH = '/cluster/home/alefevre/results/twitter/XLNET_test_preds.txt'
#PRETRAINED_PATH = 'xlnet-base-cased'

POSITIVE_PATH = '/home/atroska/Documents/Zurich/MA2/CIL/Project/Datas/twitter-datasets/train_pos.txt'
NEGATIVE_PATH  = '/home/atroska/Documents/Zurich/MA2/CIL/Project/Datas/twitter-datasets/train_neg.txt'
OUTPUT_PREDS_PATH = '/home/atroska/Documents/Zurich/MA2/CIL/Project/Datas/T5google_test_preds.txt'
N_EPOCHS = 5 # or 3 ?
BATCH_SIZE = 16
TEST_SIZE = 0.10


set_seed(42)


#load datas
pos_df = load_tweet(POSITIVE_PATH, 1)
neg_df = load_tweet(NEGATIVE_PATH, 0)

#concat datas
all_tweet_df = concat_data(pos_df, neg_df)

#apply basic preprocessing (tobias)
all_tweet_df['tweet'] = all_tweet_df['tweet'].apply(lambda row: preprocess_tweet(str(row)))

  return torch._C._cuda_getDeviceCount() > 0


Loaded data: /home/atroska/Documents/Zurich/MA2/CIL/Project/Datas/twitter-datasets/train_pos.txt
Loaded data: /home/atroska/Documents/Zurich/MA2/CIL/Project/Datas/twitter-datasets/train_neg.txt
Concatenated data


In [9]:
all_tweet_df

Unnamed: 0,tweet,label
0,i dunno justin read my mention or not only jus...,1
1,because your logic is so dumb i wont even crop...,1
2,just put casper in a box looved the battle,1
3,thanks sir dont trip lil mama just keep doin y...,1
4,visiting my brother tmr is the bestest birthda...,1
...,...,...
99995,cant wait to fake tan tonight hate being pale,0
99996,darling i lost my internet connection and its ...,0
99997,kanguru defender basic gb usb flash drive kdfb...,0
99998,rizan is sad now,0


# Transformer pipeline analysis

## Example

## On our dataset

In [10]:
#load datas
pos_df = load_tweet(POSITIVE_PATH, 1)
neg_df = load_tweet(NEGATIVE_PATH, 0)

#concat datas
all_tweet_df = concat_data(pos_df, neg_df)

#apply basic preprocessing (tobias)
all_tweet_df['tweet'] = all_tweet_df['tweet'].apply(lambda row: preprocess_tweet(str(row)))


Loaded data: /home/atroska/Documents/Zurich/MA2/CIL/Project/Datas/twitter-datasets/train_pos.txt
Loaded data: /home/atroska/Documents/Zurich/MA2/CIL/Project/Datas/twitter-datasets/train_neg.txt
Concatenated data


In [11]:
all_tweet_df

Unnamed: 0,tweet,label
0,i dunno justin read my mention or not only jus...,1
1,because your logic is so dumb i wont even crop...,1
2,just put casper in a box looved the battle,1
3,thanks sir dont trip lil mama just keep doin y...,1
4,visiting my brother tmr is the bestest birthda...,1
...,...,...
99995,cant wait to fake tan tonight hate being pale,0
99996,darling i lost my internet connection and its ...,0
99997,kanguru defender basic gb usb flash drive kdfb...,0
99998,rizan is sad now,0


In [12]:
all_tweet_df_randomize =  all_tweet_df.sample(frac=1)

In [13]:
all_tweet_df_randomize

Unnamed: 0,tweet,label
19737,so nice out but so fkn cold,0
72272,okay i hope you will not fool me as did one pe...,1
58154,assassins creed limited edition video game ass...,0
65426,lol qt lmao hell we can start our day early,1
30074,me and my friend think you look like stacy sol...,1
...,...,...
19879,rt i dont want to wash away the glitter,0
3694,hiring sr quality assurance testing specialist...,0
31932,followed me for a day and then unfollowed me,0
46867,i wont be there somethings wrong with my head ...,0


In [14]:
classifier = pipeline('sentiment-analysis')

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140369799667104 on /home/atroska/.cache/huggingface/transformers/4e60bb8efad3d4b7dc9969bf204947c185166a0a3cf37ddb6f481a876a3777b5.9f8326d0b7697c7fd57366cdde57032f46bc10e37ae81cb7eb564d66d23ec96b.lock
INFO:filelock:Lock 140369799667104 acquired on /home/atroska/.cache/huggingface/transformers/4e60bb8efad3d4b7dc9969bf204947c185166a0a3cf37ddb6f481a876a3777b5.9f8326d0b7697c7fd57366cdde57032f46bc10e37ae81cb7eb564d66d23ec96b.lock
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json HTTP/1.1" 200 629


HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=629.0), HTML(value='')))

DEBUG:filelock:Attempting to release lock 140369799667104 on /home/atroska/.cache/huggingface/transformers/4e60bb8efad3d4b7dc9969bf204947c185166a0a3cf37ddb6f481a876a3777b5.9f8326d0b7697c7fd57366cdde57032f46bc10e37ae81cb7eb564d66d23ec96b.lock
INFO:filelock:Lock 140369799667104 released on /home/atroska/.cache/huggingface/transformers/4e60bb8efad3d4b7dc9969bf204947c185166a0a3cf37ddb6f481a876a3777b5.9f8326d0b7697c7fd57366cdde57032f46bc10e37ae81cb7eb564d66d23ec96b.lock
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co





DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
DEBUG:filelock:Attempting to acquire lock 140369799667440 on /home/atroska/.cache/huggingface/transformers/8d04c767d9d4c14d929ce7ad8e067b80c74dbdb212ef4c3fb743db4ee109fae0.9d268a35da669ead745c44d369dc9948b408da5010c6bac414414a7e33d5748c.lock
INFO:filelock:Lock 140369799667440 acquired on /home/atroska/.cache/huggingface/transformers/8d04c767d9d4c14d929ce7ad8e067b80c74dbdb212ef4c3fb743db4ee109fae0.9d268a35da669ead745c44d369dc9948b408da5010c6bac414414a7e33d5748c.lock
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): cdn-lfs.huggingface.co
DEBUG:urllib3.connectionpool:https://cdn-lfs.huggingface.co:443 "GET /distilbert-base-uncased-finetuned-sst-2-english/60554cbd7781b09d87f1ececbea8c064b94e49a7f03fd88e8775bfe6cc3d9f88 HTTP/1.1" 200 267844284


HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=267844284.0), HTML(value='')))

DEBUG:filelock:Attempting to release lock 140369799667440 on /home/atroska/.cache/huggingface/transformers/8d04c767d9d4c14d929ce7ad8e067b80c74dbdb212ef4c3fb743db4ee109fae0.9d268a35da669ead745c44d369dc9948b408da5010c6bac414414a7e33d5748c.lock
INFO:filelock:Lock 140369799667440 released on /home/atroska/.cache/huggingface/transformers/8d04c767d9d4c14d929ce7ad8e067b80c74dbdb212ef4c3fb743db4ee109fae0.9d268a35da669ead745c44d369dc9948b408da5010c6bac414414a7e33d5748c.lock





DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/vocab.txt HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140369799721312 on /home/atroska/.cache/huggingface/transformers/83261b0c74c462e53d6367de0646b1fca07d0f15f1be045156b9cf8c71279cc9.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
INFO:filelock:Lock 140369799721312 acquired on /home/atroska/.cache/huggingface/transformers/83261b0c74c462e53d6367de0646b1fca07d0f15f1be045156b9cf8c71279cc9.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggin

HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=231508.0), HTML(value='')))

DEBUG:filelock:Attempting to release lock 140369799721312 on /home/atroska/.cache/huggingface/transformers/83261b0c74c462e53d6367de0646b1fca07d0f15f1be045156b9cf8c71279cc9.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
INFO:filelock:Lock 140369799721312 released on /home/atroska/.cache/huggingface/transformers/83261b0c74c462e53d6367de0646b1fca07d0f15f1be045156b9cf8c71279cc9.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co





DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/tokenizer.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/added_tokens.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/special_tokens_map.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:filelock:Attempting to acquire lock 140369799721480 on /home/atroska/.cache/huggingface/transformers/d44ec0488a5f13d92

HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=48.0), HTML(value='')))

DEBUG:filelock:Attempting to release lock 140369799721480 on /home/atroska/.cache/huggingface/transformers/d44ec0488a5f13d92b3934cb68cc5849bd74ce63ede2eea2bf3c675e1e57297c.627f9558061e7bc67ed0f516b2f7efc1351772cc8553101f08748d44aada8b11.lock
INFO:filelock:Lock 140369799721480 released on /home/atroska/.cache/huggingface/transformers/d44ec0488a5f13d92b3934cb68cc5849bd74ce63ede2eea2bf3c675e1e57297c.627f9558061e7bc67ed0f516b2f7efc1351772cc8553101f08748d44aada8b11.lock
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co





DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased-finetuned-sst-2-english/resolve/main/modelcard.json HTTP/1.1" 404 0


In [38]:
Transormers_preds_randomize = []

In [39]:
counter = 0
for i in all_tweet_df_randomize['tweet']:
    counter += 1
    if counter%100 == 0:
        print("Iteration: " + str(counter))
    preds = classifier(i)
    
    Transormers_preds_randomize.append(preds[0].get('label'))

Iteration: 100
Iteration: 200
Iteration: 300
Iteration: 400
Iteration: 500
Iteration: 600
Iteration: 700
Iteration: 800
Iteration: 900
Iteration: 1000
Iteration: 1100
Iteration: 1200
Iteration: 1300
Iteration: 1400
Iteration: 1500
Iteration: 1600
Iteration: 1700
Iteration: 1800
Iteration: 1900
Iteration: 2000
Iteration: 2100
Iteration: 2200
Iteration: 2300
Iteration: 2400
Iteration: 2500
Iteration: 2600
Iteration: 2700
Iteration: 2800
Iteration: 2900
Iteration: 3000
Iteration: 3100
Iteration: 3200
Iteration: 3300
Iteration: 3400
Iteration: 3500
Iteration: 3600
Iteration: 3700
Iteration: 3800
Iteration: 3900
Iteration: 4000
Iteration: 4100
Iteration: 4200
Iteration: 4300
Iteration: 4400
Iteration: 4500
Iteration: 4600
Iteration: 4700
Iteration: 4800
Iteration: 4900
Iteration: 5000
Iteration: 5100
Iteration: 5200
Iteration: 5300
Iteration: 5400
Iteration: 5500
Iteration: 5600
Iteration: 5700
Iteration: 5800
Iteration: 5900
Iteration: 6000
Iteration: 6100
Iteration: 6200
Iteration: 6300
I

Iteration: 49000
Iteration: 49100
Iteration: 49200
Iteration: 49300
Iteration: 49400
Iteration: 49500
Iteration: 49600
Iteration: 49700
Iteration: 49800
Iteration: 49900
Iteration: 50000
Iteration: 50100
Iteration: 50200
Iteration: 50300
Iteration: 50400
Iteration: 50500
Iteration: 50600
Iteration: 50700
Iteration: 50800
Iteration: 50900
Iteration: 51000
Iteration: 51100
Iteration: 51200
Iteration: 51300
Iteration: 51400
Iteration: 51500
Iteration: 51600
Iteration: 51700
Iteration: 51800
Iteration: 51900
Iteration: 52000
Iteration: 52100
Iteration: 52200
Iteration: 52300
Iteration: 52400
Iteration: 52500
Iteration: 52600
Iteration: 52700
Iteration: 52800
Iteration: 52900
Iteration: 53000
Iteration: 53100
Iteration: 53200
Iteration: 53300
Iteration: 53400
Iteration: 53500
Iteration: 53600
Iteration: 53700
Iteration: 53800
Iteration: 53900
Iteration: 54000
Iteration: 54100
Iteration: 54200
Iteration: 54300
Iteration: 54400
Iteration: 54500
Iteration: 54600
Iteration: 54700
Iteration: 548

Iteration: 97200
Iteration: 97300
Iteration: 97400
Iteration: 97500
Iteration: 97600
Iteration: 97700
Iteration: 97800
Iteration: 97900
Iteration: 98000
Iteration: 98100
Iteration: 98200
Iteration: 98300
Iteration: 98400
Iteration: 98500
Iteration: 98600
Iteration: 98700
Iteration: 98800
Iteration: 98900
Iteration: 99000
Iteration: 99100
Iteration: 99200
Iteration: 99300
Iteration: 99400
Iteration: 99500
Iteration: 99600
Iteration: 99700
Iteration: 99800
Iteration: 99900
Iteration: 100000
Iteration: 100100
Iteration: 100200
Iteration: 100300
Iteration: 100400
Iteration: 100500
Iteration: 100600
Iteration: 100700
Iteration: 100800
Iteration: 100900
Iteration: 101000
Iteration: 101100
Iteration: 101200
Iteration: 101300
Iteration: 101400
Iteration: 101500
Iteration: 101600
Iteration: 101700
Iteration: 101800
Iteration: 101900
Iteration: 102000
Iteration: 102100
Iteration: 102200
Iteration: 102300
Iteration: 102400
Iteration: 102500
Iteration: 102600
Iteration: 102700
Iteration: 102800
It

Iteration: 142900
Iteration: 143000
Iteration: 143100
Iteration: 143200
Iteration: 143300
Iteration: 143400
Iteration: 143500
Iteration: 143600
Iteration: 143700
Iteration: 143800
Iteration: 143900
Iteration: 144000
Iteration: 144100
Iteration: 144200
Iteration: 144300
Iteration: 144400
Iteration: 144500
Iteration: 144600
Iteration: 144700
Iteration: 144800
Iteration: 144900
Iteration: 145000
Iteration: 145100
Iteration: 145200
Iteration: 145300
Iteration: 145400
Iteration: 145500
Iteration: 145600
Iteration: 145700
Iteration: 145800
Iteration: 145900
Iteration: 146000
Iteration: 146100
Iteration: 146200
Iteration: 146300
Iteration: 146400
Iteration: 146500
Iteration: 146600
Iteration: 146700
Iteration: 146800
Iteration: 146900
Iteration: 147000
Iteration: 147100
Iteration: 147200
Iteration: 147300
Iteration: 147400
Iteration: 147500
Iteration: 147600
Iteration: 147700
Iteration: 147800
Iteration: 147900
Iteration: 148000
Iteration: 148100
Iteration: 148200
Iteration: 148300
Iteration:

Iteration: 188500
Iteration: 188600
Iteration: 188700
Iteration: 188800
Iteration: 188900
Iteration: 189000
Iteration: 189100
Iteration: 189200
Iteration: 189300
Iteration: 189400
Iteration: 189500
Iteration: 189600
Iteration: 189700
Iteration: 189800
Iteration: 189900
Iteration: 190000
Iteration: 190100
Iteration: 190200
Iteration: 190300
Iteration: 190400
Iteration: 190500
Iteration: 190600
Iteration: 190700
Iteration: 190800
Iteration: 190900
Iteration: 191000
Iteration: 191100
Iteration: 191200
Iteration: 191300
Iteration: 191400
Iteration: 191500
Iteration: 191600
Iteration: 191700
Iteration: 191800
Iteration: 191900
Iteration: 192000
Iteration: 192100
Iteration: 192200
Iteration: 192300
Iteration: 192400
Iteration: 192500
Iteration: 192600
Iteration: 192700
Iteration: 192800
Iteration: 192900
Iteration: 193000
Iteration: 193100
Iteration: 193200
Iteration: 193300
Iteration: 193400
Iteration: 193500
Iteration: 193600
Iteration: 193700
Iteration: 193800
Iteration: 193900
Iteration:

In [43]:
true = all_tweet_df_randomize['label']

In [44]:
true_list = list(true)

In [45]:
true_list

[0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,


In [46]:
true_np = np.array(true)

In [47]:
true_np

array([0, 1, 0, ..., 0, 0, 0])

In [48]:
Transormers_preds_randomize

['NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',

In [49]:
Transormers_preds_randomize_labels = []
for i in Transormers_preds_randomize:
    if i == "POSITIVE":
        Transormers_preds_randomize_labels.append(1)
    elif i == "NEGATIVE":
        Transormers_preds_randomize_labels.append(0)
    else :
        Transormers_preds_randomize_labels.append(1)

In [50]:
Transormers_preds_randomize_labels

[0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,


In [51]:
true_list_samelen = true_list[0:len(Transormers_preds_randomize_labels)]

In [52]:
acc = accuracy(true_list_samelen, Transormers_preds_randomize_labels)

In [53]:
acc/len(true_list_samelen)*100

66.8185