In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.util import ngrams

In [2]:
import numpy as np
import random
import torch
import matplotlib.pylab as plt 
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from transformers import get_linear_schedule_with_warmup

seed = 42
batch_size = 16
learning_rate = 2e-5
weight_decay = 1e-2
epsilon = 1e-8

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


Moving 0 files to the new cache system


0it [00:00, ?it/s]

<torch._C.Generator at 0x1ac9c758cd0>

In [3]:
from collections import defaultdict, Counter
plt.style.use('ggplot')
stop=set(stopwords.words('english'))

In [4]:
import re
from nltk.tokenize import word_tokenize
import gensim
import string

In [5]:
from tqdm import tqdm

In [6]:
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from transformers import get_linear_schedule_with_warmup

In [7]:
import torch
from torch import nn
from d2l import torch as d2l

In [8]:
train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')
sample_submission = pd.read_csv('./sample_submission.csv')

print('{} rows and {} cols in train dataset.'.format(train.shape[0], train.shape[1]))
print('{} rows and {} cols in test dataset.'.format(test.shape[0], test.shape[1]))

7613 rows and 5 cols in train dataset.
3263 rows and 4 cols in test dataset.


# Data Cleaning

In [9]:
df = pd.concat([train, test], sort=True)

In [10]:
# Remove url
def remove_url(text):
    url = re.compile(r'https?://\S+|www\.\S+')
    return url.sub(r'', text)
test_str = 'Address of this kernel: https://www.kaggle.com/lilstarboy/kernel4d04fe5667/edit'
print(remove_url(test_str))

Address of this kernel: 


In [11]:
df['text'] = df['text'].apply(remove_url)

In [12]:
# Remove html tag
def remove_html(text):
    html = re.compile(r'<.*?>')
    return html.sub(r'', text)
test_html = """<div>
<h1>Real or Fake</h1>
<p>Kaggle </p>
<a href="https://www.kaggle.com/c/nlp-getting-started">getting started</a>
</div>"""
print(remove_html(test_html))


Real or Fake
Kaggle 
getting started



In [13]:
df['text'] = df['text'].apply(remove_html)

In [14]:
# Remove emojis
def remove_emoji(text):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)
remove_emoji("To test 🚀")

'To test '

In [15]:
df['text'] = df['text'].apply(remove_emoji)

In [16]:
# Remove punctuations
def remove_punct(text):
    table = str.maketrans('', '', string.punctuation)
    return text.translate(table)
test_punct = 'This is very complex!!!!!??'
print(remove_punct(test_punct))

This is very complex


In [17]:
df['text'] = df['text'].apply(remove_punct)

# Final Clean

In [18]:
def clean_tweets(tweet):
    tweet = ''.join([x for x in tweet if x in string.printable])
    tweet = re.sub(r"http\S+", "", tweet)
    return tweet

In [19]:
def remove_emoji(text):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    return emoji_pattern.sub(r'', text)

In [20]:
def remove_punctuations(text):
    punctuations = '@#!?+&*[]-%.:/();$=><|{}^' + "'`"
    
    for p in punctuations:
        text = text.replace(p, f' {p} ')

    text = text.replace('...', ' ... ')
    
    if '...' not in text:
        text = text.replace('..', ' ... ')
    
    return text

In [21]:
abbreviations = {
    "$" : " dollar ",
    "€" : " euro ",
    "4ao" : "for adults only",
    "a.m" : "before midday",
    "a3" : "anytime anywhere anyplace",
    "aamof" : "as a matter of fact",
    "acct" : "account",
    "adih" : "another day in hell",
    "afaic" : "as far as i am concerned",
    "afaict" : "as far as i can tell",
    "afaik" : "as far as i know",
    "afair" : "as far as i remember",
    "afk" : "away from keyboard",
    "app" : "application",
    "approx" : "approximately",
    "apps" : "applications",
    "asap" : "as soon as possible",
    "asl" : "age, sex, location",
    "atk" : "at the keyboard",
    "ave." : "avenue",
    "aymm" : "are you my mother",
    "ayor" : "at your own risk", 
    "b&b" : "bed and breakfast",
    "b+b" : "bed and breakfast",
    "b.c" : "before christ",
    "b2b" : "business to business",
    "b2c" : "business to customer",
    "b4" : "before",
    "b4n" : "bye for now",
    "b@u" : "back at you",
    "bae" : "before anyone else",
    "bak" : "back at keyboard",
    "bbbg" : "bye bye be good",
    "bbc" : "british broadcasting corporation",
    "bbias" : "be back in a second",
    "bbl" : "be back later",
    "bbs" : "be back soon",
    "be4" : "before",
    "bfn" : "bye for now",
    "blvd" : "boulevard",
    "bout" : "about",
    "brb" : "be right back",
    "bros" : "brothers",
    "brt" : "be right there",
    "bsaaw" : "big smile and a wink",
    "btw" : "by the way",
    "bwl" : "bursting with laughter",
    "c/o" : "care of",
    "cet" : "central european time",
    "cf" : "compare",
    "cia" : "central intelligence agency",
    "csl" : "can not stop laughing",
    "cu" : "see you",
    "cul8r" : "see you later",
    "cv" : "curriculum vitae",
    "cwot" : "complete waste of time",
    "cya" : "see you",
    "cyt" : "see you tomorrow",
    "dae" : "does anyone else",
    "dbmib" : "do not bother me i am busy",
    "diy" : "do it yourself",
    "dm" : "direct message",
    "dwh" : "during work hours",
    "e123" : "easy as one two three",
    "eet" : "eastern european time",
    "eg" : "example",
    "embm" : "early morning business meeting",
    "encl" : "enclosed",
    "encl." : "enclosed",
    "etc" : "and so on",
    "faq" : "frequently asked questions",
    "fawc" : "for anyone who cares",
    "fb" : "facebook",
    "fc" : "fingers crossed",
    "fig" : "figure",
    "fimh" : "forever in my heart", 
    "ft." : "feet",
    "ft" : "featuring",
    "ftl" : "for the loss",
    "ftw" : "for the win",
    "fwiw" : "for what it is worth",
    "fyi" : "for your information",
    "g9" : "genius",
    "gahoy" : "get a hold of yourself",
    "gal" : "get a life",
    "gcse" : "general certificate of secondary education",
    "gfn" : "gone for now",
    "gg" : "good game",
    "gl" : "good luck",
    "glhf" : "good luck have fun",
    "gmt" : "greenwich mean time",
    "gmta" : "great minds think alike",
    "gn" : "good night",
    "g.o.a.t" : "greatest of all time",
    "goat" : "greatest of all time",
    "goi" : "get over it",
    "gps" : "global positioning system",
    "gr8" : "great",
    "gratz" : "congratulations",
    "gyal" : "girl",
    "h&c" : "hot and cold",
    "hp" : "horsepower",
    "hr" : "hour",
    "hrh" : "his royal highness",
    "ht" : "height",
    "ibrb" : "i will be right back",
    "ic" : "i see",
    "icq" : "i seek you",
    "icymi" : "in case you missed it",
    "idc" : "i do not care",
    "idgadf" : "i do not give a damn fuck",
    "idgaf" : "i do not give a fuck",
    "idk" : "i do not know",
    "ie" : "that is",
    "i.e" : "that is",
    "ifyp" : "i feel your pain",
    "IG" : "instagram",
    "iirc" : "if i remember correctly",
    "ilu" : "i love you",
    "ily" : "i love you",
    "imho" : "in my humble opinion",
    "imo" : "in my opinion",
    "imu" : "i miss you",
    "iow" : "in other words",
    "irl" : "in real life",
    "j4f" : "just for fun",
    "jic" : "just in case",
    "jk" : "just kidding",
    "jsyk" : "just so you know",
    "l8r" : "later",
    "lb" : "pound",
    "lbs" : "pounds",
    "ldr" : "long distance relationship",
    "lmao" : "laugh my ass off",
    "lmfao" : "laugh my fucking ass off",
    "lol" : "laughing out loud",
    "ltd" : "limited",
    "ltns" : "long time no see",
    "m8" : "mate",
    "mf" : "motherfucker",
    "mfs" : "motherfuckers",
    "mfw" : "my face when",
    "mofo" : "motherfucker",
    "mph" : "miles per hour",
    "mr" : "mister",
    "mrw" : "my reaction when",
    "ms" : "miss",
    "mte" : "my thoughts exactly",
    "nagi" : "not a good idea",
    "nbc" : "national broadcasting company",
    "nbd" : "not big deal",
    "nfs" : "not for sale",
    "ngl" : "not going to lie",
    "nhs" : "national health service",
    "nrn" : "no reply necessary",
    "nsfl" : "not safe for life",
    "nsfw" : "not safe for work",
    "nth" : "nice to have",
    "nvr" : "never",
    "nyc" : "new york city",
    "oc" : "original content",
    "og" : "original",
    "ohp" : "overhead projector",
    "oic" : "oh i see",
    "omdb" : "over my dead body",
    "omg" : "oh my god",
    "omw" : "on my way",
    "p.a" : "per annum",
    "p.m" : "after midday",
    "pm" : "prime minister",
    "poc" : "people of color",
    "pov" : "point of view",
    "pp" : "pages",
    "ppl" : "people",
    "prw" : "parents are watching",
    "ps" : "postscript",
    "pt" : "point",
    "ptb" : "please text back",
    "pto" : "please turn over",
    "qpsa" : "what happens", #"que pasa",
    "ratchet" : "rude",
    "rbtl" : "read between the lines",
    "rlrt" : "real life retweet", 
    "rofl" : "rolling on the floor laughing",
    "roflol" : "rolling on the floor laughing out loud",
    "rotflmao" : "rolling on the floor laughing my ass off",
    "rt" : "retweet",
    "ruok" : "are you ok",
    "sfw" : "safe for work",
    "sk8" : "skate",
    "smh" : "shake my head",
    "sq" : "square",
    "srsly" : "seriously", 
    "ssdd" : "same stuff different day",
    "tbh" : "to be honest",
    "tbs" : "tablespooful",
    "tbsp" : "tablespooful",
    "tfw" : "that feeling when",
    "thks" : "thank you",
    "tho" : "though",
    "thx" : "thank you",
    "tia" : "thanks in advance",
    "til" : "today i learned",
    "tl;dr" : "too long i did not read",
    "tldr" : "too long i did not read",
    "tmb" : "tweet me back",
    "tntl" : "trying not to laugh",
    "ttyl" : "talk to you later",
    "u" : "you",
    "u2" : "you too",
    "u4e" : "yours for ever",
    "utc" : "coordinated universal time",
    "w/" : "with",
    "w/o" : "without",
    "w8" : "wait",
    "wassup" : "what is up",
    "wb" : "welcome back",
    "wtf" : "what the fuck",
    "wtg" : "way to go",
    "wtpa" : "where the party at",
    "wuf" : "where are you from",
    "wuzup" : "what is up",
    "wywh" : "wish you were here",
    "yd" : "yard",
    "ygtr" : "you got that right",
    "ynk" : "you never know",
    "zzz" : "sleeping bored and tired"
}

def convert_abbrev(word):
    return abbreviations[word.lower()] if word.lower() in abbreviations.keys() else word

def convert_abbrev_in_text(text):
    tokens = word_tokenize(text)
    tokens = [convert_abbrev(word) for word in tokens]
    text = ' '.join(tokens)
    return text

In [22]:
def preprocess(data_dir):
    train = pd.read_csv(data_dir + 'train.csv')
    test = pd.read_csv(data_dir + 'test.csv')
    df = pd.concat([train, test], sort=True)
    df['text'] = df['text'].apply(lambda x: clean_tweets(x))
    df['text'] = df['text'].apply(lambda x: remove_emoji(x))
    df['text'] = df['text'].apply(lambda x: remove_punctuations(x))
    df['text'] = df['text'].apply(lambda x: convert_abbrev_in_text(x))
    df = df.iloc[:, [3, 4]]
    n_train = len(train)
    df_train = df.iloc[:n_train, :]
    df_test = df.iloc[n_train:, :]
    return df_train, df_test

In [23]:
def read_tweet(df, is_train):
    data, labels = [], []
    if is_train:
        for i in range(len(df)):
            data.append(df.iloc[i, 1])
            labels.append(int(df.iloc[i, 0]))
        return data, labels
    else:
        for i in range(len(df)):
            data.append(df.iloc[i, 1])
        return data

In [25]:
df_train, df_test = preprocess('./')
train_text, train_labels = read_tweet(df_train, True)
test_text = read_tweet(df_test, False)

In [26]:
train_labels = torch.tensor(train_labels).reshape(-1, 1)
train_labels

tensor([[1],
        [1],
        [1],
        ...,
        [1],
        [1],
        [1]])

In [27]:
model_name = 'bert-base-uncased'
cache_dir = './'
tokenizer = BertTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
print(train_text[2])
print(tokenizer.tokenize(train_text[2]))
print(tokenizer.encode(train_text[2]))
print(tokenizer.convert_ids_to_tokens(tokenizer.encode(train_text[2])))

All residents asked to ' shelter in place ' are being notified by officers . No other evacuation or shelter in place orders are expected
['all', 'residents', 'asked', 'to', "'", 'shelter', 'in', 'place', "'", 'are', 'being', 'notified', 'by', 'officers', '.', 'no', 'other', 'evacuation', 'or', 'shelter', 'in', 'place', 'orders', 'are', 'expected']
[101, 2035, 3901, 2356, 2000, 1005, 7713, 1999, 2173, 1005, 2024, 2108, 19488, 2011, 3738, 1012, 2053, 2060, 13982, 2030, 7713, 1999, 2173, 4449, 2024, 3517, 102]
['[CLS]', 'all', 'residents', 'asked', 'to', "'", 'shelter', 'in', 'place', "'", 'are', 'being', 'notified', 'by', 'officers', '.', 'no', 'other', 'evacuation', 'or', 'shelter', 'in', 'place', 'orders', 'are', 'expected', '[SEP]']


# Dataloader

In [28]:
def tokenize(tokenizer, sentence, max_len=128):
    tokens = tokenizer.encode(sentence[:max_len])
    if len(tokens) < max_len + 2:
        tokens.extend([0] * (max_len + 2 - len(tokens)))
    return tokens
input_ids = [tokenize(tokenizer, sen) for sen in train_text]
input_tokens = torch.tensor(input_ids)

In [29]:
def attention_masks(input_ids):
    atten_masks = []
    for seq in input_ids:
        seq_mask = [float(i > 0) for i in seq]
        atten_masks.append(seq_mask)
    return atten_masks
attention_tokens = torch.tensor(attention_masks(input_ids))
print(attention_tokens.shape)

torch.Size([7613, 130])


In [30]:
from sklearn.model_selection import train_test_split
train_inputs, test_inputs, train_labels, test_labels = train_test_split(input_tokens, train_labels, 
                                                                        random_state=42, test_size=0.2)
train_masks, test_masks, _, _ = train_test_split(attention_tokens, input_tokens, 
                                                 random_state=42, test_size=0.2)

In [31]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

In [32]:
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_iter = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

test_data = TensorDataset(test_inputs, test_masks, test_labels)
test_sampler = RandomSampler(test_data)
test_iter = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

In [33]:
for i, (train, mask, label) in enumerate(train_iter): 
    print(train.shape, mask.shape, label.shape)
    break

print('len(train_iter) = ', len(train_iter))

torch.Size([16, 130]) torch.Size([16, 130]) torch.Size([16, 1])
len(train_iter) =  381


# Model

In [34]:
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
device = torch.device('cuda')
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [35]:
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params' : [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
     'weight_decay' : weight_decay
    },
    {'params' : [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
     'weight_decay' : 0.0
    }
]

optimizer = AdamW(optimizer_grouped_parameters, lr = learning_rate, eps = epsilon)



In [36]:
num_epochs = 5
total_steps = len(train_iter) * num_epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, 
                                            num_training_steps=total_steps)

# Acc & Time

In [37]:
def binary_acc(preds, labels):
    correct = torch.eq(torch.max(preds, dim=1)[1], labels.flatten()).float()
    acc = correct.sum().item() / len(correct)
    return acc

In [38]:
import time
import datetime

def format_time(elapsed):
    elapsed_rounded = int(round(elapsed))
    return str(datetime.timedelta(seconds=elapsed_rounded))

# Train

In [39]:
def train(model, optimizer):
    t0 = time.time()
    avg_loss, avg_acc = [], []
    model.train()
    for step, batch in enumerate(train_iter):
        # 每隔40个batch 输出一下所用时间.
        if step % 40 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print(' Batch {:>5,} of {:>5,}.  Elapsed: {:}.'.format(step, len(train_iter), elapsed))
        b_input_ids, b_input_mask, b_labels = batch[0].long().to(device), \
                        batch[1].long().to(device), batch[2].long().to(device)
        output = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, 
                       labels=b_labels)
        loss, logits = output[0], output[1]    # loss: 损失, logits: predict
        
        avg_loss.append(loss.item())
        acc = binary_acc(logits, b_labels)
        avg_acc.append(acc)
        
        optimizer.zero_grad()
        loss.backward()
        clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
    avg_acc = np.array(avg_acc).mean()
    avg_loss = np.array(avg_loss).mean()
    return avg_loss, avg_acc

# Eval model

In [40]:
def evaluate(model):
    avg_acc = []
    model.eval()
    
    with torch.no_grad():
        for batch in test_iter:
            b_input_ids, b_input_mask, b_labels = batch[0].long().to(device), \
                        batch[1].long().to(device), batch[2].long().to(device)
            output = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
            acc = binary_acc(output[0], b_labels)
            avg_acc.append(acc)
    avg_acc = np.array(avg_acc).mean()
    return avg_acc

# Run & eval

In [41]:
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, optimizer)
    print('epoch={}, train acc={}, loss={}'.format(epoch + 1, train_acc, train_loss))
    
    test_acc = evaluate(model)
    print('epoch={}, test acc={}'.format(epoch + 1, test_acc))

 Batch    40 of   381.  Elapsed: 0:00:24.
 Batch    80 of   381.  Elapsed: 0:00:45.
 Batch   120 of   381.  Elapsed: 0:01:05.
 Batch   160 of   381.  Elapsed: 0:01:25.
 Batch   200 of   381.  Elapsed: 0:01:45.
 Batch   240 of   381.  Elapsed: 0:02:06.
 Batch   280 of   381.  Elapsed: 0:02:26.
 Batch   320 of   381.  Elapsed: 0:02:46.
 Batch   360 of   381.  Elapsed: 0:03:06.
epoch=1, train acc=0.8053477690288713, loss=0.44403238054804917
epoch=1, test acc=0.8411458333333334
 Batch    40 of   381.  Elapsed: 0:00:20.
 Batch    80 of   381.  Elapsed: 0:00:41.
 Batch   120 of   381.  Elapsed: 0:01:01.
 Batch   160 of   381.  Elapsed: 0:01:21.
 Batch   200 of   381.  Elapsed: 0:01:41.
 Batch   240 of   381.  Elapsed: 0:02:02.
 Batch   280 of   381.  Elapsed: 0:02:22.
 Batch   320 of   381.  Elapsed: 0:02:42.
 Batch   360 of   381.  Elapsed: 0:03:03.
epoch=2, train acc=0.8804133858267716, loss=0.308698999414372
epoch=2, test acc=0.8370225694444443
 Batch    40 of   381.  Elapsed: 0:00:20.
 B

In [44]:
def predict(sen):
    input_id = tokenize(tokenizer, sen)
    input_token =  torch.tensor(input_id).long().to(device)
    
    atten_mask = [float(i>0) for i in input_id]
    attention_token = torch.tensor(atten_mask).long().to(device)
    
    output = model(input_token.view(1, -1), token_type_ids=None, 
                    attention_mask=attention_token.view(1, -1))
    return torch.max(output[0], dim=1)[1]

In [45]:
preds = []
for sen in test_text:
    pred = predict(sen)
    preds.append(pred)
preds

[tensor([1], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([1], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor([0], device='cuda:0'),
 tensor(

In [61]:
target = []
for ch in preds:
    target.append(ch.cpu().data.numpy())
target[:10]

[array([1], dtype=int64),
 array([1], dtype=int64),
 array([1], dtype=int64),
 array([1], dtype=int64),
 array([1], dtype=int64),
 array([1], dtype=int64),
 array([0], dtype=int64),
 array([0], dtype=int64),
 array([0], dtype=int64),
 array([0], dtype=int64)]

In [63]:
target = np.array(target)

In [64]:
submission = pd.read_csv('./sample_submission.csv')
submission['target'] = target

In [65]:
submission.to_csv('nlp_prediction.csv',index=False)