In [1]:
import torch
import sys
import pandas as pd
import numpy as np


from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, BertForMultipleChoice
from transformers import AdamW
from torch.utils.data import DataLoader
from torch.nn import functional as F

In [2]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('Using GPU ', torch.cuda.get_device_name(0)) 
else:
    device = torch.device("cpu")
    print('Using CPU')

Using GPU  GeForce GTX 1660 Ti


In [3]:
# device = torch.device("cpu")
# print('Using CPU')

In [4]:
df_train = pd.read_csv('data/train.csv')
df_test = pd.read_csv('data/test.csv')

In [5]:
df_train['label'] = df_train['label'].replace({-1: 2})
df_test['label'] = df_test['label'].replace({-1: 2})

In [6]:
df_train.head()

Unnamed: 0,stock,date,title,label
0,KMI,2014-08-13,3 MLPs That Could Follow In Kinder Morgan's Fo...,2
1,DGX,2015-01-30,Wells Fargo Downgrades Quest Diagnostics to Un...,2
2,FBIZ,2016-11-23,Benzinga's Top Initiations,2
3,KMX,2017-09-22,"Earnings Scheduled For September 22, 2017",1
4,CONN,2014-09-02,"Conn's, Inc. Sees FY2015 EPS $2.80-3.00 vs $3....",0


In [7]:
train_texts, train_labels = df_train['title'].to_numpy(), df_train['label'].to_numpy()
test_texts, test_labels = df_test['title'].to_numpy(), df_test['label'].to_numpy()

In [8]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [9]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [10]:
train_encodings = tokenizer(list(train_texts), return_tensors='pt', truncation=True, padding=True, max_length=20)

In [11]:
train_dataset = MyDataset(train_encodings, train_labels)

In [12]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', return_dict=True, num_labels=3)
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [13]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [14]:
lr = 1e-5
optim = AdamW(model.parameters(), lr=lr)

In [15]:
EPOCHES = 20
for epoch in range(EPOCHES):
   
    model.train()
    train_loss = 0
    for batch in train_loader:
        optim.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        batch_labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=batch_labels)
        loss = F.cross_entropy(outputs.logits, batch_labels)
        loss.backward()
        optim.step()

        train_loss += loss.item()
    
    print("Epoch {}/{}".format(epoch+1, EPOCHES))
    print("-"*15)
    print("Train loss: {}".format(train_loss))

  item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}


Epoch 1/20
---------------
Train loss: 787.6373723745346
Epoch 2/20
---------------
Train loss: 760.5328217744827


KeyboardInterrupt: 