In [2]:
import torch
from torch.utils.data import Dataset, DataLoader

from transformers import CamembertForSequenceClassification, CamembertTokenizer

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [4]:
class TweetDataset(Dataset):
    def __init__(
        self, device, file_path="data/train.csv"
    ):
        f = open(file_path, "r")
        self.device = device
        self.data_lines = f.readlines()
        self.tokenizer = CamembertTokenizer.from_pretrained('camembert-base')

    def __len__(self):
        return len(self.data_lines)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        line = self.data_lines[idx]
        arr = line.split(",")
        label = int(arr[0])
        
        sentence = ",".join(arr[1:])[:-1]
        inputs = self.tokenizer.encode_plus(sentence, max_length=100, pad_to_max_length=True)
        input_ids = torch.tensor([inputs["input_ids"]]).to(self.device)
        token_type_ids = torch.tensor([inputs["token_type_ids"]]).to(self.device)

        sample = {"input_ids": input_ids[0], "token_type_ids": token_type_ids[0], "label": label}

        return sample

In [5]:
train_dataset = TweetDataset(
    device,
    file_path="data/train.csv",
)

In [6]:
train_batch_size=20
dev_batch_size=10

train_dataloader = DataLoader(
    train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=0
)

In [7]:
model = CamembertForSequenceClassification.from_pretrained("camembert-base")
model.to(device)

CamembertForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(32005, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNo

In [8]:
loss_fn = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [None]:
current_loss = 0

for step, sample_batched in enumerate(train_dataloader):
    
    with torch.set_grad_enabled(True):
        output = model(sample_batched["input_ids"], token_type_ids = sample_batched["token_type_ids"])
        loss = loss_fn(output[0], sample_batched["label"].to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        current_loss += loss
        if step % 100 == 0:
            print(step, current_loss)
            current_loss = 0

    

0 tensor(0.4884, device='cuda:0', grad_fn=<AddBackward0>)
100 tensor(41.8111, device='cuda:0', grad_fn=<AddBackward0>)
200 tensor(42.2781, device='cuda:0', grad_fn=<AddBackward0>)
300 tensor(41.8329, device='cuda:0', grad_fn=<AddBackward0>)
400 tensor(42.0086, device='cuda:0', grad_fn=<AddBackward0>)
500 tensor(40.3247, device='cuda:0', grad_fn=<AddBackward0>)
600 tensor(40.9915, device='cuda:0', grad_fn=<AddBackward0>)
700 tensor(42.5556, device='cuda:0', grad_fn=<AddBackward0>)
800 tensor(42.6961, device='cuda:0', grad_fn=<AddBackward0>)
900 tensor(42.4590, device='cuda:0', grad_fn=<AddBackward0>)
1000 tensor(46.0486, device='cuda:0', grad_fn=<AddBackward0>)
1100 tensor(41.4848, device='cuda:0', grad_fn=<AddBackward0>)
1200 tensor(43.3213, device='cuda:0', grad_fn=<AddBackward0>)
1300 tensor(41.4024, device='cuda:0', grad_fn=<AddBackward0>)
1400 tensor(39.2359, device='cuda:0', grad_fn=<AddBackward0>)
1500 tensor(39.7814, device='cuda:0', grad_fn=<AddBackward0>)
1600 tensor(42.2689, 