In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import Dataset,DataLoader
from tqdm.auto import tqdm
import torch.utils.data as data
import pandas as pd
import torch
import transformers

In [None]:
class News(Dataset):
    """Map-style dataset of pre-tokenized texts, one label per text.

    Every text is tokenized once in the constructor. Each item is a dict of
    tensors keyed by 'input_ids', 'token_type_ids', 'attention_mask' and
    'labels'. Sequences are NOT padded, so items may differ in length.

    Args:
        inputs: Sequence of raw text strings.
        targets: Sequence of labels, one per entry of ``inputs``.
        tokenizer: Callable invoked as
            ``tokenizer(inputs, truncation=True, max_length=max_len)`` that
            returns a mapping with 'input_ids', 'token_type_ids' and
            'attention_mask', each holding one list of ints per text.
        max_len: Maximum number of tokens kept per text.

    Raises:
        ValueError: If the number of encoded texts differs from the number
            of targets (``zip`` would otherwise drop samples silently).
    """

    def __init__(self, inputs, targets, tokenizer, max_len=512):
        # Let the tokenizer truncate so that it can keep its own trailing
        # special token; plain slicing would cut that token off.
        encoded = tokenizer(inputs, truncation=True, max_length=max_len)

        if len(encoded['input_ids']) != len(targets):
            raise ValueError(
                'inputs and targets differ in length: {} vs {}'.format(
                    len(encoded['input_ids']), len(targets)))

        self.data = []
        for ids, sep, mask, label in zip(encoded['input_ids'],
                                         encoded['token_type_ids'],
                                         encoded['attention_mask'],
                                         targets):
            # The slice honours max_len (previously a hard-coded 512) and is
            # a no-op when the tokenizer already truncated; it only guards
            # against tokenizers that ignore the truncation arguments.
            self.data.append({'input_ids': torch.tensor(ids[:max_len])
                              ,'token_type_ids': torch.tensor(sep[:max_len])
                              ,'attention_mask': torch.tensor(mask[:max_len])
                              ,'labels': torch.tensor(label)})

    def __getitem__(self, index):
        """Return the dict of tensors stored for sample ``index``."""
        return self.data[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)
    
# Tunable settings for this cell.
SEED = 42                 # makes the train/test split repeatable across runs
SAMPLES_PER_CLASS = 100   # rows read from each CSV file
TRAIN_FRACTION = 0.8      # share of the samples used for training

# NOTE(review): tokenizer and model are loaded from two different hub
# repositories; presumably both share the same vocabulary -- confirm.
tokenizer = BertTokenizer.from_pretrained("ydshieh/bert-base-uncased-yelp-polarity")
model = BertForSequenceClassification.from_pretrained("textattack/bert-base-uncased-yelp-polarity")

# nrows stops parsing after the rows we keep instead of loading the whole
# file and slicing it afterwards.
df_fake = pd.read_csv('Fake.csv', nrows=SAMPLES_PER_CLASS)
df_real = pd.read_csv('True.csv', nrows=SAMPLES_PER_CLASS)

# Label convention: 0 = row from Fake.csv, 1 = row from True.csv.
fake_texts = df_fake['text'].tolist()
real_texts = df_real['text'].tolist()
inputs = fake_texts + real_texts
targets = [0] * len(fake_texts) + [1] * len(real_texts)
dataset = News(inputs, targets, tokenizer)

train_set_size = int(len(dataset) * TRAIN_FRACTION)
test_set_size = len(dataset) - train_set_size
# Use the fully qualified module path rather than the `data` alias: the alias
# can be rebound by later cells, which would break a re-run of this cell.
# The seeded generator keeps the split identical from run to run.
train_set, test_set = torch.utils.data.random_split(
    dataset,
    [train_set_size, test_set_size],
    generator=torch.Generator().manual_seed(SEED),
)

In [None]:
# Both loaders serve one sample per step, in random order.
# The dataset does not pad its sequences, so a larger batch size would
# presumably need a padding collate_fn -- verify before raising it.
LOADER_OPTIONS = {'batch_size': 1, 'shuffle': True}
train_loader = DataLoader(train_set, **LOADER_OPTIONS)
test_loader = DataLoader(test_set, **LOADER_OPTIONS)

In [None]:
# Fine-tune for 20 epochs; after each epoch evaluate on the test loader and
# save a checkpoint to the directory 'model_<epoch>'.

# Use the GPU when there is one, otherwise fall back to the CPU instead of
# failing inside .cuda().
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
optimizer = torch.optim.AdamW(params = model.parameters(), lr = 1e-4)
for epoch in range(20):
    # ---- training pass ----
    model.train()
    train = tqdm(train_loader, desc=f'Epoch {epoch}')
    # The loop variable is `batch`, not `data`, so it does not overwrite the
    # `torch.utils.data as data` import alias used by the split cell.
    for batch in train:
        batch = {key: value.to(device) for key, value in batch.items()}
        # 'labels' is part of the batch, so the model output carries the loss.
        outputs = model(**batch)
        loss = outputs.loss
        train.set_postfix({'Loss': loss.item()})
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- evaluation pass ----
    model.eval()
    test = tqdm(test_loader, desc=f'Epoch {epoch}')
    correct = 0
    seen = 0
    # No gradients are needed for evaluation; skipping them avoids building
    # an autograd graph for every test sample.
    with torch.no_grad():
        for batch in test:
            batch = {key: value.to(device) for key, value in batch.items()}
            outputs = model(**batch)
            _, predict_label = torch.max(outputs.logits, 1)
            # .item() keeps the counter a plain Python int, not a device tensor.
            correct += (predict_label == batch['labels']).sum().item()
            seen += batch['labels'].size(0)
            # Accuracy over the samples evaluated so far; after the last
            # batch this equals the accuracy over the whole test set.
            test.set_postfix({'acc':'{:.4f}'.format(correct / seen * 100)})
    model.save_pretrained('model_{}'.format(epoch))