In [1]:
import torch
from transformers import T5ForConditionalGeneration,T5Tokenizer


def set_seed(seed):
    """Seed PyTorch's random number generators for reproducible runs.

    The default generator is always seeded; the CUDA generators are seeded
    as well whenever CUDA is available.

    Args:
        seed: Integer seed passed to the PyTorch seeding functions.
    """
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        # CPU-only machine: nothing else to seed.
        return
    torch.cuda.manual_seed_all(seed)

# Fix the RNG state before anything else runs.
set_seed(42)

# Load the model weights and the tokenizer from local directories.
model = T5ForConditionalGeneration.from_pretrained('F:\\Paraphrase\\model\\')
tokenizer = T5Tokenizer.from_pretrained('F:\\Paraphrase\\tokenizer\\')

# Use the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device ", device)
model = model.to(device)

from torch.utils.data import Dataset, DataLoader
import os
import pandas as pd

class ParaphraseDataset(Dataset):
    """Paraphrase pairs read from a TSV file and tokenized eagerly.

    The file ``<data_dir>/<type_path>.tsv`` is read with pandas and must
    contain the columns ``sentence1`` (source) and ``sentence2`` (target).
    Every pair is tokenized while the dataset is constructed, so indexing
    afterwards is a plain list lookup.

    Args:
        tokenizer: Object exposing ``batch_encode_plus`` (e.g. a Hugging
            Face tokenizer).
        data_dir: Directory containing the TSV file.
        type_path: File name without the ``.tsv`` extension.
        max_len: Length every sequence is padded or truncated to.
    """

    def __init__(self, tokenizer, data_dir, type_path, max_len=512):
        self.path = os.path.join(data_dir, type_path + '.tsv')

        self.source_column = "sentence1"
        self.target_column = "sentence2"
        # Cast every cell to str so the string concatenation in _build
        # cannot fail on cells pandas parsed as numbers.
        self.data = pd.read_csv(self.path, sep="\t").astype(str)

        self.max_len = max_len
        self.tokenizer = tokenizer
        self.inputs = []
        self.targets = []

        self._build()

    def __len__(self):
        """Return the number of tokenized sentence pairs."""
        return len(self.inputs)

    def __getitem__(self, index):
        """Return the ids and attention masks of the pair at ``index``.

        Returns:
            A dict with the keys ``source_ids``, ``source_mask``,
            ``target_ids`` and ``target_mask``.
        """
        source = self.inputs[index]
        target = self.targets[index]

        # Each encoding was produced from a one-element batch, so only the
        # leading batch dimension is removed. A bare squeeze() would also
        # collapse the sequence dimension when max_len == 1.
        return {
            "source_ids": source["input_ids"].squeeze(0),
            "source_mask": source["attention_mask"].squeeze(0),
            "target_ids": target["input_ids"].squeeze(0),
            "target_mask": target["attention_mask"].squeeze(0),
        }

    def _encode(self, text):
        """Tokenize one string, padded/truncated to ``self.max_len``."""
        # padding="max_length" replaces the deprecated pad_to_max_length=True.
        return self.tokenizer.batch_encode_plus(
            [text],
            max_length=self.max_len,
            padding="max_length",
            truncation='longest_first',
            return_tensors="pt",
        )

    def _build(self):
        """Tokenize every row of the data frame into inputs and targets."""
        sources = self.data[self.source_column]
        targets = self.data[self.target_column]

        for input_, target in zip(sources, targets):
            # Task prefix on the source; explicit ``</s>`` end marker on both.
            input_ = "paraphrase: " + input_ + ' </s>'
            target = target + " </s>"

            self.inputs.append(self._encode(input_))
            self.targets.append(self._encode(target))

# Tokenize data/train.tsv and data/dev.tsv up front with a maximum length of 256.
# NOTE: both names are rebound further down in this file.
train_dataset = ParaphraseDataset(
    tokenizer=tokenizer, data_dir='data', type_path='train', max_len=256
)
test_dataset = ParaphraseDataset(
    tokenizer=tokenizer, data_dir='data', type_path='dev', max_len=256
)

from transformers import Trainer, TrainingArguments

# Disable gradients for every parameter of model.base_model.
# NOTE(review): for T5ForConditionalGeneration, `base_model` may resolve to
# the model itself, in which case this freezes all weights - confirm that
# this is intended before enabling training.
model.base_model.requires_grad_(False)

def get_dataset(tokenizer, type_path, args):
    """Build the ParaphraseDataset for one split stored under ``data/``.

    Args:
        tokenizer: Tokenizer handed to ParaphraseDataset.
        type_path: Split name, i.e. the TSV file name without extension
            (for example ``"train"``).
        args: Unused; kept so existing call sites keep working.

    Returns:
        A ParaphraseDataset with a maximum sequence length of 256.
    """
    # Honour the requested split instead of always loading 'train'.
    return ParaphraseDataset(tokenizer=tokenizer, data_dir='data', type_path=type_path, max_len=256)

train_dataset = get_dataset(tokenizer=tokenizer, type_path="train", args={})
# num_workers=0 loads batches in the main process. Worker processes need the
# dataset class to be picklable/importable in the child process; when this
# code is run from a notebook on Windows (the model paths in this file are
# Windows paths) iterating a multi-worker loader can hang or fail. The data
# is already tokenized in memory, so single-process loading costs little.
train_dataloader = DataLoader(train_dataset, batch_size=32, drop_last=True, shuffle=True,
                              num_workers=0)

def get_dataset(tokenizer, type_path, args):
    """Build the ParaphraseDataset for one split stored under ``data/``.

    Args:
        tokenizer: Tokenizer handed to ParaphraseDataset.
        type_path: Split name, i.e. the TSV file name without extension
            (for example ``"test"``).
        args: Unused; kept so existing call sites keep working.

    Returns:
        A ParaphraseDataset with a maximum sequence length of 256.
    """
    # Honour the requested split instead of always loading 'test'.
    return ParaphraseDataset(tokenizer=tokenizer, data_dir='data', type_path=type_path, max_len=256)

test_dataset = get_dataset(tokenizer=tokenizer, type_path="test", args={})
# num_workers=0 loads batches in the main process; multi-worker loading of a
# notebook-defined dataset can hang or fail on Windows.
# NOTE(review): shuffle=True and drop_last=True are kept from the original.
# For evaluation they mean a random order and up to batch_size - 1 examples
# skipped - confirm that this is intended.
test_dataloader = DataLoader(test_dataset, batch_size=32, drop_last=True, shuffle=True,
                             num_workers=0)

from torch.utils.data import DataLoader
from transformers import AdamW

# Training script. In its current state the optimisation step is commented
# out, so the loop below only iterates the loader and prints a batch counter.

# Pick the GPU when available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

print(device)

# Move the model to the chosen device and switch it to training mode.
model.to(device)
model.train()

train_loader = train_dataloader

# The optimizer is handed every model parameter, including those frozen below.
optim = AdamW(model.parameters(), lr=5e-5)

# Disable gradients for every parameter of model.base_model.
# NOTE(review): for T5ForConditionalGeneration, `base_model` may resolve to
# the model itself, which would leave nothing trainable - confirm.
for param in model.base_model.parameters():
    param.requires_grad = False

for epoch in range(3):
    # c counts the batches seen in this epoch (progress output only).
    c = 0
    for batch in train_loader:
        print(c)
        c+=1
# Disabled training step, kept for reference.
# NOTE(review): it passes the target ids as decoder_input_ids and supplies no
# `labels`, so outputs[0] is presumably not a loss - confirm against the T5
# documentation before re-enabling. It also calls backward() only when
# epoch > 0 while optim.step() runs every iteration.
#         optim.zero_grad()
#         input_ids = batch['source_ids'].to(device)
#         attention_mask = batch['source_mask'].to(device)
#         decoder_inputs  = batch['target_ids'].to(device)
#         decoder_attention_mask = batch['target_mask'].to(device)
#         outputs = model(input_ids, attention_mask=attention_mask,decoder_input_ids = decoder_inputs, decoder_attention_mask = decoder_attention_mask)
#         loss = outputs[0]
#         if epoch > 0:
#             loss.backward()
#         optim.step()
    print(f'==================== Epoch {epoch} Finished ===========================')

# model.eval()

  return torch._C._cuda_getDeviceCount() > 0


device  cpu


In [None]:
# Debug cell: walk the training loader once and print 1 for every batch
# it yields, to check that iteration makes progress.
for batch in train_loader:
    print(1)