## Loading data from Kaggle

In [None]:
!pip install transformers -q


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m56.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m96.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:

import os

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

# Importing the T5 modules from huggingface/transformers
from transformers import T5TokenizerFast, T5ForConditionalGeneration

In [None]:

from torch import cuda

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [None]:
class CustomDataset(Dataset):
    """Tokenised (article -> highlights) pairs for seq2seq summarisation.

    Args:
        dataframe: pandas DataFrame with ``article`` and ``highlights`` columns.
        tokenizer: object exposing ``batch_encode_plus`` (e.g. a T5 tokenizer).
        source_len: number of tokens the article (encoder input) is
            padded/truncated to.
        summ_len: number of tokens the highlights (decoder target) are
            padded/truncated to.

    Each item is a dict of 1-D ``torch.long`` tensors:
    ``source_ids`` / ``source_mask`` for the article and
    ``target_ids`` / ``target_ids_y`` (same values) for the highlights.
    """

    def __init__(self, dataframe, tokenizer, source_len, summ_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.source_len = source_len
        self.summ_len = summ_len
        self.article = self.data.article
        self.highlights = self.data.highlights

    def __len__(self):
        return len(self.article)

    def _encode(self, text, max_length):
        """Collapse whitespace in ``text`` and tokenise it to exactly ``max_length`` ids."""
        cleaned = ' '.join(str(text).split())
        return self.tokenizer.batch_encode_plus(
            [cleaned],
            max_length=max_length,
            padding="max_length",
            return_tensors='pt',
            truncation=True,
        )

    def __getitem__(self, index):
        # The article is the model input and the highlights are what it must
        # produce; previously the two were swapped, so the long article was
        # truncated to summ_len and used as the target.
        # .iloc keeps lookups positional, so a frame whose index was not reset
        # still works with the 0..len-1 indices a DataLoader produces.
        source = self._encode(self.article.iloc[index], self.source_len)
        target = self._encode(self.highlights.iloc[index], self.summ_len)

        # squeeze(0) removes only the batch axis added by batch_encode_plus.
        source_ids = source['input_ids'].squeeze(0)
        source_mask = source['attention_mask'].squeeze(0)
        target_ids = target['input_ids'].squeeze(0)

        return {
            'source_ids': source_ids.to(dtype=torch.long),
            'source_mask': source_mask.to(dtype=torch.long),
            'target_ids': target_ids.to(dtype=torch.long),
            'target_ids_y': target_ids.to(dtype=torch.long)
        }


In [None]:
def train(epoch, tokenizer, model, device, loader, optimizer):
    """Run one training pass over ``loader`` and update ``model`` in place.

    Args:
        epoch: epoch number, used only in the progress message.
        tokenizer: supplies ``pad_token_id`` so padding can be masked out of the loss.
        model: seq2seq model called with ``input_ids``, ``attention_mask`` and
            ``labels``; its first output is taken as the loss.
        device: device the batch tensors are moved to.
        loader: iterable of dicts with ``source_ids``, ``source_mask`` and
            ``target_ids`` tensors.
        optimizer: optimizer stepped once per batch.

    Returns:
        None. The loss is printed every 500 batches.
    """
    model.train()
    for step, batch in enumerate(loader):
        ids = batch['source_ids'].to(device, dtype=torch.long)
        mask = batch['source_mask'].to(device, dtype=torch.long)
        target = batch['target_ids'].to(device, dtype=torch.long)

        # Padding positions are set to -100 so they are ignored by the loss.
        labels = target.clone()
        labels[labels == tokenizer.pad_token_id] = -100

        # Pass only `labels`: T5 then builds the decoder inputs itself by
        # shifting the labels right behind the decoder start token. Slicing the
        # target by hand left the first target token without a training signal.
        outputs = model(input_ids=ids, attention_mask=mask, labels=labels)
        loss = outputs[0]

        if step % 500 == 0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [None]:
def validate(epoch, tokenizer, model, device, loader):
    """Generate text for every batch in ``loader`` and pair it with the references.

    The model is switched to eval mode and run under ``torch.no_grad()``.
    Generation uses beam search (2 beams, at most 150 tokens). Progress is
    printed every 100 batches. ``epoch`` is accepted for symmetry with
    ``train`` and is not used.

    Returns:
        (predictions, actuals): two lists of decoded strings, one entry per
        example, in loader order.
    """
    def _to_text(token_rows):
        # Decode a batch of id rows into plain strings without special tokens.
        return [
            tokenizer.decode(row, skip_special_tokens=True, clean_up_tokenization_spaces=True)
            for row in token_rows
        ]

    model.eval()
    predictions, actuals = [], []
    with torch.no_grad():
        for batch_no, batch in enumerate(loader):
            reference_ids = batch['target_ids'].to(device, dtype=torch.long)
            input_ids = batch['source_ids'].to(device, dtype=torch.long)
            attention_mask = batch['source_mask'].to(device, dtype=torch.long)

            generated_ids = model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_length=150,
                num_beams=2,
                repetition_penalty=2.5,
                length_penalty=1.0,
                early_stopping=True,
            )
            batch_predictions = _to_text(generated_ids)
            batch_references = _to_text(reference_ids)
            if batch_no % 100 == 0:
                print(f'Completed {batch_no}')

            predictions += batch_predictions
            actuals += batch_references
    return predictions, actuals

In [None]:
! pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentencepiece
  Downloading sentencepiece-0.1.98-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.98


In [None]:
from transformers import T5Config

In [None]:
from transformers import T5Tokenizer

In [None]:
# Load the pretrained "t5-base" tokenizer; it is handed to CustomDataset, train() and validate() below.
# CustomDataset always passes max_length when encoding, which is what the model_max_length notice asks for.
tokenizer = T5TokenizerFast.from_pretrained("t5-base")

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [None]:
# Load the articles that were labelled "neutral".
# NOTE(review): this file is written by the `neutral.to_csv(...)` cell further down the notebook, so on a
# fresh runtime that data-preparation section has to be run first. Assumes Google Drive is mounted at
# /content/drive — no mount cell is visible here.
df = pd.read_csv("/content/drive/MyDrive/saved_models/neut.csv")

In [None]:

# Preview the first five rows to check which columns were loaded.
df.head(5)

Unnamed: 0,index,id,article,highlights,bias
0,272581,ed0fed726929c1eeabe6c390e47128dbb7d7a055,By . Mia De Graaf . Britons flocked to beaches...,People enjoyed temperatures of 17C at Brighton...,neutral
1,171868,6a70a0d8d3ed365fe1df6d35f1587a8b9b298618,Video footage shows the heart stopping moment ...,A 17-year-old boy suffering lacerations to his...,neutral
2,63167,b37204c13ea38b511265e41ac69fb12acfb63f85,"Istanbul, Turkey (CNN) -- About 250 people rac...",Syrians citizens hightail it to Turkey .\nMost...,neutral
3,68522,c24e5805afd5145bc48410e876db91d44a06be5e,By . Daily Mail Reporter . PUBLISHED: . 12:53 ...,The Xue Long had provided the helicopter that ...,neutral
4,81888,e80e130d55bf30e5a0f547aaaa4cd9930635bfbd,(CNN) -- Place a tennis ball into a yellow soc...,'Muggle quidditch' replicates Harry Potter's m...,neutral


In [None]:
# Fine-tuning only needs the text pair; the remaining columns are dropped.
df = df.loc[:, ['article', 'highlights']]

In [None]:
  # 80/20 split: sample the training rows with a fixed seed and keep every other row for validation.
  train_size = 0.8
  train_dataset = df.sample(frac=train_size, random_state=42)
  val_dataset = df.loc[~df.index.isin(train_dataset.index)].reset_index(drop=True)
  train_dataset = train_dataset.reset_index(drop=True)

In [None]:
    # Report (rows, columns) for the full frame and for each split.
    print(f"FULL Dataset: {df.shape}")
    print(f"TRAIN Dataset: {train_dataset.shape}")
    print(f"TEST Dataset: {val_dataset.shape}")

FULL Dataset: (15467, 2)
TRAIN Dataset: (12374, 2)
TEST Dataset: (3093, 2)


In [None]:
           # Initialize config
TRAIN_BATCH_SIZE = 2    # input batch size for training (default: 64)
VALID_BATCH_SIZE = 2    # input batch size for testing (default: 1000)
TRAIN_EPOCHS = 2        # number of epochs to train (default: 10)
VAL_EPOCHS = 1          # number of passes over the validation loader
LEARNING_RATE = 2e-4    # learning rate (default: 0.01)
SEED = 42               # random seed (default: 42)
MAX_LEN = 512           # token length the encoder input is padded/truncated to
SUMMARY_LEN = 150       # token length the decoder target is padded/truncated to

# SEED was declared but never applied; seed numpy and torch here so that
# DataLoader shuffling and weight-dependent randomness are repeatable.
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Wrap each split so that rows are tokenised lazily, one example per __getitem__ call.
training_set = CustomDataset(
    dataframe=train_dataset,
    tokenizer=tokenizer,
    source_len=MAX_LEN,
    summ_len=SUMMARY_LEN,
)
val_set = CustomDataset(
    dataframe=val_dataset,
    tokenizer=tokenizer,
    source_len=MAX_LEN,
    summ_len=SUMMARY_LEN,
)

In [None]:
# DataLoader keyword arguments: training batches are shuffled, validation batches keep dataset order.
train_params = dict(batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=0)

val_params = dict(batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=0)

In [None]:
# Batch iterators consumed by train() and validate().
training_loader = DataLoader(dataset=training_set, **train_params)
val_loader = DataLoader(dataset=val_set, **val_params)

In [None]:
# Load the pretrained "t5-base" weights and move the model onto the selected device in one step.
model = T5ForConditionalGeneration.from_pretrained("t5-base").to(device)

Downloading pytorch_model.bin:   0%|          | 0.00/892M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [None]:
# Adam over every model parameter, using the learning rate from the config cell.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
!pip install --upgrade transformers


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Fine-tune for TRAIN_EPOCHS passes over the training loader.
for epoch in range(TRAIN_EPOCHS):
    train(epoch, tokenizer, model, device, training_loader, optimizer)

print('Now generating summaries on our fine tuned model for the validation dataset and saving it in a dataframe')
# The recorded run ended in OSError right after the message above, most likely
# because ./models did not exist when to_csv ran. Create the folder up front so
# a full training run is not lost at the final write.
os.makedirs('./models', exist_ok=True)
for epoch in range(VAL_EPOCHS):
    predictions, actuals = validate(epoch, tokenizer, model, device, val_loader)
    final_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})
    final_df.to_csv('./models/predictions.csv')
    print('Output Files generated for review')

Epoch: 0, Loss:  9.746224403381348
Epoch: 0, Loss:  3.399040699005127
Epoch: 0, Loss:  3.6994543075561523
Epoch: 0, Loss:  2.1720213890075684
Epoch: 0, Loss:  3.031092643737793
Epoch: 0, Loss:  3.2436020374298096
Epoch: 0, Loss:  3.095874547958374
Epoch: 0, Loss:  3.303894519805908
Epoch: 0, Loss:  3.4430267810821533
Epoch: 0, Loss:  3.366499185562134
Epoch: 0, Loss:  2.518264055252075
Epoch: 0, Loss:  2.679361343383789
Epoch: 0, Loss:  1.9473178386688232
Epoch: 1, Loss:  1.9011712074279785
Epoch: 1, Loss:  3.0405924320220947
Epoch: 1, Loss:  2.1936779022216797
Epoch: 1, Loss:  2.3758580684661865
Epoch: 1, Loss:  2.415407419204712
Epoch: 1, Loss:  2.5247209072113037
Epoch: 1, Loss:  2.327753782272339
Epoch: 1, Loss:  3.0325398445129395
Epoch: 1, Loss:  2.5330991744995117
Epoch: 1, Loss:  3.0060815811157227
Epoch: 1, Loss:  2.680759906768799
Epoch: 1, Loss:  2.3556339740753174
Epoch: 1, Loss:  2.081122398376465
Now generating summaries on our fine tuned model for the validation dataset 

OSError: ignored

In [None]:
import pickle

In [None]:
# Use a context manager so the file handle is flushed and closed even if pickling fails.
# NOTE(review): pickling the whole module ties the file to the installed library version;
# consider model.save_pretrained(...) as the portable alternative.
with open('/content/drive/MyDrive/saved_models/T5', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
# Tokenise the sample article `lol` to a fixed 512-token tensor pair (ids + attention mask).
# NOTE(review): `lol` is only assigned in a later cell, which is why the saved run of this cell
# ended in NameError; on a fresh kernel that assignment has to be run first.
encoded_review = tokenizer.encode_plus(
  lol,
  max_length=512,
  add_special_tokens=True,
  return_token_type_ids=False,
  padding="max_length",
  truncation=True,
  return_attention_mask=True,
  return_tensors='pt',
)



NameError: ignored

In [None]:
# Run the encoded article through `model`, take the arg-max over dim 1 and print the matching label.
# NOTE(review): `class_names` is not defined in any visible cell, and `torch.max(output, dim=1)` needs
# `output` to be a plain tensor — presumably `model` was a separate bias classifier when this ran,
# not the T5 model loaded above. Confirm before re-running.
input_ids = encoded_review['input_ids'].to(device)
attention_mask = encoded_review['attention_mask'].to(device)
output = model(input_ids, attention_mask)
_, prediction = torch.max(output, dim=1)
print(f'News Article: {lol}')
print(f'Bias  : {class_names[prediction]}')

News Article: Measures that would have severely restricted abortion failed Thursday in Nebraska and South Carolina, which both have Republican-controlled legislatures, a reflection of the growing unease among Republicans over the political popularity of strict bans. In Nebraska, a “Heartbeat Act” would have banned most abortions after six weeks except in cases of rape or incest or to preserve the life of the mother once a “fetal heartbeat” was detected, but it stalled in the legislature. A vote to overcome a filibuster of the bill failed by a vote of 32-15, with two senators abstaining, including Republican state Sen. Merv Riepe – leaving the measure one vote shy of the two-thirds majority needed for full consideration. Riepe, a cosigner to the bill, had proposed an amendment to move the ban to 12 weeks instead of six weeks, but his amendment did not receive a vote Thursday. Nebraska Gov. Jim Pillen, a Republican, said he was “profoundly disappointed” in the vote and called for the bil

In [None]:
lol = "Measures that would have severely restricted abortion failed Thursday in Nebraska and South Carolina, which both have Republican-controlled legislatures, a reflection of the growing unease among Republicans over the political popularity of strict bans. In Nebraska, a “Heartbeat Act” would have banned most abortions after six weeks except in cases of rape or incest or to preserve the life of the mother once a “fetal heartbeat” was detected, but it stalled in the legislature. A vote to overcome a filibuster of the bill failed by a vote of 32-15, with two senators abstaining, including Republican state Sen. Merv Riepe – leaving the measure one vote shy of the two-thirds majority needed for full consideration. Riepe, a cosigner to the bill, had proposed an amendment to move the ban to 12 weeks instead of six weeks, but his amendment did not receive a vote Thursday. Nebraska Gov. Jim Pillen, a Republican, said he was “profoundly disappointed” in the vote and called for the bill to be reconsidered. “It is unacceptable for senators to be present not voting on such a momentous vote,” Pillen said in a statement on Thursday. “I call on Senator Merv Riepe to make a motion to reconsider and stand by the commitments to life he has made in the past.” Nebraska law currently prohibits most abortions starting at 20 weeks. On Thursday afternoon, the South Carolina state Senate failed to pass the “Human Life Protection Act,” which would have banned abortions in the state, in a 22-21 vote with five women voting against it – including three Republicans. The bill previously passed in the state House and included exceptions for incidents of rape or incest. The bill will now be carried over to next year’s legislative session, which begins in January. “Once a woman became pregnant for any reason, she would now become property of the state of South Carolina if the ‘Human Life Protection Act’ were [to] come into law,” Republican state Sen. 
Katrina Frye Shealy, who voted against the bill, said Wednesday on the Senate floor. “She could no longer make decisions on her own or at the advice of her well-trained doctor. Every female, regardless of her age, would suddenly become subject to the power of a code book regarding her health.” Penry Gustafson, another Republican South Carolina state senator who voted against the bill, told CNN’s Boris Sanchez on Friday while she is in favor of a six-week abortion ban and is against abortion rights, she voted against the bill because she did not believe it would pass the state’s Supreme Court. Gustafson also said she disagrees with support for a nationwide 15-week abortion ban that has been promoted by some Congressional Republicans, including South Carolina Sen. Lindsey Graham. “I’ve talked with some of my colleagues, especially my female colleagues, and we think it’s a bad idea. If you are going to send the decision-making to the states, we need to do that. We need to legislate on behalf of our states,” she said on “CNN News Central.” Vicki Ringer, director of public affairs for Planned Parenthood South Atlantic, said in a statement, “the government should never force a person to carry a pregnancy or give birth against their will” and that the state legislature is “hellbent on controlling the decisions of women.” In the wake of last year’s Supreme Court ruling overturning Roe v. Wade, many Republican-led states have enacted sweeping abortion restrictions. But unpopularity, demonstrated in some instances by electoral evidence, over severe restrictions has prompted Republicans to reconsider the political wisdom of enacting near-total bans on the procedure, and House Republicans have recently stepped away from pushing a federal abortion ban."

In [None]:
# Accumulator for the labels returned by result(); filled by the loop in the next cell.
hehe = []

In [None]:
# Label the first 100 rows and collect the results in `hehe`.
# NOTE(review): relies on `result` (defined in a later cell) and on a `content` column; the `df`
# loaded earlier in this notebook only has article/highlights — confirm which frame this ran against.
for i in range(0,100):
  hehe.append(result(df['content'][i]))

In [None]:
 # Label a single row as a smoke test. NOTE(review): same `content` column / `result` ordering caveat as the loop above.
 l = result(df['content'][0])

In [None]:
# Check what type result() returned for the single row (recorded output: str).
type(l)

str

In [None]:
# NOTE(review): no list named `bias` is created in any visible cell — presumably left over from an earlier session; confirm.
bias.append(l)

In [None]:
# Confirm one label was collected per processed row (recorded output: 100).
len(hehe)

100

In [None]:
def result(review_text):
    """Return the predicted label for one piece of text.

    The text is tokenised to exactly ``MAX_LEN`` tokens (padded or truncated),
    passed through the global ``model``, and the index of the largest score
    along dim 1 is looked up in the global ``class_names``.

    Args:
        review_text: raw text to classify.

    Returns:
        The ``class_names`` entry selected by the arg-max prediction.

    NOTE(review): ``class_names`` is not defined in the visible notebook, and
    ``model`` must return a plain score tensor for ``torch.max`` to work —
    presumably a separate classifier rather than the T5 model; confirm.
    """
    encoded_review = tokenizer.encode_plus(
        review_text,
        max_length=MAX_LEN,
        add_special_tokens=True,
        return_token_type_ids=False,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    input_ids = encoded_review['input_ids'].to(device)
    attention_mask = encoded_review['attention_mask'].to(device)

    # Inference only: skip autograd bookkeeping so that calling this in a long
    # loop does not build and hold a computation graph for every article.
    with torch.no_grad():
        output = model(input_ids, attention_mask)
    _, prediction = torch.max(output, dim=1)

    return class_names[prediction]


In [None]:
from transformers import pipeline

# Off-the-shelf "t5-base" summarisation pipeline on the TensorFlow backend.
# It is loaded from the hub by name, so it does not use the `model` object fine-tuned above.
summarizer = pipeline(
    task="summarization",
    model="t5-base",
    tokenizer="t5-base",
    framework="tf",
)


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/892M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFT5ForConditionalGeneration.

All the layers of TFT5ForConditionalGeneration were initialized from the model checkpoint at t5-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [None]:
# Summarise one row with the stock pipeline, asking for 100-150 tokens.
# NOTE(review): reads a `content` column, which the `df` loaded earlier in this notebook does not have — confirm the frame.
summarizer(df['content'][0], min_length=100, max_length=150)

[{'summary_text': 'besides his most recent trip to Quetta , Mr. Rahami visited Karachi , Pakistan , in 2005 . Both of those cities ’ reputations have become entwined with the militant groups who have sheltered there : Karachi as a haven for the Pakistani Taliban and Al Qaeda , and Quetta as the headquarters of the exiled Afghan Taliban leadership . but both cities are also home to generations of Afghans who have fled violence in their home country .'}]

In [None]:
# Download the CNN/DailyMail summarisation archive (the recorded run saved it to /content).
# NOTE(review): presumably needs Kaggle API credentials configured beforehand — no setup cell is visible.
! kaggle datasets download -d gowrishankarp/newspaper-text-summarization-cnn-dailymail

Downloading newspaper-text-summarization-cnn-dailymail.zip to /content
 96% 485M/503M [00:02<00:00, 134MB/s]
100% 503M/503M [00:02<00:00, 183MB/s]


In [None]:
! unzip /content/newspaper-text-summarization-cnn-dailymail.zip


Archive:  /content/newspaper-text-summarization-cnn-dailymail.zip
  inflating: cnn_dailymail/test.csv  
  inflating: cnn_dailymail/train.csv  
  inflating: cnn_dailymail/validation.csv  


In [None]:
# Load the training split extracted from the Kaggle archive (columns: id, article, highlights).
nus = pd.read_csv('/content/cnn_dailymail/train.csv')

In [None]:
# Preview the first five rows of the raw training split.
nus.head(5)

Unnamed: 0,id,article,highlights
0,0001d1afc246a7964130f43ae940af6bc6c57f01,By . Associated Press . PUBLISHED: . 14:11 EST...,"Bishop John Folda, of North Dakota, is taking ..."
1,0002095e55fcbd3a2f366d9bf92a95433dc305ef,(CNN) -- Ralph Mata was an internal affairs li...,Criminal complaint: Cop used his role to help ...
2,00027e965c8264c35cc1bc55556db388da82b07f,A drunk driver who killed a young woman in a h...,"Craig Eccleston-Todd, 27, had drunk at least t..."
3,0002c17436637c4fe1837c935c04de47adb18e9a,(CNN) -- With a breezy sweep of his pen Presid...,Nina dos Santos says Europe must be ready to a...
4,0003ad6ef0c37534f80b55b4235108024b407f0b,Fleetwood are the only team still to have a 10...,Fleetwood top of League One after 2-0 win at S...


In [None]:
# Look at one full highlights string; in the recorded output its sentences are separated by newlines.
nus['highlights'][0]

'Bishop John Folda, of North Dakota, is taking time off after being diagnosed .\nHe contracted the infection through contaminated food in Italy .\nChurch members in Fargo, Grand Forks and Jamestown could have been exposed .'

In [None]:
# Size of the raw training split (recorded output: 287113 rows, 3 columns).
nus.shape

(287113, 3)

In [None]:
# Work on a 40k-row random subset; the fixed random_state makes the same rows come back on every run.
op = nus.sample(n=40000, random_state=42)

In [None]:
# Renumber the sampled rows 0..n-1; the original row labels are kept in a new `index` column.
op = op.reset_index()

In [None]:
# Read the first sampled article in full as a sanity check.
op['article'][0]

"By . Mia De Graaf . Britons flocked to beaches across the southern coast yesterday as millions look set to bask in glorious sunshine today. Temperatures soared to 17C in Brighton and Dorset, with people starting their long weekend in deck chairs by the sea. Figures from Asda suggest the unexpected sunshine has also inspired a wave of impromptu barbecues, with sales of sausages and equipment expected to triple those in April. Sun's out: Brighton beach was packed with Britons enjoying the unexpected sunshine to start the long weekend as temperatures hit 17C across the south coast . Although frost is set to hit the south tonight - with temperatures dropping to 1C - Britons stocking up for a barbecue will be in luck tomorrow, with forecasters predicting dry and sunny weather across southern England, southern Wales and the south Midlands. In Weymouth, Dorset, the sun came out in time for the town's annual kite festival, held on the beach. But the good weather has not been enjoyed by all as

In [None]:
# Label every sampled article, in row order. Iterating the column itself avoids
# the hard-coded 40000, so this stays correct if the sample size changes.
roi = [result(article_text) for article_text in op['article']]


In [None]:
# Attach the predicted labels as a new `bias` column (one label per sampled article, in row order).
op['bias'] = roi

In [None]:
# Count how many sampled articles received each label.
op.bias.value_counts()

right      23481
neutral    15467
left        1052
Name: bias, dtype: int64

In [None]:
 # Keep only the articles that were labelled "neutral".
 neutral = op.loc[op['bias'] == "neutral"]

In [None]:
# Preview the first five neutral rows.
neutral.head(5)

Unnamed: 0,index,id,article,highlights,bias
0,272581,ed0fed726929c1eeabe6c390e47128dbb7d7a055,By . Mia De Graaf . Britons flocked to beaches...,People enjoyed temperatures of 17C at Brighton...,neutral
2,171868,6a70a0d8d3ed365fe1df6d35f1587a8b9b298618,Video footage shows the heart stopping moment ...,A 17-year-old boy suffering lacerations to his...,neutral
3,63167,b37204c13ea38b511265e41ac69fb12acfb63f85,"Istanbul, Turkey (CNN) -- About 250 people rac...",Syrians citizens hightail it to Turkey .\nMost...,neutral
4,68522,c24e5805afd5145bc48410e876db91d44a06be5e,By . Daily Mail Reporter . PUBLISHED: . 12:53 ...,The Xue Long had provided the helicopter that ...,neutral
5,81888,e80e130d55bf30e5a0f547aaaa4cd9930635bfbd,(CNN) -- Place a tennis ball into a yellow soc...,'Muggle quidditch' replicates Harry Potter's m...,neutral


In [None]:
# Size of the neutral subset (recorded output: 15467 rows, 5 columns).
neutral.shape

(15467, 5)

In [None]:
# Persist the neutral subset to Drive without the row index; the fine-tuning section reads this same path with pd.read_csv.
neutral.to_csv("/content/drive/MyDrive/saved_models/neut.csv",index=False)

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:

from torch import cuda

# Run on the GPU when PyTorch can see one; otherwise stay on the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'


In [None]:
def train(epoch, tokenizer, model, device, loader, optimizer):
    """Run one training pass over ``loader`` and update ``model`` in place.

    Args:
        epoch: epoch number, used only in the progress message.
        tokenizer: supplies ``pad_token_id`` so padding can be masked out of the loss.
        model: seq2seq model called with ``input_ids``, ``attention_mask`` and
            ``labels``; its first output is taken as the loss.
        device: device the batch tensors are moved to.
        loader: iterable of dicts with ``source_ids``, ``source_mask`` and
            ``target_ids`` tensors.
        optimizer: optimizer stepped once per batch.

    Returns:
        None. The loss is printed every 500 batches.
    """
    model.train()
    for step, batch in enumerate(loader):
        ids = batch['source_ids'].to(device, dtype=torch.long)
        mask = batch['source_mask'].to(device, dtype=torch.long)
        target = batch['target_ids'].to(device, dtype=torch.long)

        # Padding positions are set to -100 so they are ignored by the loss.
        labels = target.clone()
        labels[labels == tokenizer.pad_token_id] = -100

        # The keyword is `labels`; the old `lm_labels` name is not accepted by
        # current T5ForConditionalGeneration.forward. Passing only `labels`
        # lets T5 build the decoder inputs itself by shifting the labels right
        # behind the decoder start token.
        outputs = model(input_ids=ids, attention_mask=mask, labels=labels)
        loss = outputs[0]

        if step % 500 == 0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [None]:

def validate(epoch, tokenizer, model, device, loader):
    """Decode model generations and reference targets for every batch in ``loader``.

    Puts the model in eval mode, disables gradient tracking, and generates
    with 2-beam search capped at 150 tokens. A progress line is printed every
    100 batches. ``epoch`` is not used inside the function.

    Returns:
        A ``(predictions, actuals)`` tuple of string lists, aligned by example.
    """
    decode_options = {'skip_special_tokens': True, 'clean_up_tokenization_spaces': True}
    predictions = []
    actuals = []

    model.eval()
    with torch.no_grad():
        for batch_index, batch in enumerate(loader):
            gold_ids = batch['target_ids'].to(device, dtype=torch.long)
            encoder_ids = batch['source_ids'].to(device, dtype=torch.long)
            encoder_mask = batch['source_mask'].to(device, dtype=torch.long)

            generated_ids = model.generate(
                input_ids=encoder_ids,
                attention_mask=encoder_mask,
                max_length=150,
                num_beams=2,
                repetition_penalty=2.5,
                length_penalty=1.0,
                early_stopping=True,
            )

            generated_text = []
            for generated_row in generated_ids:
                generated_text.append(tokenizer.decode(generated_row, **decode_options))
            gold_text = []
            for gold_row in gold_ids:
                gold_text.append(tokenizer.decode(gold_row, **decode_options))

            if batch_index % 100 == 0:
                print(f'Completed {batch_index}')

            predictions.extend(generated_text)
            actuals.extend(gold_text)
    return predictions, actuals

In [None]:
!pip install sentencepiece

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sentencepiece
  Downloading sentencepiece-0.1.98-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.98


In [None]:
# Use the fast tokenizer, matching the tokenizer cell earlier in the notebook.
# The saved run of this cell with the slow T5Tokenizer ended in TypeError/AttributeError —
# presumably because sentencepiece was installed after transformers had been imported; TODO confirm.
tokenizer = T5TokenizerFast.from_pretrained("t5-base")



TypeError: ignored

AttributeError: ignored