In [1]:
import torch
import torch.nn as nn
import math

In [2]:
class SelfAttention(nn.Module):
  def __init__(self, embed_size, heads):
    super(SelfAttention, self).__init__()
    self.embed_size = embed_size
    self.heads = heads
    self.head_dim = embed_size // heads
    self.values= nn.Linear(embed_size,embed_size)
    self.keys = nn.Linear(embed_size, embed_size)
    self.queries = nn.Linear(embed_size, embed_size)
    self.fc_out = nn.Linear(embed_size, embed_size)

  def forward(self,x):
    N, seq_length,embed_size=x.shape
    values = self.values(x)
    keys = self.keys(x)
    queries = self.queries(x)

    energy=torch.matmul(queries,keys.permute(0,2,1))/math.sqrt(self.embed_size)
    attention = torch.softmax(energy, dim=-1)
    out = torch.matmul(attention, values)

    return self.fc_out(out)

In [3]:
class PositionalEncoding(nn.Module):
    def __init__(self, embed_size, max_len=100):
        super().__init__()
        pe = torch.zeros(max_len, embed_size)
        position = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embed_size, 2) * (-math.log(10000.0) / embed_size))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:, :x.size(1)]


In [4]:
from transformers import pipeline
classifier= pipeline("sentiment-analysis")
result= classifier("Tranforrmers are amazing!")
print(result)

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


[{'label': 'POSITIVE', 'score': 0.9998518228530884}]


In [7]:
summarizer= pipeline("summarization")
text= "Tranformers are neural networks that use attention to boost performance on NLP tasks."
print(summarizer(text,max_length=56, min_length=5, do_sample= False))



No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
Device set to use cpu
Your max_length is set to 56, but your input_length is only 20. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


[{'summary_text': ' Tranformers are neural networks that use attention to boost performance on NLP tasks .'}]


In [8]:
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import Trainer, TrainingArguments
from datasets import load_dataset

tokenizer= BertTokenizer.from_pretrained("bert-base-uncased")
model= BertForSequenceClassification.from_pretrained("bert-base-uncased")

dataset= load_dataset("imdb", split= "train[:1%]")
def preprocess(example):
  return tokenizer(example["text"],truncation=True,padding=True)

encoded= dataset.map(preprocess, batched= True)
encoded= encoded.rename_column("label", "labels")

training_args = TrainingArguments(output_dir="test_trainer", per_device_train_batch_size=8, num_train_epochs=1)
trainer = Trainer(model=model, args=training_args, train_dataset=encoded)

trainer.train()

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading readme: 0.00B [00:00, ?B/s]

ValueError: Invalid pattern: '**' can only be an entire path component