In [None]:
import torch
import torch.nn as nn

In [None]:
class SelfAttention(nn.Module):
  """Multi-head attention layer: per-head value/key/query projections plus an output projection.

  Args:
    embed_size: Size of the embedding dimension.
    heads: Number of attention heads; must divide ``embed_size`` evenly.

  Raises:
    AssertionError: If ``embed_size`` is not divisible by ``heads``.
  """

  # The constructor must be spelled __init__ (double underscores); with `_init_`
  # Python never calls it and SelfAttention(embed_size, heads) fails.
  def __init__(self, embed_size, heads):
    super().__init__()
    self.embed_size = embed_size
    self.heads = heads
    self.head_dim = embed_size // heads

    assert (self.head_dim * heads == embed_size), "Embed size needs to be divisible by heads"

    # The projections act on the last (head_dim) axis, so the same weights are shared by every head.
    self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
    self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
    self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
    # Maps the concatenated heads back to the embedding size.
    self.fc_out = nn.Linear(heads * self.head_dim, embed_size)

In [None]:
# Mount Google Drive at /content/drive (Colab-specific); the input CSV read
# later in this notebook is located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
  def forward(self, values, keys, query, mask):
    """Compute multi-head scaled dot-product attention (method of SelfAttention).

    Args:
      values: Tensor of shape (N, value_len, embed_size).
      keys: Tensor of shape (N, key_len, embed_size).
      query: Tensor of shape (N, query_len, embed_size).
      mask: Tensor broadcastable to (N, heads, query_len, key_len); positions
        where ``mask == 0`` are excluded from attention. ``None`` disables masking.

    Returns:
      Tensor of shape (N, query_len, embed_size).
    """
    N = query.shape[0]
    value_len, key_len, query_len = values.shape[1], keys.shape[1], query.shape[1]

    # Split the embedding into self.heads pieces of size self.head_dim.
    # Keys must be reshaped with key_len (not query_len) so cross-attention with
    # different source/target lengths works.
    values = values.reshape(N, value_len, self.heads, self.head_dim)
    keys = keys.reshape(N, key_len, self.heads, self.head_dim)
    queries = query.reshape(N, query_len, self.heads, self.head_dim)

    # Apply the learned projections BEFORE computing the scores; otherwise the
    # key/query weights never influence the attention pattern.
    values = self.values(values)
    keys = self.keys(keys)
    queries = self.queries(queries)

    # queries shape : (N, query_len, heads, head_dim)
    # keys shape    : (N, key_len, heads, head_dim)
    # energy shape  : (N, heads, query_len, key_len)
    energy = torch.einsum("nqhd,nkhd->nhqk", queries, keys)

    if mask is not None:
      # A very large negative score becomes a zero weight after softmax.
      energy = energy.masked_fill(mask == 0, float("-1e20"))

    # Softmax over the key axis. NOTE(review): the scale uses embed_size as in the
    # original code; "Attention Is All You Need" scales by sqrt(head_dim) instead.
    attention = torch.softmax(energy / (self.embed_size ** (1/2)), dim=3)

    # attention shape : (N, heads, query_len, key_len)
    # values shape    : (N, value_len, heads, head_dim)   (value_len == key_len)
    # after einsum    : (N, query_len, heads, head_dim), then flatten the last two dimensions
    out = torch.einsum("nhql,nlhd->nqhd", attention, values).reshape(
        N, query_len, self.heads * self.head_dim
    )

    # Assign the projected result (the original `out - ...` discarded it).
    out = self.fc_out(out)
    return out

In [None]:
#Single Block
class TransformerBlock(nn.Module):
  """Attention sub-layer followed by a position-wise feed-forward sub-layer.

  Each sub-layer is wrapped with a residual connection, LayerNorm and dropout.

  Args:
    embed_size: Size of the embedding dimension.
    heads: Number of attention heads passed to SelfAttention.
    dropout: Dropout probability.
    forward_expansion: Width multiplier of the hidden feed-forward layer.
  """

  def __init__(self, embed_size, heads, dropout, forward_expansion):
    super().__init__()
    self.attention = SelfAttention(embed_size, heads)
    self.norm1 = nn.LayerNorm(embed_size)
    self.norm2 = nn.LayerNorm(embed_size)

    # Expand to a wider hidden space, apply the non-linearity, project back.
    self.feed_forward = nn.Sequential(
        nn.Linear(embed_size, forward_expansion * embed_size),
        nn.ReLU(),
        nn.Linear(forward_expansion * embed_size, embed_size),
    )
    self.dropout = nn.Dropout(dropout)

  def forward(self, value, key, query, mask):
    """Run attention and the feed-forward network on the inputs.

    Args:
      value, key, query: Tensors whose last dimension is ``embed_size``.
      mask: Attention mask forwarded to ``self.attention`` (may be ``None``).

    Returns:
      Tensor with the same shape as ``query``.
    """
    attention = self.attention(value, key, query, mask)

    # Residual connection around attention, normalised with norm1
    # (norm1 was created in the constructor but previously never applied).
    x = self.dropout(self.norm1(attention + query))
    forward = self.feed_forward(x)
    out = self.dropout(self.norm2(forward + x))
    return out

### **ENCODER**

In [None]:
class Encoder(nn.Module):
  """Stack of TransformerBlocks applied to token + learned position embeddings.

  Args:
    src_vocab_size: Number of entries in the source token embedding table.
    embed_size: Size of the embedding dimension.
    num_layers: Number of TransformerBlocks.
    heads: Number of attention heads per block.
    device: Device on which the position indices are created.
    forward_expansion: Feed-forward width multiplier passed to each block.
    dropout: Dropout probability.
    max_length: Number of entries in the position embedding table.
  """

  def __init__(
      self,
      src_vocab_size,
      embed_size,
      num_layers,
      heads,
      device,
      forward_expansion,
      dropout,
      max_length,
      ):
    super().__init__()
    self.embed_size = embed_size
    self.device = device
    self.word_embedding = nn.Embedding(src_vocab_size, embed_size)
    self.position_embedding = nn.Embedding(max_length, embed_size)

    self.layers = nn.ModuleList(
        [
            TransformerBlock(
                embed_size,
                heads,
                dropout=dropout,
                forward_expansion=forward_expansion,
            )
            for _ in range(num_layers)
        ]
    )
    self.dropout = nn.Dropout(dropout)

  # forward must be defined at class level; nested inside the constructor it was
  # only a local function and the module had no usable forward pass.
  def forward(self, x, mask):
    """Encode a batch of token indices.

    Args:
      x: Integer tensor of shape (N, seq_length) holding token indices.
      mask: Attention mask forwarded to every layer (may be ``None``).

    Returns:
      Tensor of shape (N, seq_length, embed_size).
    """
    N, seq_length = x.shape
    # Position indices 0..seq_length-1, repeated for every sequence in the batch.
    positions = torch.arange(0, seq_length, device=self.device).expand(N, seq_length)

    out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))

    # Self-attention: value, key and query are all the running representation.
    for layer in self.layers:
      out = layer(out, out, out, mask)

    return out



### DECODER BLOCK

In [None]:
class DecoderBlock(nn.Module):
  """Masked self-attention on the target followed by a TransformerBlock over the encoder output.

  Args:
    embed_size: Size of the embedding dimension.
    heads: Number of attention heads.
    forward_expansion: Feed-forward width multiplier for the TransformerBlock.
    dropout: Dropout probability.
    device: Accepted for signature compatibility; not used by this block.
  """

  def __init__(self, embed_size, heads, forward_expansion, dropout, device):
    super().__init__()
    # SelfAttention requires both embed_size and heads.
    self.attention = SelfAttention(embed_size, heads)
    # LayerNorm applied after the self-attention residual (used in forward).
    self.norm = nn.LayerNorm(embed_size)
    self.transformer_block = TransformerBlock(
        embed_size, heads, dropout, forward_expansion
    )
    self.dropout = nn.Dropout(dropout)

  def forward(self, x, value, key, src_mask, trg_mask):
    """Decode one layer.

    Args:
      x: Target-side representation; used as value, key and query of the self-attention.
      value: Values for the TransformerBlock attention.
      key: Keys for the TransformerBlock attention.
      src_mask: Mask passed to the TransformerBlock attention.
      trg_mask: Mask passed to the self-attention over ``x``.

    Returns:
      Output of the TransformerBlock.
    """
    attention = self.attention(x, x, x, trg_mask)
    # Residual + norm produce the query for attention over value/key.
    query = self.dropout(self.norm(attention + x))
    out = self.transformer_block(value, key, query, src_mask)
    return out


### DECODER

In [None]:
class Decoder(nn.Module):
  """Stack of DecoderBlocks over target embeddings, followed by a projection to the vocabulary.

  Args:
    trg_vocab_size: Number of entries in the target token embedding table and size of the output projection.
    embed_size: Size of the embedding dimension.
    num_layers: Number of DecoderBlocks.
    heads: Number of attention heads per block.
    forward_expansion: Feed-forward width multiplier passed to each block.
    dropout: Dropout probability.
    device: Device on which the position indices are created.
    max_length: Number of entries in the position embedding table.
  """

  def __init__(self,
               trg_vocab_size,
               embed_size,
               num_layers,
               heads,
               forward_expansion,
               dropout,
               device,
               max_length,
               ):
    super().__init__()
    self.device = device
    self.word_embedding = nn.Embedding(trg_vocab_size, embed_size)
    self.position_embedding = nn.Embedding(max_length, embed_size)

    self.layers = nn.ModuleList(
        [DecoderBlock(embed_size, heads, forward_expansion, dropout, device)
         for _ in range(num_layers)]
    )

    self.fc_out = nn.Linear(embed_size, trg_vocab_size)
    self.dropout = nn.Dropout(dropout)

  def forward(self, x, enc_out, src_mask, trg_mask):
    """Decode target tokens against the encoder output.

    Applies the word and position embeddings to the input sequence, passes the
    result through every DecoderBlock in order, and finally through ``fc_out``.

    Args:
      x: Integer tensor of shape (N, seq_length) holding target token indices.
      enc_out: Encoder output, used as value and key in every block.
      src_mask: Mask for attention over ``enc_out``.
      trg_mask: Mask for the self-attention over the target.

    Returns:
      Tensor of shape (N, seq_length, trg_vocab_size).
    """
    N, seq_length = x.shape
    # Position indices 0..seq_length-1, repeated for every sequence in the batch.
    positions = torch.arange(0, seq_length, device=self.device).expand(N, seq_length)
    x = self.dropout(self.word_embedding(x) + self.position_embedding(positions))

    for layer in self.layers:
      x = layer(x, enc_out, enc_out, src_mask, trg_mask)

    # Project only after ALL layers ran; inside the loop this returned after the
    # first layer (and returned None when there were no layers).
    out = self.fc_out(x)
    return out


In [None]:
class Transformer(nn.Module):
  """Encoder-decoder model assembled from the Encoder and Decoder classes.

  Args:
    src_vocab_size: Source vocabulary size.
    trg_vocab_size: Target vocabulary size.
    src_pad_idx: Padding index in the source; padded positions are masked out.
    trg_pad_idx: Padding index in the target (stored; not used by the masks built here).
    embed_size: Size of the embedding dimension.
    num_layers: Number of layers in both the encoder and the decoder.
    forward_expansion: Feed-forward width multiplier.
    heads: Number of attention heads.
    dropout: Dropout probability.
    device: Device for the masks and position indices.
    max_length: Maximum sequence length supported by the position embeddings.
  """

  def __init__(
      self,
      src_vocab_size,
      trg_vocab_size,
      src_pad_idx,
      trg_pad_idx,
      embed_size=256,
      num_layers=6,
      forward_expansion=4,
      heads=8,
      dropout=0,
      device="cuda",
      max_length=100
  ):
    super().__init__()

    self.encoder = Encoder(
        src_vocab_size,
        embed_size,
        num_layers,
        heads,
        device,
        forward_expansion,
        dropout,
        max_length,
    )

    # embed_size must be passed as the second argument; omitting it shifted
    # every later argument by one position.
    self.decoder = Decoder(
        trg_vocab_size,
        embed_size,
        num_layers,
        heads,
        forward_expansion,
        dropout,
        device,
        max_length,
    )
    self.src_pad_idx = src_pad_idx
    self.trg_pad_idx = trg_pad_idx
    self.device = device

  def make_src_mask(self, src):
    """Return a boolean mask of shape (N, 1, 1, src_len) that is False at padding positions."""
    src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
    return src_mask.to(self.device)

  def make_trg_mask(self, trg):
    """Return a lower-triangular mask of shape (N, 1, trg_len, trg_len)."""
    N, trg_len = trg.shape
    # Ones on and below the diagonal: position i may attend to positions <= i.
    trg_mask = torch.tril(torch.ones((trg_len, trg_len))).expand(
        N, 1, trg_len, trg_len
    )
    return trg_mask.to(self.device)

  def forward(self, src, trg):
    """Encode ``src``, decode ``trg`` against it and return the decoder output.

    Args:
      src: Integer tensor of shape (N, src_len) with source token indices.
      trg: Integer tensor of shape (N, trg_len) with target token indices.
    """
    src_mask = self.make_src_mask(src)
    trg_mask = self.make_trg_mask(trg)
    enc_src = self.encoder(src, src_mask)
    # Assignment, not subtraction: `out - ...` left `out` undefined.
    out = self.decoder(trg, enc_src, src_mask, trg_mask)
    return out

In [None]:
!pip install transformers[sentencepiece]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers[sentencepiece]
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m56.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<=3.20.2
  Downloading protobuf-3.20.2-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Tokenizer and seq2seq model are both loaded from the same Hugging Face checkpoint.
checkpoint = "google/roberta2roberta_L-24_cnn_daily_mail"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
# Sample news article (from The Hollywood Reporter) that is tokenized and
# passed to the model below.
article = """	(The Hollywood Reporter)"The Rocky Horror Picture
Show" is the latest musical getting the small-
screen treatment. Fox is developing a two-hour
remake of the 1975 cult classic to be directed,
executive-produced and choreographed by Kenneth
Ortega ("High School Musical"). The project,
tentatively titled "The Rocky Horror Picture Show
Event," is casting-contingent. The special will be
filmed in advance and not air live, but few
details beyond that are known. In addition to
Ortega, Gail Berman and Lou Adler, who produced
the original film, are also attached as executive
producers. The special will be produced by Fox 21
Television Studios, and Berman's The Jackal Group.
The special is timed to celebrate the 40th
anniversary of the film, which has grossed more
than $112 million and still plays in theaters
across the country. TV premiere dates: The
complete guide . This isn't the first stab at
adapting "The Rocky Horror Picture Show." In 2002,
Fox unveiled plans for an adaptation timed to the
30th anniversary that never came to fruition. The
faces of pilot season 2015 . Fox's "Glee" covered
several of the show's most popular songs for a
Season 2 episode and even released a special "The
Rocky Horror Glee Show" EP. There is no plan yet
for when the adaptation will air. Fox also has a
live musical production of "Grease", starring
Julianne Hough and Vanessa Hudgens, scheduled to
air on Jan. 31, 2016. Broadcast TV scorecard .
Following in the footsteps of "The Sound of Music"
and "Peter Pan," NBC recently announced plans to
air a live version of The Wiz later this year.
Ortega's credits include "Gilmore Girls," "This Is
It" and "Hocus Pocus." He is repped by Paradigm
and Hanson, Jacobson. ©2015 The Hollywood
Reporter. All rights reserved."""

# Tokenize the article into PyTorch tensors and keep only the token ids.
encoded_article = tokenizer(article, return_tensors="pt")
input_ids = encoded_article.input_ids

# generate() returns one sequence per input; take the first (only) one.
generated_sequences = model.generate(input_ids)
output_ids = generated_sequences[0]

Downloading (…)lve/main/config.json:   0%|          | 0.00/3.44k [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/846k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.82G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/207 [00:00<?, ?B/s]



In [None]:
# Turn the generated token ids back into text, dropping special tokens, and print it.
decoded_summary = tokenizer.decode(output_ids, skip_special_tokens=True)
print(decoded_summary)

Fox is developing a two-hour remake of the 1975 cult classic. The special will be directed, executive-produced and choreographed by Kenneth Ortega. The special is timed to celebrate the 40th anniversary of the film, which has grossed more than $112 million.


In [None]:
import spacy
from transformers import pipeline
import csv

# Input CSV (on the mounted Google Drive) and the results file written to the working directory.
INPUT_CSV_PATH = '/content/drive/MyDrive/Colab Notebooks/Untitled-document-_1_.csv'
OUTPUT_CSV_PATH = 'output.csv'

# Load SpaCy's English language model
nlp = spacy.load("en_core_web_sm")

# Load the summarization pipeline with an explicit checkpoint and revision.
# These are the values the library reported defaulting to when none were given;
# pinning them keeps re-runs reproducible and silences the "no model supplied" warning.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    revision="a4f8f3e",
)

# The csv module expects files opened with newline='' so embedded newlines in
# quoted fields are handled correctly.
with open(INPUT_CSV_PATH, 'r', newline='', encoding='utf-8') as input_file, \
     open(OUTPUT_CSV_PATH, 'w', newline='', encoding='utf-8') as output_file:
    csv_reader = csv.reader(input_file)
    csv_writer = csv.writer(output_file)

    # Process each row of the input CSV file; the text is taken from the first column.
    for row in csv_reader:
        # csv.reader yields an empty list for blank lines; skip them instead of
        # failing on row[0].
        if not row:
            continue

        # Convert the text to a SpaCy Doc object
        doc = nlp(row[0])

        # Summarize the text; truncation=True clips inputs longer than the model
        # accepts instead of raising an error.
        summary = summarizer(
            doc.text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']

        # Write the original text, the summarized text, and whether they differ.
        csv_writer.writerow([doc.text, summary, doc.text != summary])

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]