In [1]:
#make imports

import os
import sys
import pandas as pd
import numpy as np
import re
import nltk
from nltk.stem import WordNetLemmatizer

nltk.download('wordnet')
!unzip /usr/share/nltk_data/corpora/wordnet.zip -d /usr/share/nltk_data/corpora/

#method to preprocess the text
def preprocess_text(text):

    #remove the html tags / links
    text = re.sub(r'<.*?>', '', text)

    #remove special characters
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    #lemmatize the text
    lemmatizer = WordNetLemmatizer()
    text = ' '.join([lemmatizer.lemmatize(word) for word in text.split()])

    #normalize the text
    text = text.lower()

    return text

#load the saved model
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.load_state_dict(torch.load("/kaggle/input/inference_model/pytorch/inference_slug/1/finetuned_final_final.pth"), strict=False)

#take input from user
input_text = input("Enter the text: ")

#preprocess the input text
input_text = preprocess_text(input_text)

#send model to gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

#generate summary
model.eval()

def generate_summary(review_text):
    # tokenize input review
    inputs = tokenizer.encode(review_text+" TL;DR ", return_tensors='pt').to(device)

    # generate summary
    with torch.no_grad():
        summary_ids = model.generate(inputs, max_length=len(inputs[0])+31, num_beams=4, repetition_penalty=3.0, length_penalty=3.0, early_stopping=False)
    
    # decode summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

gen_summary = generate_summary(input_text).split("TL;DR")[-1].strip()
print("The generated summary is: ", gen_summary)

[nltk_data] Downloading package wordnet to /usr/share/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
Archive:  /usr/share/nltk_data/corpora/wordnet.zip
   creating: /usr/share/nltk_data/corpora/wordnet/
  inflating: /usr/share/nltk_data/corpora/wordnet/lexnames  
  inflating: /usr/share/nltk_data/corpora/wordnet/data.verb  
  inflating: /usr/share/nltk_data/corpora/wordnet/index.adv  
  inflating: /usr/share/nltk_data/corpora/wordnet/adv.exc  
  inflating: /usr/share/nltk_data/corpora/wordnet/index.verb  
  inflating: /usr/share/nltk_data/corpora/wordnet/cntlist.rev  
  inflating: /usr/share/nltk_data/corpora/wordnet/data.adj  
  inflating: /usr/share/nltk_data/corpora/wordnet/index.adj  
  inflating: /usr/share/nltk_data/corpora/wordnet/LICENSE  
  inflating: /usr/share/nltk_data/corpora/wordnet/citation.bib  
  inflating: /usr/share/nltk_data/corpora/wordnet/noun.exc  
  inflating: /usr/share/nltk_data/corpora/wordnet/verb.exc  
  inflating: /usr/share/nltk_data/co

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Enter the text:  "I don't know if it's the cactus or the tequila or just the unique combination of ingredients, but the flavour of this hot sauce makes it one of a kind!  We picked up a bottle once on a trip we were on and brought it back home with us and were totally blown away!  When we realized that we simply couldn't find it anywhere in our city we were bummed.<br /><br />Now, because of the magic of the internet, we have a case of the sauce and are ecstatic because of it.<br /><br />If you love hot sauce..I mean really love hot sauce, but don't want a sauce that tastelessly burns your throat, grab a bottle of Tequila Picante Gourmet de Inclan.  Just realize that once you taste it, you will never want to use any other sauce.<br /><br />Thank you for the personal, incredible service!"


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


The generated summary is:  great my dog good treat best i very gluten coffee excellent is tea ydlummyum flavor not likey food productnut snack freeuffsese these delicious


In [2]:
#caculate rouge score

!pip install rouge_score

from rouge_score import rouge_scorer
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

#take actual summary from user
input_summary = input("Enter the actual summary: ")
print(input_summary[:200], gen_summary)
scores = scorer.score(input_summary[:200], gen_summary)

print(scores)

  pid, fd = os.forkpty()


Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=86abe8fc5d43047ca7fca0260dfcbb23b52c539eb020d65229a12f883eb99b81
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


Enter the actual summary:  The Best Hot Sauce in the World


The Best Hot Sauce in the World great my dog good treat best i very gluten coffee excellent is tea ydlummyum flavor not likey food productnut snack freeuffsese these delicious
{'rouge1': Score(precision=0.043478260869565216, recall=0.14285714285714285, fmeasure=0.06666666666666667), 'rouge2': Score(precision=0.0, recall=0.0, fmeasure=0.0), 'rougeL': Score(precision=0.043478260869565216, recall=0.14285714285714285, fmeasure=0.06666666666666667)}
