# Library installation

In [1]:
!pip install --quiet flashtext
!pip install --quiet transformers
!pip install --quiet sentencepiece
!pip install --quiet textwrap3
!pip install --quiet gradio
!pip install --quiet strsim
!pip install --quiet sense2vec
!pip install --quiet sentence-transformers
!pip install scipy
!pip install networkx

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for flashtext (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m82.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m66.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m44.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.7/19.7 MB[0m [31m69.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?2

In [2]:
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
import torch
import pandas as pd
import numpy as np
import textwrap

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Evaluative using BLEU

In [4]:
raw_paragraph = "After centuries of Danish, Swedish and German rule the native Estonians started to yearn for independence during the period of national awakening while being governed by the Russian Empire. Established on 24 February 1918, the Republic of Estonia came into existence towards the end of World War I. During World War II, Estonia was then occupied by the Soviet Union in 1940, then Nazi Germany a year later and again in 1944 establishing the Estonian Soviet Socialist Republic. In 1988, during the Singing Revolution, the Estonian SSR issued the Estonian Sovereignty Declaration to defy against the illegal Soviet rule. Estonia then restored its independence during the 1991 coup by the Soviets on the night of 20 August 1991."
raw_answer = "1918"
actual_question = "What year was the Republic of Estonia established??"

In [5]:
# raw_paragraph = "Throughout the 1980s and 1990s, demand for a Scottish Parliament grew, in part because the government of the United Kingdom was controlled by the Conservative Party, while Scotland itself elected relatively few Conservative MPs. In the aftermath of the 1979 referendum defeat, the Campaign for a Scottish Assembly was initiated as a pressure group, leading to the 1989 Scottish Constitutional Convention with various organisations such as Scottish churches, political parties and representatives of industry taking part. Publishing its blueprint for devolution in 1995, the Convention provided much of the basis for the structure of the Parliament."
# raw_answer = "blueprint"
# actual_question = "What provided much of the basis for the structure of the Parliament in 1995?"

In [6]:
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_bleu_scores(actual_question, predicted_question):
    # Tokenize the actual and predicted questions
    actual_question_tokenized = actual_question.split()
    predicted_question_tokenized = predicted_question.split()

    # Calculate BLEU-1, BLEU-2, BLEU-3, and BLEU-4 scores
    weights = [(1.0, 0.0, 0.0, 0.0), (0.5, 0.5, 0.0, 0.0), (0.33, 0.33, 0.33, 0.0), (0.25, 0.25, 0.25, 0.25)]
    bleu_scores = []

    for n in range(1, 5):
        bleu = sentence_bleu([actual_question_tokenized], predicted_question_tokenized, weights=weights[n-1], smoothing_function=SmoothingFunction().method1)
        bleu_scores.append(bleu)

    return bleu_scores

# Evaluating using METEOR

In [7]:
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [8]:
import nltk
from nltk.translate import meteor_score

def calculate_meteor_score(actual_question, predicted_question):
    # Tokenize the actual and predicted questions
    actual_question_tokenized = nltk.word_tokenize(actual_question)
    predicted_question_tokenized = nltk.word_tokenize(predicted_question)

    # Calculate the METEOR score
    meteor_score_value = meteor_score.meteor_score([actual_question_tokenized], predicted_question_tokenized)
    return meteor_score_value

# Testing GPT Neo finetuned

In [9]:
# output_dir = "/content/drive/MyDrive/GPT_Neo_finetuned_CA V1"
output_dir = "/content/drive/MyDrive/SQuAD_summary_old_paq_GPT_Neo_finetuned_done"
# output_dir = "/content/drive/MyDrive/SQuAD_summary_paq_GPT_Neo_finetuned_done"
# output_dir = "/content/drive/MyDrive/GPT_Neo_finetuned_v1_final"
# output_dir = "/content/drive/MyDrive/GPT_Neo_finetuned_v2"

In [10]:
model_name = "EleutherAI/gpt-neo-125M"
model_neo = GPTNeoForCausalLM.from_pretrained(output_dir)
tokenizer_neo = GPT2Tokenizer.from_pretrained(model_name)

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/357 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

In [11]:
def generate_question_neo(paragraph, answer, model = model_neo, tokenizer = tokenizer_neo):
    text = "<START> <CONTEXT_START> " + paragraph + " <CONTEXT_END> <ANSWER_START> " + answer + " <ANSWER_END>"

    # Tokenize the paragraph
    input_ids = tokenizer.encode(text, add_special_tokens=False, return_tensors='pt')

    # Generate qa
    with torch.no_grad():
        output = model.generate(input_ids, max_length=500, do_sample=True, temperature=0.1)

    generated_qa = tokenizer.decode(output[0], skip_special_tokens=True)

    generated_qa = generated_qa.split(">")
    question = generated_qa[6].split("<")[0].strip()

    return question

In [12]:
neo_question = generate_question_neo(raw_paragraph, raw_answer)
print("Generated Question : ", neo_question)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Question :  When was the Republic of Europe established?


In [13]:
bleu_scores_neo = calculate_bleu_scores(actual_question, neo_question)

print("Paragragh : ", textwrap.fill(raw_paragraph, width=120))
print()
print("Answer : ", raw_answer)
print()
print("Actual Question : ", actual_question)
print()
print("Predicted Question : ", neo_question)
print()
print()
print("GPT Neo Predictions scores")
print("BLEU-1: ", bleu_scores_neo[0])
print("BLEU-2: ", bleu_scores_neo[1])
print("BLEU-3: ", bleu_scores_neo[2])
print("BLEU-4: ", bleu_scores_neo[3])

Paragragh :  After centuries of Danish, Swedish and German rule the native Estonians started to yearn for independence during the
period of national awakening while being governed by the Russian Empire. Established on 24 February 1918, the Republic
of Estonia came into existence towards the end of World War I. During World War II, Estonia was then occupied by the
Soviet Union in 1940, then Nazi Germany a year later and again in 1944 establishing the Estonian Soviet Socialist
Republic. In 1988, during the Singing Revolution, the Estonian SSR issued the Estonian Sovereignty Declaration to defy
against the illegal Soviet rule. Estonia then restored its independence during the 1991 coup by the Soviets on the night
of 20 August 1991.

Answer :  1918

Actual Question :  What year was the Republic of Estonia established??

Predicted Question :  When was the Republic of Europe established?


GPT Neo Predictions scores
BLEU-1:  0.4953587998572467
BLEU-2:  0.4633657281473354
BLEU-3:  0.423735913

In [14]:
meteor_score_value = calculate_meteor_score(actual_question, neo_question)
print("METEOR score for GPT-Neo: ", meteor_score_value)

METEOR score for GPT-Neo:  0.5739795918367346


# Testing T5 finetuned

In [15]:
import torch
from transformers import T5ForConditionalGeneration,T5Tokenizer

In [16]:
trained_model_path = '/content/drive/MyDrive/t5_paq_v1_done/model'
trained_tokenizer = '/content/drive/MyDrive/t5_paq_v1_done/tokenizer'

In [17]:
model_t5 = T5ForConditionalGeneration.from_pretrained(trained_model_path)
tokenizer_t5 = T5Tokenizer.from_pretrained(trained_tokenizer)

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print ("device ",device)
model_t5 = model_t5.to(device)

device  cpu


In [19]:
text = "context: "+ raw_paragraph + " " + "answer: " + raw_answer + " </s>"
print (text)

context: After centuries of Danish, Swedish and German rule the native Estonians started to yearn for independence during the period of national awakening while being governed by the Russian Empire. Established on 24 February 1918, the Republic of Estonia came into existence towards the end of World War I. During World War II, Estonia was then occupied by the Soviet Union in 1940, then Nazi Germany a year later and again in 1944 establishing the Estonian Soviet Socialist Republic. In 1988, during the Singing Revolution, the Estonian SSR issued the Estonian Sovereignty Declaration to defy against the illegal Soviet rule. Estonia then restored its independence during the 1991 coup by the Soviets on the night of 20 August 1991. answer: 1918 </s>


In [20]:
def generate_question_t5(paragraph, ans, model = model_t5, tokenizer = tokenizer_t5):
  text = "context: "+paragraph + " " + "answer: " + ans + " </s>"
  encoding = tokenizer.encode_plus(text,max_length =512, padding=True, return_tensors="pt")
  input_ids,attention_mask  = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)

  generated = model.generate(
    input_ids=input_ids,attention_mask=attention_mask,
    max_length=72,
    early_stopping=True,
    num_beams=5,
    num_return_sequences=3
    )
  ques = tokenizer.decode(generated[0], skip_special_tokens=True,clean_up_tokenization_spaces=True)

  return ques

In [21]:
t5_question = generate_question_t5(raw_paragraph, raw_answer)
print("Generated Question : ", t5_question)



Generated Question :  question: When was the Republic of Estonia established?


In [22]:
bleu_scores_t5 = calculate_bleu_scores(actual_question, t5_question)
print("Paragragh : ", textwrap.fill(raw_paragraph, width=120))
print()
print("Answer : ", raw_answer)
print()
print("Actual Question : ", actual_question)
print()
print("Predicted Question : ", t5_question.split(":")[1])
print()
print()
print("T5 Predictions")
print("BLEU-1: ", bleu_scores_t5[0])
print("BLEU-2: ", bleu_scores_t5[1])
print("BLEU-3: ", bleu_scores_t5[2])
print("BLEU-4: ", bleu_scores_t5[3])

Paragragh :  After centuries of Danish, Swedish and German rule the native Estonians started to yearn for independence during the
period of national awakening while being governed by the Russian Empire. Established on 24 February 1918, the Republic
of Estonia came into existence towards the end of World War I. During World War II, Estonia was then occupied by the
Soviet Union in 1940, then Nazi Germany a year later and again in 1944 establishing the Estonian Soviet Socialist
Republic. In 1988, during the Singing Revolution, the Estonian SSR issued the Estonian Sovereignty Declaration to defy
against the illegal Soviet rule. Estonia then restored its independence during the 1991 coup by the Soviets on the night
of 20 August 1991.

Answer :  1918

Actual Question :  What year was the Republic of Estonia established??

Predicted Question :   When was the Republic of Estonia established?


T5 Predictions
BLEU-1:  0.625
BLEU-2:  0.5976143046671968
BLEU-3:  0.566367013455331
BLEU-4:  0.51697

In [23]:
meteor_score_value = calculate_meteor_score(actual_question, t5_question)
print("METEOR score for T5: ", meteor_score_value)

METEOR score for T5:  0.6918367346938775
