## Installing Code Modules and Dependencies

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -U transformers==3.0.0

Collecting transformers==3.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/9c/35/1c3f6e62d81f5f0daff1384e6d5e6c5758682a8357ebc765ece2b9def62b/transformers-3.0.0-py3-none-any.whl (754kB)
[K     |████████████████████████████████| 757kB 5.7MB/s 
[?25hCollecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 27.7MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 34.9MB/s 
[?25hCollecting tokenizers==0.8.0-rc4
[?25l  Downloading https://files.pythonhosted.org/packages/f7/82/0e82a95bd9db2b32569500cc1bb47aa7c4e0f57aa5e35cceba414096917b/tokenizers-0.8.0rc4-cp37-cp37m-manylinux1

In [None]:
!python -m nltk.downloader punkt

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
from nltk import sent_tokenize
import itertools
import logging
from typing import Optional, Dict, Union
import torch
from transformers import(
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
)
logger = logging.getLogger(__name__)

class QGPipeline:
    """Poor man's QG pipeline"""
    def __init__(
        self,
        model: PreTrainedModel,
        tokenizer: PreTrainedTokenizer,
        ans_model: PreTrainedModel,
        ans_tokenizer: PreTrainedTokenizer,
        qg_format: str,
        use_cuda: bool
    ):
        self.model = model
        self.tokenizer = tokenizer

        self.ans_model = ans_model
        self.ans_tokenizer = ans_tokenizer

        self.qg_format = qg_format

        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        self.model.to(self.device)

        if self.ans_model is not self.model:
            self.ans_model.to(self.device)

        assert self.model.__class__.__name__ in ["T5ForConditionalGeneration", "BartForConditionalGeneration"]

        if "T5ForConditionalGeneration" in self.model.__class__.__name__:
            self.model_type = "t5"
        else:
            self.model_type = "bart"

    def __call__(self, inputs: str):
        inputs = " ".join(inputs.split())
        sents, answers = self._extract_answers(inputs)
        flat_answers = list(itertools.chain(*answers))

        if len(flat_answers) == 0:
          return []

        if self.qg_format == "prepend":
            qg_examples = self._prepare_inputs_for_qg_from_answers_prepend(inputs, answers)
        else:
            qg_examples = self._prepare_inputs_for_qg_from_answers_hl(sents, answers)

        qg_inputs = [example['source_text'] for example in qg_examples]
        questions = self._generate_questions(qg_inputs)
        output = [{'answer': example['answer'], 'question': que} for example, que in zip(qg_examples, questions)]
        return output

    def _generate_questions(self, inputs):
        inputs = self._tokenize(inputs, padding=True, truncation=True)

        outs = self.model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            max_length=32,
            num_beams=4,
        )

        questions = [self.tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
        return questions

    def _extract_answers(self, context):
        sents, inputs = self._prepare_inputs_for_ans_extraction(context)
        inputs = self._tokenize(inputs, padding=True, truncation=True)

        outs = self.ans_model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            max_length=32,
        )

        dec = [self.ans_tokenizer.decode(ids, skip_special_tokens=False) for ids in outs]
        answers = [item.split('<sep>') for item in dec]
        answers = [i[:-1] for i in answers]

        return sents, answers

    def _tokenize(self,
        inputs,
        padding=True,
        truncation=True,
        add_special_tokens=True,
        max_length = 512
    ):
        inputs = self.tokenizer.batch_encode_plus(
            inputs,
            max_length=max_length,
            add_special_tokens=add_special_tokens,
            truncation=truncation,
            padding="max_length" if padding else False,
            pad_to_max_length=padding,
            return_tensors="pt"
        )
        return inputs

    def _prepare_inputs_for_ans_extraction(self, text):
        sents = sent_tokenize(text)

        inputs = []
        for i in range(len(sents)):
            source_text = "extract answers:"
            for j, sent in enumerate(sents):
                if i == j:
                    sent = "<hl> %s <hl>" % sent
                source_text = "%s %s" % (source_text, sent)
                source_text = source_text.strip()

            if self.model_type == "t5":
                source_text = source_text + " </s>"
            inputs.append(source_text)

        return sents, inputs

    def _prepare_inputs_for_qg_from_answers_hl(self, sents, answers):
        inputs = []
        for i, answer in enumerate(answers):
            if len(answer) == 0: continue
            for answer_text in answer:
                sent = sents[i]
                sents_copy = sents[:]

                answer_text = answer_text.strip()

                ans_start_idx = sent.index(answer_text)

                sent = f"{sent[:ans_start_idx]} <hl> {answer_text} <hl> {sent[ans_start_idx + len(answer_text): ]}"
                sents_copy[i] = sent

                source_text = " ".join(sents_copy)
                source_text = f"generate question: {source_text}"
                if self.model_type == "t5":
                    source_text = source_text + " </s>"

                inputs.append({"answer": answer_text, "source_text": source_text})

        return inputs

    def _prepare_inputs_for_qg_from_answers_prepend(self, context, answers):
        flat_answers = list(itertools.chain(*answers))
        examples = []
        for answer in flat_answers:
            source_text = f"answer: {answer} context: {context}"
            if self.model_type == "t5":
                source_text = source_text + " </s>"

            examples.append({"answer": answer, "source_text": source_text})
        return examples


class MultiTaskQAQGPipeline(QGPipeline):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __call__(self, inputs: Union[Dict, str]):
        if type(inputs) is str:
            # do qg
            return super().__call__(inputs)
        else:
            # do qa
            return self._extract_answer(inputs["question"], inputs["context"])

    def _prepare_inputs_for_qa(self, question, context):
        source_text = f"question: {question}  context: {context}"
        if self.model_type == "t5":
            source_text = source_text + " </s>"
        return  source_text

    def _extract_answer(self, question, context):
        source_text = self._prepare_inputs_for_qa(question, context)
        inputs = self._tokenize([source_text], padding=False)

        outs = self.model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            max_length=16,
        )

        answer = self.tokenizer.decode(outs[0], skip_special_tokens=True)
        return answer

SUPPORTED_TASKS = {
    "multitask-qa-qg": {
        "impl": MultiTaskQAQGPipeline,
        "default": {
            "model": "valhalla/t5-small-qa-qg-hl",
        }
    }
}

def pipeline(
    task: str,
    model: Optional[Union[str, PreTrainedModel]] = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    qg_format: Optional[str] = "highlight",
    ans_model: Optional[PreTrainedModel] = None,
    ans_tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    use_cuda: Optional[bool] = True,
    **kwargs,
):
    # Retrieve the task
    if task not in SUPPORTED_TASKS:
        raise KeyError("Unknown task {}, available tasks are {}".format(task, list(SUPPORTED_TASKS.keys())))

    targeted_task = SUPPORTED_TASKS[task]
    task_class = targeted_task["impl"]

    # Use default model/config/tokenizer for the task if no model is provided
    if model is None:
        model = targeted_task["default"]["model"]

    # Try to infer tokenizer from model or config name (if provided as str)
    if tokenizer is None:
        if isinstance(model, str):
            tokenizer = model
        else:
            # Impossible to guest what is the right tokenizer here
            raise Exception(
                "Impossible to guess which tokenizer to use. "
                "Please provided a PretrainedTokenizer class or a path/identifier to a pretrained tokenizer."
            )

    # Instantiate tokenizer if needed
    if isinstance(tokenizer, (str, tuple)):
        if isinstance(tokenizer, tuple):
            # For tuple we have (tokenizer name, {kwargs})
            tokenizer = AutoTokenizer.from_pretrained(tokenizer[0], **tokenizer[1])
        else:
            tokenizer = AutoTokenizer.from_pretrained(tokenizer)

    # Instantiate model if needed
    if isinstance(model, str):
        model = AutoModelForSeq2SeqLM.from_pretrained(model)
    return task_class(model=model, tokenizer=tokenizer, ans_model=model, ans_tokenizer=tokenizer, qg_format=qg_format, use_cuda=use_cuda)

In [None]:
import pprint
import gensim
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec

In [None]:
# glove_file = '/content/drive/MyDrive/EdRes/glove.6B.300d.txt'
tmp_file = '/content/drive/MyDrive/EdRes/word2vec_glove.6B.300d.txt'

In [None]:
# glove2word2vec(glove_file, tmp_file)
model = KeyedVectors.load_word2vec_format(tmp_file)

In [None]:
def generateDistractors(answer, count = 3):
    answer = str.lower(answer)
    try:
        closestWords = model.most_similar(positive=[answer], topn=count)
    except:
        return []
    distractors = list(map(lambda x: x[0], closestWords))[0:count]
    return distractors

In [None]:
generateDistractors("Gandhi", 3)

['mahatma', 'indira', 'rajiv']

## Multitask QA-QG

In [None]:
sample_text = "Gravity (from Latin gravitas, meaning 'weight'), or gravitation, is a natural phenomenon by which all \
things with mass or energy—including planets, stars, galaxies, and even light—are brought toward (or gravitate toward) \
one another. On Earth, gravity gives weight to physical objects, and the Moon's gravity causes the ocean tides. \
The gravitational attraction of the original gaseous matter present in the Universe caused it to begin coalescing \
and forming stars and caused the stars to group together into galaxies, so gravity is responsible for many of \
the large-scale structures in the Universe. Gravity has an infinite range, although its effects become increasingly \
weaker as objects get further away"

### base-model QG

In [None]:
base_qaqg_nlp = pipeline("multitask-qa-qg", model="valhalla/t5-base-qa-qg-hl")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=629.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=31.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=65.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=90.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891612585.0, style=ProgressStyle(descri…




#### QG

In [None]:
base_qaqg_nlp(sample_text)

[{'answer': 'gravitation', 'question': 'What is another name for gravity?'},
 {'answer': 'Earth',
  'question': 'On what planet does gravity give weight to physical objects?'},
 {'answer': 'galaxies', 'question': 'What do the stars form into?'},
 {'answer': 'weaker',
  'question': "Gravity's effects become what as objects get further away?"}]

## Testing

In [None]:
import random

In [None]:
def getFromTextFile(file_name):
  para = ""
  with open(file_name) as f:
    para = f.read()
  return para

In [None]:
def displayDistractors(choices):
  random.shuffle(choices)
  choice_index = ["a", "b", "c", "d"]
  for i in range(len(choices)):
    print(f"  {choice_index[i]}) {choices[i]}")

In [None]:
for i in range(6):
  generated_data = []
  para = getFromTextFile(f'/content/drive/MyDrive/EdRes/qg_test/test{i}.txt')
  # print(f'Results for test-{i} : ')
  try:
    results = base_qaqg_nlp(para)
    # print(f"PARAGRAPH-{i}")
    # print(para)

    # print("RESULTS")
    print(len(results))
    for result in results:
      if len(result['answer'].split()) > 1 :
        continue
      mcq_dict = {}
      choices = [result['answer']]
      distractors = generateDistractors(result['answer'], 4)
      choices = choices + distractors[1:4]
      if len(choices) == 1:
        continue
      random.shuffle(choices)
      mcq_dict['question'] = result['question']
      mcq_dict['answer'] = result['answer']
      mcq_dict['choices'] = choices
      generated_data.append(mcq_dict)
    pprint.pprint(generated_data)
  except Exception as e:
    print(f'Error in output, {e}')

8
[{'answer': 'Paging',
  'choices': ['Paging', 'pager', 'cellular', 'telephony'],
  'question': 'What is a memory management scheme that eliminates the need for '
              'contiguous allocation of physical memory?'},
 {'answer': 'TLB',
  'choices': ['TLB', 'nke', 'nobe', 'propiska'],
  'question': 'What is a special, small, fast look up hardware cache?'}]
13
[{'answer': 'Bengal',
  'choices': ['kolkata', 'Bengal', 'orissa', 'odisha'],
  'question': 'Where did the first nationalistic revolutionary movement for '
              'Indian independence emerge from?'},
 {'answer': 'anti-colonial',
  'choices': ['anti-british',
              'anti-imperialist',
              'pro-independence',
              'anti-colonial'],
  'question': 'What was the underlying ideology of the Indian self-rule '
              'movement?'},
 {'answer': '1930s',
  'choices': ['1930s', '1960s', '1950s', '1940s'],
  'question': 'When did the Indian self-rule movement begin?'}]
Error in output, substring n

In [None]:
for i in range(6):
  para = getFromTextFile(f'/content/drive/MyDrive/EdRes/qg_test/test{i}.txt')
  print(f'Results for test-{i} : ')
  try:
    results = base_qaqg_nlp(para)
    print(f"PARAGRAPH-{i}")
    print(para)

    print("RESULTS")
    for result in results:
      if len(result['answer'].split()) > 1 :
        continue
      print(f"Q: {result['question']}")
      choices = [result['answer']]
      distractors = generateDistractors(result['answer'], 4)
      choices = choices + distractors[1:4]
      if len(choices) == 1:
        continue
      displayDistractors(choices)
      print(f"  Ans : {result['answer']}")
    print("")
  except Exception as e:
    print(f'Error in output, {e}')

Results for test-0 : 
PARAGRAPH-0
Paging is a memory management scheme that eliminates the need for contiguous allocation of physical memory. 
This scheme permits the physical address space of a process to be non – contiguous.
Logical Address or Virtual Address (represented in bits): An address generated by the CPU
Logical Address Space or Virtual Address Space( represented in words or bytes): The set of all logical addresses generated by a program
Physical Address (represented in bits): An address actually available on memory unit
Physical Address Space (represented in words or bytes): The set of all physical addresses corresponding to the logical addresses

The hardware implementation of page table can be done by using dedicated registers. But the usage of register for the page table is satisfactory only if page table is small. If page table contain large number of entries then we can use TLB(translation Look-aside buffer), a special, small, fast look up hardware cache.

    The TLB 

## Test and Play Area

In [None]:
#@title Question Generator

paragraph = "Swami Vivekananda's inspiring personality was well known both in India and in America during the last decade of the nineteenth century and the first decade of the twentieth. The unknown monk of India suddenly leapt into fame at the Parliament of Religions held in Chicago in 1893, at which he represented Hinduism. His vast knowledge of Eastern and Western culture as well as his deep spiritual insight, fervid eloquence, brilliant conversation, broad human sympathy, colourful personality, and handsome figure made an irresistible appeal to the many types of Americans who came in contact with him. People who saw or heard Vivekananda even once still cherish his memory after a lapse of more than half a century.  In America Vivekananda's mission was the interpretation of India's spiritual culture, especially in its Vedantic setting. He also tried to enrich the religious consciousness of the Americans through the rational and humanistic teachings of the Vedanta philosophy. In America he became India's spiritual ambassador and pleaded eloquently for better understanding between India and the New World in order to create a healthy synthesis of East and West, of religion and science.  In his own motherland Vivekananda is regarded as the patriot saint of modern India and an inspirer of her dormant national consciousness, To the Hindus he preached the ideal of a strength-giving and man-making religion. Service to man as the visible manifestation of the Godhead was the special form of worship he advocated for the Indians, devoted as they were to the rituals and myths of their ancient faith. Many political leaders of India have publicly acknowledged their indebtedness to Swami Vivekananda.  The Swami's mission was both national and international. A lover of mankind, he strove to promote peace and human brotherhood on the spiritual foundation of the Vedantic Oneness of existence. A mystic of the highest order, Vivekananda had a direct and intuitive experience of Reality. He derived his ideas from that unfailing source of wisdom and often presented them in the soul stirring language of poetry.  The natural tendency of Vivekananda's mind, like that of his Master, Ramakrishna, was to soar above the world and forget itself in contemplation of the Absolute. But another part of his personality bled at the sight of human suffering in East and West alike. It might appear that his mind seldom found a point of rest in its oscillation between contemplation of God and service to man. Be that as it may, he chose, in obedience to a higher call, service to man as his mission on earth; and this choice has endeared him to people in the West, Americans in particular.  In the course of a short life of thirty-nine years (1863-1902), of which only ten were devoted to public activities-and those, too, in the midst of acute physical suffering-he left for posterity his four classics: Jnana-Yoga, Bhakti-Yoga, Karma-Yoga, and Raja-Yoga, all of which are outstanding treatises on Hindu philosophy. In addition, he delivered innumerable lectures, wrote inspired letters in his own hand to his many friends and disciples, composed numerous poems, and acted as spiritual guide to the many seekers, who came to him for instruction. He also organized the Ramakrishna Order of monks, which is the most outstanding religious organization of modern India. It is devoted to the propagation of the Hindu spiritual culture not only in the Swami's native land, but also in America and in other parts of the world.  Swami Vivekananda once spoke of himself as a \"condensed India.\" His life and teachings are of inestimable value to the West for an understanding of the mind of Asia. William James, the Harvard philosopher, called the Swami the \"paragon of Vedantists.\" Max Muller and Paul Deussen, the famous Orientalists of the nineteenth century, held him in genuine respect and affection. \"His words,\" writes Romain Rolland, \"are great music, phrases in the style of Beethoven, stirring rhythms like the march of Handel choruses. I cannot touch these sayings of his, scattered as they are through the pages of books, at thirty years' distance, without receiving a thrill through my body like an electric shock. And what shocks, what transports, must have been produced when in burning words they issued from the lips of the hero!''  In 1881 Narendra first met Ramakrishna, who became his spiritual focus after his own father had died in 1884. Narendra's first introduction to Ramakrishna occurred in a literature class at General Assembly's Institution when he heard Professor William Hastie lecturing on William Wordsworth's poem, The Excursion. While explaining the word \"trance\" in the poem, Hastie suggested that his students visit Ramakrishna of Dakshineswar to understand the true meaning of trance. This prompted some of his students (including Narendra) to visit Ramakrishna Ramakrishna, guru of Vivekananda  Vivekananda in Cossipore 1886 They probably first met personally in November 1881 though Narendra did not consider this their first meeting, and neither man mentioned this meeting later. At this time, Narendra was preparing for his upcoming F. A. examination, when Ram Chandra Datta accompanied him to Surendra Nath Mitra's, house where Ramakrishna was invited to deliver a lecture.According to Paranjape, at this meeting Ramakrishna asked young Narendra to sing. Impressed by his singing talent, he asked Narendra to come to Dakshineshwar. In late 1881 or early 1882, Narendra went to Dakshineswar with two friends and met Ramakrishna. This meeting proved to be a turning point in his life.Although he did not initially accept Ramakrishna as his teacher and rebelled against his ideas, he was attracted by his personality and began to frequently visit him at Dakshineswar. He initially saw Ramakrishna's ecstasies and visions as \"mere figments of imagination\" and \"hallucinations\".As a member of Brahmo Samaj, he opposed idol worship, polytheism and Ramakrishna's worship of Kali.He even rejected the Advaita Vedanta of \"identity with the absolute\" as blasphemy and madness, and often ridiculed the idea. Narendra tested Ramakrishna, who faced his arguments patiently: \"Try to see the truth from all angles\", he replied. Narendra's father's sudden death in 1884 left the family bankrupt; creditors began demanding the repayment of loans, and relatives threatened to evict the family from their ancestral home. Narendra, once a son of a well-to-do family, became one of the poorest students in his college.He unsuccessfully tried to find work and questioned God's existence, but found solace in Ramakrishna and his visits to Dakshineswar increased. One day, Narendra requested Ramakrishna to pray to goddess Kali for their family's financial welfare. Ramakrishna suggested him to go to the temple himself and pray. Following Ramakrishna's suggestion, he went to the temple thrice, but failed to pray for any kind of worldly necessities and ultimately prayed for true knowledge and devotion from the goddess.Narendra gradually grew ready to renounce everything for the sake of realising God, and accepted Ramakrishna as his Guru. In 1885, Ramakrishna developed throat cancer, and was transferred to Calcutta and (later) to a garden house in Cossipore. Narendra and Ramakrishna's other disciples took care of him during his last days, and Narendra's spiritual education continued. At Cossipore, he experienced Nirvikalpa samadhi.Narendra and several other disciples received ochre robes from Ramakrishna, forming his first monastic order.He was taught that service to men was the most effective worship of God. Ramakrishna asked him to care for the other monastic disciples, and in turn asked them to see Narendra as their leader.Ramakrishna died in the early-morning hours of 16 August 1886 in Cossipore." #@param {type:"string"}

results = base_qaqg_nlp(paragraph)

# print("RESULTS")
# for result in results:
#   print(f"Q: {result['question']}")
#   print(f"A: {result['answer']}")
#   print()
try:
  print("RESULTS")
  for result in results:
    if len(result['answer'].split()) > 1 :
      continue
    print(f"Q: {result['question']}")
    choices = [result['answer']]
    distractors = generateDistractors(result['answer'], 4)
    choices = choices + distractors[1:4]
    if len(choices) == 1:
      continue
    displayDistractors(choices)
    print(f"  Ans : {result['answer']}")
except:
  print('try another')

RESULTS
Q: Vivekananda's mission was the interpretation of India's spiritual culture, especially in what setting?
  a) Vedantic
  b) cārvāka
  c) mahayana
  d) upanishadic
  Ans : Vedantic
Q: What country was Vivekananda's spiritual ambassador in America?
  a) bangladesh
  b) delhi
  c) pakistan
  d) India
  Ans : India
Q: Who had a direct and intuitive experience of Reality?
  a) sarada
  b) Vivekananda
  c) ramakrishna
  d) ashram
  Ans : Vivekananda
Q: Who was Swami Vivekananda's Master?
  a) belur
  b) sarada
  c) swami
  d) Ramakrishna
  Ans : Ramakrishna


In [None]:
test_para = """Swami Vivekananda's inspiring personality was well known both in India and in America during the last decade of the nineteenth century and the first decade of the twentieth. The unknown monk of India suddenly leapt into fame at the Parliament of Religions held in Chicago in 1893, at which he represented Hinduism. His vast knowledge of Eastern and Western culture as well as his deep spiritual insight, fervid eloquence, brilliant conversation, broad human sympathy, colourful personality, and handsome figure made an irresistible appeal to the many types of Americans who came in contact with him. People who saw or heard Vivekananda even once still cherish his memory after a lapse of more than half a century.  In America Vivekananda's mission was the interpretation of India's spiritual culture, especially in its Vedantic setting. He also tried to enrich the religious consciousness of the Americans through the rational and humanistic teachings of the Vedanta philosophy. In America he became India's spiritual ambassador and pleaded eloquently for better understanding between India and the New World in order to create a healthy synthesis of East and West, of religion and science.  In his own motherland Vivekananda is regarded as the patriot saint of modern India and an inspirer of her dormant national consciousness, To the Hindus he preached the ideal of a strength-giving and man-making religion. Service to man as the visible manifestation of the Godhead was the special form of worship he advocated for the Indians, devoted as they were to the rituals and myths of their ancient faith. Many political leaders of India have publicly acknowledged their indebtedness to Swami Vivekananda.  The Swami's mission was both national and international. A lover of mankind, he strove to promote peace and human brotherhood on the spiritual foundation of the Vedantic Oneness of existence. A mystic of the highest order, Vivekananda had a direct and intuitive experience of Reality. He derived his ideas from that unfailing source of wisdom and often presented them in the soul stirring language of poetry.  The natural tendency of Vivekananda's mind, like that of his Master, Ramakrishna, was to soar above the world and forget itself in contemplation of the Absolute. But another part of his personality bled at the sight of human suffering in East and West alike. It might appear that his mind seldom found a point of rest in its oscillation between contemplation of God and service to man. Be that as it may, he chose, in obedience to a higher call, service to man as his mission on earth; and this choice has endeared him to people in the West, Americans in particular.  In the course of a short life of thirty-nine years (1863-1902), of which only ten were devoted to public activities-and those, too, in the midst of acute physical suffering-he left for posterity his four classics: Jnana-Yoga, Bhakti-Yoga, Karma-Yoga, and Raja-Yoga, all of which are outstanding treatises on Hindu philosophy. In addition, he delivered innumerable lectures, wrote inspired letters in his own hand to his many friends and disciples, composed numerous poems, and acted as spiritual guide to the many seekers, who came to him for instruction. He also organized the Ramakrishna Order of monks, which is the most outstanding religious organization of modern India. It is devoted to the propagation of the Hindu spiritual culture not only in the Swami's native land, but also in America and in other parts of the world.  Swami Vivekananda once spoke of himself as a "condensed India." His life and teachings are of inestimable value to the West for an understanding of the mind of Asia. William James, the Harvard philosopher, called the Swami the "paragon of Vedantists." Max Muller and Paul Deussen, the famous Orientalists of the nineteenth century, held him in genuine respect and affection. "His words," writes Romain Rolland, "are great music, phrases in the style of Beethoven, stirring rhythms like the march of Handel choruses. I cannot touch these sayings of his, scattered as they are through the pages of books, at thirty years' distance, without receiving a thrill through my body like an electric shock. And what shocks, what transports, must have been produced when in burning words they issued from the lips of the hero!''  In 1881 Narendra first met Ramakrishna, who became his spiritual focus after his own father had died in 1884. Narendra's first introduction to Ramakrishna occurred in a literature class at General Assembly's Institution when he heard Professor William Hastie lecturing on William Wordsworth's poem, The Excursion. While explaining the word "trance" in the poem, Hastie suggested that his students visit Ramakrishna of Dakshineswar to understand the true meaning of trance. This prompted some of his students (including Narendra) to visit Ramakrishna Ramakrishna, guru of Vivekananda  Vivekananda in Cossipore 1886 They probably first met personally in November 1881 though Narendra did not consider this their first meeting, and neither man mentioned this meeting later. At this time, Narendra was preparing for his upcoming F. A. examination, when Ram Chandra Datta accompanied him to Surendra Nath Mitra's, house where Ramakrishna was invited to deliver a lecture.According to Paranjape, at this meeting Ramakrishna asked young Narendra to sing. Impressed by his singing talent, he asked Narendra to come to Dakshineshwar. In late 1881 or early 1882, Narendra went to Dakshineswar with two friends and met Ramakrishna. This meeting proved to be a turning point in his life.Although he did not initially accept Ramakrishna as his teacher and rebelled against his ideas, he was attracted by his personality and began to frequently visit him at Dakshineswar. He initially saw Ramakrishna's ecstasies and visions as "mere figments of imagination" and "hallucinations".As a member of Brahmo Samaj, he opposed idol worship, polytheism and Ramakrishna's worship of Kali.He even rejected the Advaita Vedanta of "identity with the absolute" as blasphemy and madness, and often ridiculed the idea. Narendra tested Ramakrishna, who faced his arguments patiently: "Try to see the truth from all angles", he replied. Narendra's father's sudden death in 1884 left the family bankrupt; creditors began demanding the repayment of loans, and relatives threatened to evict the family from their ancestral home. Narendra, once a son of a well-to-do family, became one of the poorest students in his college.He unsuccessfully tried to find work and questioned God's existence, but found solace in Ramakrishna and his visits to Dakshineswar increased. One day, Narendra requested Ramakrishna to pray to goddess Kali for their family's financial welfare. Ramakrishna suggested him to go to the temple himself and pray. Following Ramakrishna's suggestion, he went to the temple thrice, but failed to pray for any kind of worldly necessities and ultimately prayed for true knowledge and devotion from the goddess.Narendra gradually grew ready to renounce everything for the sake of realising God, and accepted Ramakrishna as his Guru. In 1885, Ramakrishna developed throat cancer, and was transferred to Calcutta and (later) to a garden house in Cossipore. Narendra and Ramakrishna's other disciples took care of him during his last days, and Narendra's spiritual education continued. At Cossipore, he experienced Nirvikalpa samadhi.Narendra and several other disciples received ochre robes from Ramakrishna, forming his first monastic order.He was taught that service to men was the most effective worship of God. Ramakrishna asked him to care for the other monastic disciples, and in turn asked them to see Narendra as their leader.Ramakrishna died in the early-morning hours of 16 August 1886 in Cossipore."""

## Text Summarization

### Using GPT - 2

In [None]:
# Importing model and tokenizer
from transformers import GPT2Tokenizer,GPT2LMHeadModel

# Instantiating the model and tokenizer with gpt-2
tokenizer=GPT2Tokenizer.from_pretrained('gpt2')
model=GPT2LMHeadModel.from_pretrained('gpt2')

# Encoding text to get input ids & pass them to model.generate()
inputs=tokenizer.batch_encode_plus([test_para],return_tensors='pt',max_length=512)
summary_ids=model.generate(inputs['input_ids'],early_stopping=True)

# Decoding and printing summary
GPT_summary=tokenizer.decode(summary_ids[0],skip_special_tokens=True)
print(GPT_summary)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1042301.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=665.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=548118077.0, style=ProgressStyle(descri…




Some weights of GPT2LMHeadModel were not initialized from the model checkpoint at gpt2 and are newly initialized: ['h.0.attn.masked_bias', 'h.1.attn.masked_bias', 'h.2.attn.masked_bias', 'h.3.attn.masked_bias', 'h.4.attn.masked_bias', 'h.5.attn.masked_bias', 'h.6.attn.masked_bias', 'h.7.attn.masked_bias', 'h.8.attn.masked_bias', 'h.9.attn.masked_bias', 'h.10.attn.masked_bias', 'h.11.attn.masked_bias', 'lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'only_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you may want to check this is the right behavior.
Setting `pad_token_id` to 50256 (first `eos_token_id`) to generate sequence


Swami Vivekananda's inspiring personality was well known both in India and in America during the last decade of the nineteenth century and the first decade of the twentieth. The unknown monk of India suddenly leapt into fame at the Parliament of Religions held in Chicago in 1893, at which he represented Hinduism. His vast knowledge of Eastern and Western culture as well as his deep spiritual insight, fervid eloquence, brilliant conversation, broad human sympathy, colourful personality, and handsome figure made an irresistible appeal to the many types of Americans who came in contact with him. People who saw or heard Vivekananda even once still cherish his memory after a lapse of more than half a century.  In America Vivekananda's mission was the interpretation of India's spiritual culture, especially in its Vedantic setting. He also tried to enrich the religious consciousness of the Americans through the rational and humanistic teachings of the Vedanta philosophy. In America he became 

In [None]:
print(GPT_summary)

Swami Vivekananda's inspiring personality was well known both in India and in America during the last decade of the nineteenth century and the first decade of the twentieth. The unknown monk of India suddenly leapt into fame at the Parliament of Religions held in Chicago in 1893, at which he represented Hinduism. His vast knowledge of Eastern and Western culture as well as his deep spiritual insight, fervid eloquence, brilliant conversation, broad human sympathy, colourful personality, and handsome figure made an irresistible appeal to the many types of Americans who came in contact with him. People who saw or heard Vivekananda even once still cherish his memory after a lapse of more than half a century.  In America Vivekananda's mission was the interpretation of India's spiritual culture, especially in its Vedantic setting. He also tried to enrich the religious consciousness of the Americans through the rational and humanistic teachings of the Vedanta philosophy. In America he became 

### Using T5

In [None]:
import torch
import json
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config
use_cuda = True
model = T5ForConditionalGeneration.from_pretrained('t5-base')
tokenizer = T5Tokenizer.from_pretrained('t5-base')
device = torch.device('cpu')
# device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"

preprocess_text = test_para.strip().replace("\n","")
t5_prepared_Text = "summarize: "+ test_para
print ("original text preprocessed: \n", test_para)

tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt", max_length=512).to(device)


# summmarize
summary_ids = model.generate(tokenized_text,
                                    num_beams=4,
                                    no_repeat_ngram_size=2,
                                    min_length=30,
                                    max_length=100,
                                    num_return_sequences=3,
                                    early_stopping=True)

output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

print ("\n\nSummarized text: \n",output)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1199.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=891691430.0, style=ProgressStyle(descri…




Some weights of T5ForConditionalGeneration were not initialized from the model checkpoint at t5-base and are newly initialized: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…

Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'only_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you may want to check this is the right behavior.



original text preprocessed: 
 Swami Vivekananda's inspiring personality was well known both in India and in America during the last decade of the nineteenth century and the first decade of the twentieth. The unknown monk of India suddenly leapt into fame at the Parliament of Religions held in Chicago in 1893, at which he represented Hinduism. His vast knowledge of Eastern and Western culture as well as his deep spiritual insight, fervid eloquence, brilliant conversation, broad human sympathy, colourful personality, and handsome figure made an irresistible appeal to the many types of Americans who came in contact with him. People who saw or heard Vivekananda even once still cherish his memory after a lapse of more than half a century.  In America Vivekananda's mission was the interpretation of India's spiritual culture, especially in its Vedantic setting. He also tried to enrich the religious consciousness of the Americans through the rational and humanistic teachings of the Vedanta ph

In [None]:
t5_prepared_Text = "question: "+ test_para
print ("original text preprocessed: \n", test_para)

tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt", max_length=512).to(device)

# summmarize
summary_ids = model.generate(tokenized_text,
                                    num_beams=4,
                                    no_repeat_ngram_size=2,
                                    min_length=30,
                                    max_length=100,
                                    early_stopping=True)

output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

print ("\n\nSummarized text: \n",output)

Truncation was not explicitely activated but `max_length` is provided a specific value, please use `truncation=True` to explicitely truncate examples to max length. Defaulting to 'only_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you may want to check this is the right behavior.


original text preprocessed: 
 Swami Vivekananda's inspiring personality was well known both in India and in America during the last decade of the nineteenth century and the first decade of the twentieth. The unknown monk of India suddenly leapt into fame at the Parliament of Religions held in Chicago in 1893, at which he represented Hinduism. His vast knowledge of Eastern and Western culture as well as his deep spiritual insight, fervid eloquence, brilliant conversation, broad human sympathy, colourful personality, and handsome figure made an irresistible appeal to the many types of Americans who came in contact with him. People who saw or heard Vivekananda even once still cherish his memory after a lapse of more than half a century.  In America Vivekananda's mission was the interpretation of India's spiritual culture, especially in its Vedantic setting. He also tried to enrich the religious consciousness of the Americans through the rational and humanistic teachings of the Vedanta phi

## C-Profiling : Attempt to find the bottlenecks

In [None]:
import cProfile
import pstats
from functools import wraps


def profile(output_file=None, sort_by='cumulative', lines_to_print=None, strip_dirs=False):
    """A time profiler decorator.
    Inspired by and modified the profile decorator of Giampaolo Rodola:
    http://code.activestate.com/recipes/577817-profile-decorator/
    Args:
        output_file: str or None. Default is None
            Path of the output file. If only name of the file is given, it's
            saved in the current directory.
            If it's None, the name of the decorated function is used.
        sort_by: str or SortKey enum or tuple/list of str/SortKey enum
            Sorting criteria for the Stats object.
            For a list of valid string and SortKey refer to:
            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
        lines_to_print: int or None
            Number of lines to print. Default (None) is for all the lines.
            This is useful in reducing the size of the printout, especially
            that sorting by 'cumulative', the time consuming operations
            are printed toward the top of the file.
        strip_dirs: bool
            Whether to remove the leading path info from file names.
            This is also useful in reducing the size of the printout
    Returns:
        Profile of the decorated function
    """

    def inner(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            _output_file = output_file or func.__name__ + '.prof'
            pr = cProfile.Profile()
            pr.enable()
            retval = func(*args, **kwargs)
            pr.disable()
            pr.dump_stats(_output_file)

            with open(_output_file, 'w') as f:
                ps = pstats.Stats(pr, stream=f)
                if strip_dirs:
                    ps.strip_dirs()
                if isinstance(sort_by, (tuple, list)):
                    ps.sort_stats(*sort_by)
                else:
                    ps.sort_stats(sort_by)
                ps.print_stats(lines_to_print)
            return retval

        return wrapper

    return inner

In [None]:
import cProfile
import io
import pstats
def profile2(func):
    def wrapper(*args, **kwargs):
        pr = cProfile.Profile()
        pr.enable()
        retval = func(*args, **kwargs)
        pr.disable()
        s = io.StringIO()
        sortby = 'tottime' #'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())
        return retval

    return wrapper

In [None]:
# @profile(sort_by='cumulative', lines_to_print=100, strip_dirs=True)
@profile2
def get_results(para):
  results = base_qaqg_nlp(paragraph)
  try:
    print("RESULTS")
    for result in results:
      if len(result['answer'].split()) > 1 :
        continue
      print(f"Q: {result['question']}")
      choices = [result['answer']]
      distractors = generateDistractors(result['answer'], 4)
      choices = choices + distractors[1:4]
      if len(choices) == 1:
        continue
      displayDistractors(choices)
      print(f"  Ans : {result['answer']}")
  except:
    print('try another')

In [None]:
# import cProfile
# cProfile.run("get_results(test_para)")
get_results(test_para)

RESULTS
Q: Vivekananda's mission was the interpretation of India's spiritual culture, especially in what setting?
Q: What country was Vivekananda's spiritual ambassador in America?
Q: Who had a direct and intuitive experience of Reality?
Q: Who was Swami Vivekananda's Master?
         2287269 function calls (2270566 primitive calls) in 11.455 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1954    6.227    0.003    6.227    0.003 {method 'to' of 'torch._C._TensorBase' objects}
     1641    1.483    0.001    1.483    0.001 {method 'item' of 'torch._C._TensorBase' objects}
        1    1.251    1.251    1.616    1.616 /usr/local/lib/python3.7/dist-packages/transformers/modeling_utils.py:1255(_generate_no_beam_search)
   198534    0.193    0.000    0.647    0.000 /usr/local/lib/python3.7/dist-packages/transformers/tokenization_t5.py:175(_convert_token_to_id)
     4266    0.157    0.000    0.157    0.000 {built-in method t