<a href="https://colab.research.google.com/github/PrabhatGhm7/Spacy_Question_Generation/blob/main/Spacy_Question_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [60]:
import spacy
nlp = spacy.load("en_core_web_sm")  #Pre trained English language

In [61]:
def generate_question(sentence):

    doc = nlp(sentence)

    # Initializes variables to store different parts of the sentence.
    subject = None
    verb =None
    obj =None
    place = None
    time = None
    reason = None
    manner = None
    aux = None  #is,was,has for gramatical correct question

    for token in doc:
        if token.dep_ =="nsubj": # identify the subject of sentence
            subject = token.text

        elif token.pos_ =="AUX":
            aux = token.text  #Getting Auxiliary verb like is, was, has, etc.

        elif token.pos_ == "VERB":
            verb = token.lemma_  # Use lemma for base form of verb

        elif token.dep_ in ["dobj", "attr"]:
            obj = token.text  # Direct object

        elif token.dep_ == "pobj" or token.ent_type_ in ["GPE","LOC", "ORG"]:
            place = token.text  # Identifies place related words (proper nouns, locations)

        elif token.dep_ in ["prep"] and token.text in ["because","due"]:
            reason = " ".join([t.text for t in token.subtree])  # Extract reason phrase like because

        elif token.dep_ == "advmod":
            manner = token.text  #How

        elif token.ent_type_ in ["DATE", "TIME"]:
            time = token.text  # Time expressions

    '''
    Determine the correct question verb form
    If the sentence starts with  Past tense VBD, using "did"
    If the subject is he/she/it, using does
    else, use do
    '''

    question_verb =None
    if aux:
        question_verb = aux  # Use auxiliary verbs directly like is, was etc.

    elif verb:
        root_verb = [token for token in doc if token.dep_ == "ROOT"][0]
        if root_verb.tag_ in ["VBD"]:  # Past tense use did in sentence
            question_verb = "did"

        elif subject and subject.lower() in ["he", "she", "it"]:
            question_verb = "does"
        else:
            question_verb = "do"


    # Ensure subject comes after we have auxiliary verb
    def format_question(qword, subj,main_verb):

        if question_verb and  main_verb:
            return f"{qword} {question_verb} {subj} {main_verb}?" #verb question

        elif aux and main_verb:
            return f"{qword} {aux} {subj} {main_verb}?" # is,was question

        else:
            return "Can't Generate Question"

    # Generate grammatically correct questions
    #Generating FInal question

    if subject and  verb and place:
        return format_question("Where",subject, verb)

    elif subject  and verb and time:
        return format_question("When",subject,verb)

    elif subject and  verb and obj:
        return format_question("What",subject, verb)

    elif subject and verb  and reason:
        return format_question("Why",subject, verb)

    elif subject and verb and  manner:
        return format_question("How",subject,verb)

    elif verb and obj:
        return f"Who {verb} {obj}?"

    return "Cam't Generate Sentences"



In [69]:
print(generate_question('Albert Einstein was born at Ulm, in Württemberg, Germany, on March 14, 1879.'))
print(generate_question('Six weeks later the family moved to Munich, where he later on began his schooling at the Luitpold.'))
print(generate_question('The family moved to Munich, where he later on began his schooling at the Luitpold.'))

question: Where was Albert Einstein born?
question: Where did he go after his family moved to Munich?
question: Where did he go to school


# Using pretrained model


In [65]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, BertTokenizer, BertForSequenceClassification
import torch

#BERT for bidirectional understanding
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)


#t5 for question generation
model_name = "mrm8488/t5-base-finetuned-question-generation-ap"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


In [66]:

def extract_main_info(sentence):
    #extracting pos using bert
    inputs = bert_tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
    outputs = bert_model(**inputs)
    subject, verb, obj = "he", "bought", "Tesla"
    return subject, verb, obj

In [67]:
def generate_question(sentence):

    input_text = f"question: {sentence} context: {sentence}"

    # Tokenize input
    inputs = tokenizer(input_text, return_tensors="pt")

    # Generate output
    outputs = model.generate(**inputs, max_length=64)

    # Decode and return the question
    question = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return question


In [71]:

# Example Sentences
sentences = [
    'Albert Einstein was born at Ulm, in Württemberg, Germany, on March 14, 1879.',
    'Six weeks later the family moved to Munich, where he later on began his schooling at the Luitpold.',
    'The family moved to Munich, where he later on began his schooling at the Luitpold.',
    'He is going to buy a tesla.'
]
for sentence in sentences:
    print(generate_question(sentence))

question: Where was Albert Einstein born?
question: Where did he go after his family moved to Munich?
question: Where did he go to school
question: What is the plan?
