In [14]:
import random # STEP 3 
import re
from nltk.tokenize import sent_tokenize, word_tokenize
import spacy

# Load the spaCy model for named entity recognition
nlp = spacy.load("en_core_web_sm")

def truncate_sentence(sentence, max_words=10):
    """Limit sentence length to a maximum number of words."""
    words = word_tokenize(sentence)
    if len(words) > max_words:
        return ' '.join(words[:max_words]) + '...'
    return sentence

def generate_diverse_questions(sentences, num_questions=5, max_words=10):
    """Generate exactly 10 diverse and meaningful questions from a list of sentences."""
    questions = set()  # Use a set to avoid duplicate questions

    while len(questions) < num_questions:
        for sentence in sentences:
            if len(questions) >= num_questions:
                break

            # Analyze the sentence with spaCy to extract entities
            doc = nlp(sentence)
            entities = [(ent.text, ent.label_) for ent in doc.ents]

            # 1. General Questions
            general_starter = random.choice([
                "What is the meaning of",
                "Why is it important to understand",
                "How does",
                "What is the role of",
                "Explain the significance of"
            ])
            truncated_sentence = truncate_sentence(sentence, max_words=max_words)
            questions.add(f"{general_starter} {truncated_sentence.strip()}?")

            # 2. Entity-based Questions
            for entity, label in entities:
                questions.add(f"What role does '{entity}' play in {truncated_sentence.strip()}?")
                if label in ["PERSON", "ORG", "GPE"]:  # Person, Organization, or Geopolitical entity
                    questions.add(f"How does '{entity}' influence the events in {truncated_sentence.strip()}?")
                    questions.add(f"What is the significance of '{entity}' in the context of {truncated_sentence.strip()}?")

            # 3. Why and How Questions (focusing on reasons or methods)
            questions.add(f"Why is {truncated_sentence.strip()} important?")
            questions.add(f"How can {truncated_sentence.strip()} be applied in real life?")

            # 4. Hypothetical or Exploratory Questions
            questions.add(f"What would happen if {truncated_sentence.strip()} didn't occur?")
            questions.add(f"What are the potential consequences of {truncated_sentence.strip()}?")

    return list(questions)

def generate_questions_from_text(tokenized_text, num_questions=10, max_words=20):
    """Generate 10 meaningful and different questions from the tokenized text of the book."""
    sentences = sent_tokenize(tokenized_text)
    generated_questions = generate_diverse_questions(sentences, num_questions=num_questions, max_words=max_words)
    return generated_questions

# Example usage for identifying the book and generating questions
prompt = input("Please enter your prompt: ")  # Ask the user to input a prompt
book_title, similarity_score = identify_book(prompt)
print(f"Most Relevant Book: {book_title}, Similarity Score: {similarity_score:.4f}")

# Check if the book exists in the DataFrame and retrieve the tokenized text
if book_title in books_df['Title'].values:
    tokenized_text = books_df.loc[books_df['Title'] == book_title, 'Tokenized Text'].values
    if len(tokenized_text) > 0:
        tokenized_text = tokenized_text[0]  # Access the text

        # Generate 10 meaningful and different questions from the tokenized text
        generated_questions = generate_questions_from_text(tokenized_text, num_questions=5, max_words=10)
        
        # Print the generated questions
        print("\nGenerated Questions:")
        for i, question in enumerate(generated_questions, 1):
            print(f"Q{i}: {question}")
    else:
        print(f"No tokenized text found for the book '{book_title}'.")
else:
    print(f"Book title '{book_title}' not found in the dataset.")


Most Relevant Book: GandhiAutobio_morallessons, Similarity Score: 0.4629

Generated Questions:
Q1: How does 'mahatma' influence the events in gandhi autobiography moral lesson gangrade published gandhi smriti darshan samiti...?
Q2: What is the significance of 'absence gandhi' in the context of gandhi autobiography moral lesson gangrade published gandhi smriti darshan samiti...?
Q3: What role does 'new delhi' play in gandhi autobiography moral lesson gangrade published gandhi smriti darshan samiti...?
Q4: What role does 'lesson gandhi' play in gandhi autobiography moral lesson gangrade published gandhi smriti darshan samiti...?
Q5: What role does 'gymnastic' play in gandhi autobiography moral lesson gangrade published gandhi smriti darshan samiti...?
Q6: What would happen if gandhi autobiography moral lesson gangrade published gandhi smriti darshan samiti... didn't occur?
Q7: What is the significance of 'listens gandhi' in the context of gandhi autobiography moral lesson gangrade publis