<a href="https://colab.research.google.com/github/Shubh4545/Shubh4545/blob/main/nlp%20MCQ%20Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import nltk
import random
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [10]:
def get_mca_questions(context):
    # Tokenize the text into sentences
    sentences = sent_tokenize(context)
    
    # Preprocess the sentences
    processed_sentences = preprocess_sentences(sentences)
    
    # Use TF-IDF to find the most important sentence
    important_sentence = get_most_important_sentence(processed_sentences)
    
    # Generate multiple-choice questions based on the important sentence
    mca_questions = generate_mca_questions(important_sentence)
    
    return mca_questions

def preprocess_sentences(sentences):
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    processed_sentences = []
    
    for sentence in sentences:
        # Tokenize the sentence into words
        words = nltk.word_tokenize(sentence.lower())
        
        # Remove stop words and lemmatize the words
        words = [lemmatizer.lemmatize(word) for word in words if word.isalnum() and word not in stop_words]
        
        # Convert the words back to sentence
        processed_sentence = ' '.join(words)
        
        processed_sentences.append(processed_sentence)
    
    return processed_sentences

def get_most_important_sentence(processed_sentences):
    # Initialize TF-IDF vectorizer
    vectorizer = TfidfVectorizer()
    
    # Compute TF-IDF matrix
    tfidf_matrix = vectorizer.fit_transform(processed_sentences)
    
    # Compute cosine similarity between sentences
    similarity_matrix = cosine_similarity(tfidf_matrix)
    
    # Find the most important sentence based on cosine similarity
    important_sentence_index = similarity_matrix.sum(axis=1).argmax()
    
    return processed_sentences[important_sentence_index]



def generate_mca_questions(important_sentence):
    # Initialize OpenAI GPT-3.5 pipeline for question generation
    generator = pipeline('text-generation', model='EleutherAI/gpt-neo-1.3B')

    # Generate multiple-choice questions based on the important sentence
    num_questions = 3
    questions = []
    for _ in range(num_questions):
        # Generate the question
        question = generator(important_sentence, max_length=30, num_return_sequences=1)[0]['generated_text']
        question = question.split('?')[0] + '?'

        # Generate the options
        options = generate_options(important_sentence)
        
        # Randomly select the correct answers
        correct_answers = random.sample(options, 2)
        
        # Create the correct options string
        correct_options = "Correct Options: "
        for i, answer in enumerate(correct_answers, 1):
            correct_options += f"({chr(97 + i)}) {answer} "
        
        # Shuffle the options
        random.shuffle(options)
        
        # Create the question string with shuffled options
        question_with_options = question + '\n'
        for i, option in enumerate(options, 1):
            question_with_options += f"{chr(97 + i)}. {option}\n"
        
        # Append the question to the list
        questions.append((question_with_options, correct_options))
    
    return questions


def generate_options(important_sentence):
    # Split the important sentence into words
    words = important_sentence.split()
    
    # Shuffle the words
    random.shuffle(words)
    
    # Select two words as correct answers
    correct_answers = random.sample(words, 2)
    
    # Create options with two correct answers and two incorrect answers
    options = correct_answers + random.sample([word for word in words if word not in correct_answers], 2)
    
    # Shuffle the options
    random.shuffle(options)
    
    return options

# Example usage
context = "Photosynthesis is a process used by plants and other organisms to convert light energy into chemical energy that, through cellular respiration, can later be released to fuel the organism's activities. Some of this chemical energy is stored in carbohydrate molecules, such as sugars and starches, which are synthesized from carbon dioxide and water. Most plants, algae, and cyanobacteria perform photosynthesis."

mca_questions = get_mca_questions(context)
for i, (question, correct_options) in enumerate(mca_questions, 1):
    print(f"Q{i}: {question}")
    print(f"{correct_options}\n")


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Q1: photosynthesis process used plant organism convert light energy chemical energy cellular respiration later released fuel organism activity and maintenance. These processes are important and must be maintained?
b. cellular
c. respiration
d. organism
e. chemical

Correct Options: (b) respiration (c) chemical 

Q2: photosynthesis process used plant organism convert light energy chemical energy cellular respiration later released fuel organism activity. This phenomenon is an important process to plant life process?
b. convert
c. organism
d. activity
e. chemical

Correct Options: (b) activity (c) organism 

Q3: photosynthesis process used plant organism convert light energy chemical energy cellular respiration later released fuel organism activity carbon dioxide CO2

This will cause to make?
b. process
c. activity
d. cellular
e. organism

Correct Options: (b) cellular (c) activity 

