In [None]:
# Install required packages (IPython shell escapes; this cell only works inside a notebook).
# transformers/torch back the T5 model, pandas builds the CSV export table and
# ipywidgets provides the upload/slider/button UI used in the later cells.
# NOTE(review): `datasets` is installed here but none of the visible cells import it — confirm it is needed.
# NOTE(review): versions are unpinned, so a fresh run may resolve to different releases.
!pip install transformers datasets torch pandas ipywidgets
# PyPDF2 is used to read uploaded .pdf files.
!pip install PyPDF2
# python-docx (imported as `docx`) is used to read uploaded .docx files.
!pip install python-docx



In [None]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
import re
import random
from google.colab import files
import pandas as pd
import io
import os
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
import PyPDF2
import docx

In [None]:
class MCQGenerator:
    """Generate multiple-choice questions from plain text with a T5 model.

    The model is only used to phrase a question for each paragraph. The
    "correct" answer is a random word span taken from the sentence that shares
    the most words with that question, and the distractors are other word
    spans of the same length drawn from the whole text. The output is therefore
    a heuristic draft, not a verified answer key.
    """

    # Upper bound on prompt length (in tokens) fed to the model; longer
    # paragraphs are truncated instead of being passed through unbounded.
    MAX_INPUT_TOKENS = 512

    def __init__(self, model_name="google/flan-t5-large"):
        """Load the tokenizer and model for ``model_name``.

        The model is moved to CUDA when ``torch.cuda.is_available()`` and kept
        on CPU otherwise. Progress is reported with ``print``.
        """
        print("Loading model and tokenizer... This may take a moment.")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = T5Tokenizer.from_pretrained(model_name)
        self.model = T5ForConditionalGeneration.from_pretrained(model_name).to(self.device)
        print(f"Model loaded successfully! Using {self.device}")

    def generate_question(self, context, max_length=64):
        """Return a question string generated by the model for ``context``.

        Args:
            context: Source passage the question should be about.
            max_length: Maximum length passed to ``model.generate``.
        """
        input_text = f"generate question: {context}"
        # Truncate explicitly so an unusually long paragraph cannot produce an
        # oversized input sequence.
        encoded = self.tokenizer(
            input_text,
            return_tensors="pt",
            truncation=True,
            max_length=self.MAX_INPUT_TOKENS,
        )
        input_ids = encoded.input_ids.to(self.device)
        outputs = self.model.generate(input_ids, max_length=max_length)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    @staticmethod
    def _word_set(text):
        """Return the lower-cased words of ``text`` with punctuation stripped."""
        return set(re.findall(r"\w+", text.lower()))

    def extract_answer(self, context, question):
        """Pick an answer span for ``question`` out of ``context``.

        The sentence (longer than three words) sharing the most words with the
        question is selected, and a random run of up to five consecutive words
        from it is returned as the answer.

        Returns:
            ``(answer, source_sentence)``. When no sentence shares a word with
            the question, ``("No appropriate answer found", "")`` is returned.
        """
        sentences = re.split(r'(?<=[.!?])\s+', context)
        sentences = [s for s in sentences if len(s.split()) > 3]

        # Compare punctuation-free words so that e.g. "over?" matches "over".
        question_words = self._word_set(question)
        best_sentence = None
        best_score = 0

        for sentence in sentences:
            overlap = len(question_words & self._word_set(sentence))
            if overlap > best_score:
                best_score = overlap
                best_sentence = sentence

        if best_sentence:
            words = best_sentence.split()
            # At most half of the sentence, capped at five words. The sentence
            # has at least four words, so the span has at least two.
            answer_length = min(len(words) // 2, 5)
            answer_start = random.randint(0, max(0, len(words) - answer_length))
            answer = " ".join(words[answer_start:answer_start + answer_length])
            return answer, best_sentence

        return "No appropriate answer found", ""

    def generate_distractors(self, context, answer, count=3):
        """Return up to ``count`` distinct wrong options drawn from ``context``.

        Each distractor is a run of consecutive words from ``context`` with the
        same number of words as ``answer`` (at least one). Fewer than ``count``
        items, possibly none, are returned when the text does not contain
        enough distinct candidates, so the call always terminates.
        """
        words = context.split()
        span = max(1, len(answer.split()))
        if count <= 0 or len(words) < span:
            return []

        # dict.fromkeys de-duplicates in linear time. The "+ 1" makes sure the
        # final window of the text is a candidate too.
        candidates = dict.fromkeys(
            " ".join(words[i:i + span]) for i in range(len(words) - span + 1)
        )
        candidates.pop(answer, None)

        pool = list(candidates)
        # Sample randomly rather than always taking the first n-grams of the text.
        return random.sample(pool, min(count, len(pool)))

    def generate_mcq(self, context, num_questions=5):
        """Build up to ``num_questions`` multiple-choice questions from ``context``.

        The text is split into paragraphs on blank lines and only paragraphs of
        at least 15 words are used. Paragraphs for which no answer or no
        distractor can be found are skipped, and a paragraph that raises is
        reported with ``print`` and skipped.

        Returns:
            A list of dicts with the keys ``question``, ``options``,
            ``correct_index`` and ``source_text``.
        """
        mcqs = []

        # Split on blank lines, tolerating CRLF and whitespace-only lines.
        paragraphs = [p.strip() for p in re.split(r'\n\s*\n', context)]
        paragraphs = [p for p in paragraphs if len(p.split()) >= 15]

        # Bound the work on large documents, but never below the number of
        # questions that was actually requested.
        limit = max(10, num_questions)
        if len(paragraphs) > limit:
            paragraphs = random.sample(paragraphs, limit)

        for i, paragraph in enumerate(paragraphs):
            if len(mcqs) >= num_questions:
                break

            try:
                # Generate a question from the paragraph
                question = self.generate_question(paragraph)

                answer, source_sentence = self.extract_answer(paragraph, question)
                if not source_sentence:
                    # No sentence matched the question; do not present the
                    # fallback message as if it were a correct option.
                    continue

                # Generate distractors
                distractors = self.generate_distractors(context, answer)
                if not distractors:
                    continue

                options = distractors + [answer]
                random.shuffle(options)

                mcqs.append({
                    "question": question,
                    "options": options,
                    "correct_index": options.index(answer),
                    "source_text": source_sentence
                })

            except Exception as e:
                # Best effort: one bad paragraph should not abort the whole run.
                print(f"Error processing paragraph {i}: {e}")
                continue

        return mcqs

In [None]:
def extract_text_from_file(uploaded_file):
    """Extract plain text from the value of an ipywidgets ``FileUpload``.

    Only the first uploaded file is read. The format is chosen from the file
    extension (case-insensitive): ``.txt`` is decoded as UTF-8, ``.pdf`` is
    read with PyPDF2 and ``.docx`` with python-docx. PDF pages and DOCX
    paragraphs are separated by a blank line so that callers splitting the
    text on blank lines get one chunk per page / paragraph.

    Args:
        uploaded_file: Either the ipywidgets 7 form
            ``{filename: {'content': bytes, ...}}`` or the ipywidgets 8 form,
            a sequence of ``{'name': str, 'content': bytes-like, ...}``.

    Returns:
        The extracted text, or a message starting with
        ``"Unsupported file format"`` for any other extension.

    Raises:
        ValueError: If ``uploaded_file`` is empty.
    """
    if not uploaded_file:
        raise ValueError("No file was uploaded.")

    if isinstance(uploaded_file, dict):
        filename = next(iter(uploaded_file))
        content = uploaded_file[filename]['content']
    else:
        first_file = uploaded_file[0]
        filename = first_file['name']
        content = first_file['content']

    # The content may arrive as a memoryview; normalise it to bytes.
    content = bytes(content)
    lowered_name = filename.lower()

    if lowered_name.endswith('.txt'):
        # utf-8-sig decodes plain UTF-8 identically and drops a leading BOM.
        text = content.decode('utf-8-sig')
    elif lowered_name.endswith('.pdf'):
        import PyPDF2

        # Parse from memory: no temp file to leak if parsing raises.
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
        # Guard against pages for which no text is returned (e.g. scanned images).
        text = "\n\n".join((page.extract_text() or "") for page in pdf_reader.pages)
    elif lowered_name.endswith('.docx'):
        import docx

        doc = docx.Document(io.BytesIO(content))
        text = "\n\n".join(para.text for para in doc.paragraphs)
    else:
        text = "Unsupported file format. Please upload .txt, .pdf, or .docx files."

    return text

In [None]:
import textwrap

# Create the UI for the MCQ generator
def create_mcq_ui():
    """Build and return the widget layout for the MCQ generator.

    The layout contains a file-upload button, a model dropdown, a question
    count slider, a "Generate MCQs" button and an output area. Clicking
    generate extracts text from the uploaded file, runs ``MCQGenerator`` on it,
    prints the questions and offers a CSV export through
    ``google.colab.files.download``.

    Returns:
        A ``widgets.VBox`` ready to be passed to ``display``.
    """
    upload_button = widgets.FileUpload(description='Upload File', accept='.txt,.pdf,.docx')

    num_questions = widgets.IntSlider(min=1, max=20, step=1, value=5, description='# of Questions:')

    model_selector = widgets.Dropdown(
        options=['google/flan-t5-large', 'google/flan-t5-base'],
        value='google/flan-t5-large',
        description='Model:',
    )

    generate_button = widgets.Button(description='Generate MCQs')
    output_area = widgets.Output()

    # Single-slot cache so repeated clicks reuse the loaded model instead of
    # loading it again every time.
    loaded = {'name': None, 'generator': None}

    def get_generator(model_name):
        """Return a generator for ``model_name``, loading it only when it changes."""
        if loaded['name'] != model_name:
            # Drop the previous model first so both are not held at once.
            loaded['generator'] = None
            loaded['name'] = None
            print(f"Initializing MCQ generator with model: {model_name}")
            loaded['generator'] = MCQGenerator(model_name=model_name)
            loaded['name'] = model_name
        else:
            print(f"Reusing already loaded model: {model_name}")
        return loaded['generator']

    def handle_upload(change):
        """Echo the names of newly uploaded files into the output area."""
        uploaded = change['new']
        if isinstance(uploaded, dict):
            # ipywidgets 7: {filename: {...}}
            filenames = list(uploaded)
        else:
            # ipywidgets 8: sequence of {'name': ..., ...}
            filenames = [item['name'] for item in uploaded]
        with output_area:
            for filename in filenames:
                print(f"File uploaded: {filename}")

    # Only react to changes of the uploaded value, not to every trait.
    upload_button.observe(handle_upload, names='value')

    def display_mcq(mcqs):
        """Print each question, its lettered options and the correct letter."""
        for i, mcq in enumerate(mcqs, 1):

            wrapped_question = textwrap.fill(mcq['question'], width=80)
            print(f"Question {i}: {wrapped_question}\n")

            for j, option in enumerate(mcq['options']):

                wrapped_option = textwrap.fill(option, width=70,
                                              initial_indent="   ",
                                              subsequent_indent="      ")
                # chr(65 + j) maps 0, 1, 2, ... to "A", "B", "C", ...
                print(f"  {chr(65+j)}. {wrapped_option.lstrip()}")

            print(f"\nCorrect Answer: {chr(65+mcq['correct_index'])}")

    def on_generate_button_clicked(b):
        """Run the upload -> extract -> generate -> display pipeline."""
        with output_area:
            clear_output()
            try:

                if not upload_button.value:
                    print("Please upload a file first.")
                    return

                text = extract_text_from_file(upload_button.value)

                # extract_text_from_file signals an unknown extension by
                # returning this message instead of raising.
                if text.startswith("Unsupported file format"):
                    print(text)
                    return

                mcq_gen = get_generator(model_selector.value)

                # Generate MCQs
                print(f"Generating {num_questions.value} questions from the uploaded text...")
                mcqs = mcq_gen.generate_mcq(text, num_questions.value)

                print(f"\nGenerated {len(mcqs)} multiple-choice questions:\n")
                if not mcqs:
                    # Nothing to show or export.
                    print("No questions could be generated from this file.")
                    return

                display_mcq(mcqs)

                # One row per question: the question, the correct option text
                # and one "Option <letter>" column per choice.
                mcq_data = []
                for mcq in mcqs:
                    row = {
                        'Question': mcq['question'],
                        'Correct Answer': mcq['options'][mcq['correct_index']]
                    }
                    for j, option in enumerate(mcq['options']):
                        row[f'Option {chr(65+j)}'] = option
                    mcq_data.append(row)

                df = pd.DataFrame(mcq_data)

                export_button = widgets.Button(description='Export to CSV')

                def on_export_button_clicked(b):
                    """Write the questions to CSV and trigger a browser download."""
                    # Let pandas write the file so encoding and newlines are
                    # handled consistently.
                    df.to_csv('mcq_questions.csv', index=False, encoding='utf-8')
                    files.download('mcq_questions.csv')

                export_button.on_click(on_export_button_clicked)
                display(export_button)

            except Exception as e:
                # Surface any failure in the output area instead of losing it
                # inside the widget callback.
                print(f"An error occurred: {str(e)}")

    generate_button.on_click(on_generate_button_clicked)

    # Layout the widgets
    header = widgets.HTML(value="<h2>MCQ Generator from Text Files</h2>")
    widgets_layout = widgets.VBox([
        header,
        widgets.HBox([upload_button]),
        widgets.HBox([model_selector, num_questions]),
        generate_button,
        output_area
    ])

    return widgets_layout

In [None]:
# Display the UI: build the widget layout and render it as this cell's output.
# Everything after this point (upload, model choice, generation, CSV export)
# is driven by the callbacks registered inside create_mcq_ui().
display(create_mcq_ui())

VBox(children=(HTML(value='<h2>MCQ Generator from Text Files</h2>'), HBox(children=(FileUpload(value={}, accep…