In [None]:
# Install/upgrade the third-party packages used below and download the NLTK
# `punkt` package.
# NOTE(review): none of these installs is version-pinned, so a fresh run may
# resolve to different releases than the ones this notebook was written against.
!pip install -U transformers
!pip install sentencepiece
!python -m nltk.downloader punkt
!pip install langdetect

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# Sample English passage about gravity, stored as one string (the trailing
# backslashes continue the literal across source lines without adding newlines).
# NOTE(review): no visible cell reads `text1` — presumably a demo input for the
# question-generation pipelines defined later; confirm or remove.
text1 = "Gravity (from Latin gravitas, meaning 'weight'), or gravitation, is a natural phenomenon by which all \
things with mass or energy—including planets, stars, galaxies, and even light—are brought toward (or gravitate toward) \
one another. On Earth, gravity gives weight to physical objects, and the Moon's gravity causes the ocean tides. \
The gravitational attraction of the original gaseous matter present in the Universe caused it to begin coalescing \
and forming stars and caused the stars to group together into galaxies, so gravity is responsible for many of \
the large-scale structures in the Universe. Gravity has an infinite range, although its effects become increasingly \
weaker as objects get further away"



## Single task QA

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

# Sample dataset: three short paragraphs that are split into sentences and used
# to build the vocabulary and training sequences for the toy LSTM below.
paragraphs = [
  "The sky is blue. The grass is green.",
  "The quick brown fox jumps over the lazy dog.",
  "Roses are red. Violets are blue."
]

# Function to convert a paragraph to a list of sentences
def paragraph_to_sentences(paragraph):
    """Split ``paragraph`` on the literal separator ``'. '`` and return the pieces.

    The separator is consumed, so every piece except the last loses its full
    stop, while a full stop that ends the paragraph stays attached to the
    final piece.
    """
    separator = '. '
    pieces = paragraph.split(separator)
    return pieces

# Flatten every paragraph into a single list of sentence strings.
sentences = [
    piece
    for paragraph in paragraphs
    for piece in paragraph_to_sentences(paragraph)
]

# Build the vocabulary from whitespace-separated tokens. Ids 0 and 1 are
# reserved for padding and unknown words; new tokens get the next free id in
# order of first appearance.
word2idx = {"<PAD>": 0, "<UNK>": 1}
idx2word = {0: "<PAD>", 1: "<UNK>"}
for token in (tok for sent in sentences for tok in sent.split()):
    if token in word2idx:
        continue
    new_id = len(word2idx)
    word2idx[token] = new_id
    idx2word[new_id] = token

# Encode each sentence as a list of token ids, falling back to <UNK>.
unk_id = word2idx["<UNK>"]
sequences = [
    [word2idx.get(tok, unk_id) for tok in sent.split()]
    for sent in sentences
]

# Right-pad every sequence with <PAD> up to the longest sentence length.
max_len = max(map(len, sequences))
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    padding="post",
    value=word2idx["<PAD>"],
)

# Generate input and output data
# Next-token setup: the input is every position except the last, and the target
# at step t is the token found at step t + 1.
X = padded_sequences[:, :-1]

# One-hot encode only the real tokens of each sentence. Padded positions are
# left as all-zero rows, so they contribute nothing to a categorical
# cross-entropy loss.
y_onehot = np.zeros((len(sequences), max_len, len(word2idx)))
for i, sequence in enumerate(sequences):
    for j, word_idx in enumerate(sequence):
        y_onehot[i, j, word_idx] = 1

# Shift by one step so that y[:, t] encodes the token that follows X[:, t].
# (Slicing with [:, :-1] here would make the target identical to the input and
# train the network to copy its input instead of predicting the next word.)
y = y_onehot[:, 1:, :]

# Build the LSTM model
# The embedding maps each of the len(word2idx) token ids to a 50-dimensional
# vector; inputs are max_len-1 steps long, matching X.
model = Sequential()
model.add(Embedding(input_dim=len(word2idx), output_dim=50, input_length=max_len-1))
# return_sequences=True keeps one LSTM output per time step, so the Dense layer
# below emits a softmax over the vocabulary at every position.
model.add(LSTM(50, return_sequences=True))
model.add(Dense(len(word2idx), activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
# Train the LSTM model
# NOTE(review): 100 epochs on the handful of sample sentences above — presumably
# intended to overfit for the demo; confirm.
model.fit(X, y, epochs=100)

# Function to generate a question from a sentence
def generate_question(sentence):
    """Build a templated "What is ... ?" question about ``sentence``.

    The sentence is encoded with the module-level vocabulary (``word2idx``),
    padded the same way as the training data, and run through ``model``. The
    most confident per-step prediction over the real (non-padded) positions is
    mapped back to a word with ``idx2word`` and inserted into the template.

    Args:
        sentence: Whitespace-tokenised input text.

    Returns:
        The question string, or ``""`` when the sentence has no tokens or the
        predicted id has no entry in ``idx2word``.
    """
    unk_id = word2idx["<UNK>"]
    pad_id = word2idx["<PAD>"]

    # The vocabulary is case-sensitive, so look a token up as written first and
    # only then in lower case; anything still unknown becomes <UNK>, mirroring
    # how the training sequences were encoded.
    token_ids = [
        word2idx.get(tok, word2idx.get(tok.lower(), unk_id))
        for tok in sentence.split()
    ]
    if not token_ids:
        return ""

    # Pad on the right ("post") exactly like the training data.
    batch = pad_sequences([token_ids], maxlen=max_len - 1, padding="post", value=pad_id)
    n_steps = min(len(token_ids), max_len - 1)

    # prediction is indexed as (batch, time step, vocabulary id)
    prediction = model.predict(batch)

    # Only consider the steps that hold real tokens, and recover the vocabulary
    # id from the 2-D argmax (a flat argmax over the whole array is not a
    # vocabulary id).
    step_scores = prediction[0, :n_steps]
    _, predicted_word_idx = np.unravel_index(np.argmax(step_scores), step_scores.shape)
    predicted_word = idx2word.get(int(predicted_word_idx), "")

    # Generate the question
    if predicted_word:
        question = f"What is {predicted_word} in the sentence \"{sentence}\"?"
    else:
        question = ""
    return question

# Test the function: run generate_question on one sample sentence and print the
# resulting question (an empty line is printed when no question is produced).
sentence = "The sky is blue."
question = generate_question(sentence)
print(question)

Epoch 1/100




In [None]:
import itertools
import logging
from typing import Optional, Dict, Union
from nltk import sent_tokenize
from langdetect import detect
import torch
from transformers import(
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
)
logger = logging.getLogger(__name__)
import random
import nltk
from nltk.corpus import wordnet
from transformers import pipeline
nltk.download('wordnet')
generator = pipeline('text-generation', model='gpt2')
!pip install googletrans==3.1.0a0
from googletrans import Translator
ans=[]
class QGPipeline:
    """Answer-aware question generation with multiple-choice distractors.

    Calling the pipeline on a text:

    1. detects the input language and translates the text with googletrans
       (default destination language — presumably English; confirm),
    2. extracts candidate answers with ``ans_model``,
    3. generates one question per answer with ``model``,
    4. builds distractors from WordNet relations of the answer's last word,
    5. translates question, answer and distractors back to the detected
       language.

    Relies on the module-level ``generator`` (text-generation pipeline),
    ``wordnet``, ``Translator``, ``detect`` and ``logger``.
    """

    def __init__(
        self,
        model: PreTrainedModel,
        tokenizer: PreTrainedTokenizer,
        ans_model: PreTrainedModel,
        ans_tokenizer: PreTrainedTokenizer,
        qg_format: str,
        use_cuda: bool
    ):
        """Store the models/tokenizers and move the models to the target device.

        Args:
            model: Seq2seq model used to generate questions.
            tokenizer: Tokenizer used for all encoding and for decoding questions.
            ans_model: Seq2seq model used to extract answers.
            ans_tokenizer: Tokenizer used to decode extracted answers.
            qg_format: ``"prepend"`` selects the answer-prepend input format;
                any other value selects the ``<hl>`` highlight format.
            use_cuda: Use CUDA when it is available.

        Raises:
            AssertionError: If ``model`` is neither a T5 nor a BART
                conditional-generation model.
        """
        self.model = model
        self.tokenizer = tokenizer

        self.ans_model = ans_model
        self.ans_tokenizer = ans_tokenizer

        self.qg_format = qg_format

        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        self.model.to(self.device)

        # Avoid moving the same module twice when one model serves both roles.
        if self.ans_model is not self.model:
            self.ans_model.to(self.device)

        assert self.model.__class__.__name__ in ["T5ForConditionalGeneration", "BartForConditionalGeneration"]

        if "T5ForConditionalGeneration" in self.model.__class__.__name__:
            self.model_type = "t5"
        else:
            self.model_type = "bart"

    def _wordnet_candidates(self, target_word):
        """Return WordNet synonyms, antonyms, hyponyms and hypernyms of ``target_word``."""
        related = set()
        for syn in wordnet.synsets(target_word):
            for lemma in syn.lemmas():
                related.add(lemma.name())
                if lemma.antonyms():
                    related.add(lemma.antonyms()[0].name())
            for neighbour in itertools.chain(syn.hyponyms(), syn.hypernyms()):
                for lemma in neighbour.lemmas():
                    related.add(lemma.name())
        return list(related)

    def _generate_distractors(self, target_word, num_distractors=3):
        """Pick up to ``num_distractors`` WordNet relatives of ``target_word``.

        Every candidate is paired with a short text sampled from the
        module-level ``generator``; candidates are then ordered by that text
        and the first ``num_distractors`` are returned.
        NOTE(review): sorting by the generated string is a lexical order, not a
        relevance score — confirm this is the intended ranking.
        """
        scored = []
        for candidate in self._wordnet_candidates(target_word):
            if candidate == target_word:
                continue
            prompt = f"Which is more related to {target_word}? {target_word} or {candidate}"
            try:
                generated_text = generator(
                    prompt, max_length=20, num_return_sequences=1, do_sample=True
                )[0]['generated_text'].strip()
            except Exception as exc:
                # Best effort: one failing candidate must not abort the whole
                # call, but the failure should not be invisible either.
                logger.warning("Skipping distractor candidate %r: %s", candidate, exc)
                continue
            scored.append((candidate, generated_text))

        scored.sort(key=lambda pair: pair[1])
        return [candidate for candidate, _ in scored[:num_distractors]]

    def __call__(self, inputs: str):
        """Generate multiple-choice questions for ``inputs``.

        Args:
            inputs: Source text in any language that ``detect`` and the
                translator can handle.

        Returns:
            A list with one entry per generated question. Each entry is a
            one-element list holding a dict with the keys ``'question'``,
            ``'distractors'`` (the options, with the correct answer inserted at
            a random position) and ``'answer'``. Each entry is also printed.
            An empty list is returned when no answer could be extracted.
        """
        source_lang = detect(inputs)
        translator = Translator()
        inputs = translator.translate(inputs).text
        inputs = " ".join(inputs.split())  # collapse runs of whitespace
        sents, answers = self._extract_answers(inputs)
        flat_answers = list(itertools.chain(*answers))

        if len(flat_answers) == 0:
            return []

        if self.qg_format == "prepend":
            qg_examples = self._prepare_inputs_for_qg_from_answers_prepend(inputs, answers)
        else:
            qg_examples = self._prepare_inputs_for_qg_from_answers_hl(sents, answers)

        # Every extracted answer may have been unusable; nothing to generate then.
        if not qg_examples:
            return []

        qg_inputs = [example['source_text'] for example in qg_examples]
        questions = self._generate_questions(qg_inputs)

        # Collected locally so that a failure part-way through cannot leak
        # partial results into a later call.
        results = []
        for example, que in zip(qg_examples, questions):
            words = example['answer'].split()
            if not words:
                # Empty or whitespace-only answer: no word to build distractors from.
                continue

            # Distractors are derived from the last word of the answer.
            distractors = self._generate_distractors(words[-1], num_distractors=3)

            example['answer'] = translator.translate(example['answer'], dest=source_lang).text
            que = translator.translate(que, dest=source_lang).text

            # Fixed fallback options when WordNet produced nothing.
            if len(distractors) == 0 and source_lang == 'hi':
                distractors.append('जिजीविषा')
                distractors.append('प्रेमशक्त')
                distractors.append('तमक')
            elif len(distractors) == 0:
                distractors.append('Morrow')
                distractors.append('Kerfuffle')
                distractors.append('Crapulous')

            distractors = [
                translator.translate(distractor, dest=source_lang).text
                for distractor in distractors
            ]

            # randint is inclusive on both ends; an index equal to the length
            # makes insert() append, so every position is possible.
            index = random.randint(0, len(distractors))
            distractors.insert(index, example['answer'])
            output = [{'question': que, 'distractors': distractors, 'answer': example['answer']}]
            results.append(output)

        for output in results:
            print(output)
        return results

    def _generate_questions(self, inputs):
        """Run ``self.model`` on the prepared source texts and decode one question each."""
        inputs = self._tokenize(inputs, padding=True, truncation=True)

        outs = self.model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            max_length=32,
            num_beams=4,
        )

        questions = [self.tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
        return questions

    def _extract_answers(self, context):
        """Extract candidate answers for every sentence of ``context``.

        Returns:
            ``(sents, answers)`` where ``answers[i]`` is the list of raw answer
            strings decoded for ``sents[i]``.
        """
        sents, inputs = self._prepare_inputs_for_ans_extraction(context)
        # NOTE(review): encoding goes through self.tokenizer while decoding uses
        # self.ans_tokenizer — fine as long as both share a vocabulary; confirm.
        inputs = self._tokenize(inputs, padding=True, truncation=True)

        outs = self.ans_model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            max_length=32,
        )

        # Special tokens are kept so the '<sep>' delimiter survives decoding.
        dec = [self.ans_tokenizer.decode(ids, skip_special_tokens=False) for ids in outs]
        answers = [item.split('<sep>') for item in dec]
        # Drop whatever follows the last '<sep>'.
        answers = [i[:-1] for i in answers]

        return sents, answers

    def _tokenize(self,
        inputs,
        padding=True,
        truncation=True,
        add_special_tokens=True,
        max_length=512
    ):
        """Batch-encode ``inputs`` with ``self.tokenizer`` into PyTorch tensors.

        ``padding=True`` pads every item to ``max_length``; ``padding=False``
        disables padding. The deprecated ``pad_to_max_length`` flag is not
        passed because ``padding`` already expresses the same choice.
        """
        inputs = self.tokenizer.batch_encode_plus(
            inputs,
            max_length=max_length,
            add_special_tokens=add_special_tokens,
            truncation=truncation,
            padding="max_length" if padding else False,
            return_tensors="pt"
        )
        return inputs

    def _prepare_inputs_for_ans_extraction(self, text):
        """Build one "extract answers:" prompt per sentence of ``text``.

        In prompt ``i`` the ``i``-th sentence is wrapped in ``<hl>`` markers and
        all other sentences are included unchanged.

        Returns:
            ``(sents, inputs)``: the sentence list and the parallel prompt list.
        """
        sents = sent_tokenize(text)

        inputs = []
        for i in range(len(sents)):
            source_text = "extract answers:"
            for j, sent in enumerate(sents):
                if i == j:
                    sent = "<hl> %s <hl>" % sent
                source_text = "%s %s" % (source_text, sent)
                source_text = source_text.strip()

            if self.model_type == "t5":
                source_text = source_text + " </s>"
            inputs.append(source_text)

        return sents, inputs

    def _prepare_inputs_for_qg_from_answers_hl(self, sents, answers):
        """Build highlight-format question-generation inputs.

        For every answer of sentence ``i`` the (lower-cased) sentence is
        rewritten with the answer wrapped in ``<hl>`` markers and joined with
        the remaining, unchanged sentences. Answers that are empty or cannot be
        located in their sentence are skipped.

        Returns:
            A list of dicts with the keys ``"answer"`` and ``"source_text"``.
        """
        pad_marker = "<pad>"
        inputs = []
        for i, answer in enumerate(answers):
            if len(answer) == 0:
                continue
            for answer_text in answer:
                sent = sents[i].lower()
                sents_copy = sents[:]

                # Remove the leading pad marker only when it is really there;
                # a blind [5:] slice would eat real characters otherwise.
                answer_text = answer_text.strip()
                if answer_text.startswith(pad_marker):
                    answer_text = answer_text[len(pad_marker):]
                answer_text = answer_text.lower().strip()
                if not answer_text:
                    continue

                ans_start_idx = sent.find(answer_text)
                if ans_start_idx == -1:
                    # The model produced text that is not a substring of the
                    # sentence; skip it instead of raising ValueError.
                    logger.warning("Answer %r not found in its sentence; skipping", answer_text)
                    continue

                sent = f"{sent[:ans_start_idx]} <hl> {answer_text} <hl> {sent[ans_start_idx + len(answer_text): ]}"
                sents_copy[i] = sent

                source_text = " ".join(sents_copy)
                source_text = f"generate question: {source_text}"
                if self.model_type == "t5":
                    source_text = source_text + " </s>"

                inputs.append({"answer": answer_text, "source_text": source_text})

        return inputs

    def _prepare_inputs_for_qg_from_answers_prepend(self, context, answers):
        """Build prepend-format inputs: ``answer: <answer> context: <context>``.

        Returns:
            A list of dicts with the keys ``"answer"`` and ``"source_text"``,
            one per answer in the flattened ``answers``.
        """
        flat_answers = list(itertools.chain(*answers))
        examples = []
        for answer in flat_answers:
            source_text = f"answer: {answer} context: {context}"
            if self.model_type == "t5":
                source_text = source_text + " </s>"

            examples.append({"answer": answer, "source_text": source_text})
        return examples


class MultiTaskQAQGPipeline(QGPipeline):
    """QGPipeline that can also answer a question about a context.

    A plain ``str`` input is handled as question generation by the parent
    class; any other input is indexed with ``"question"`` and ``"context"``
    and answered with ``self.model``.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``QGPipeline.__init__``."""
        super().__init__(**kwargs)

    def __call__(self, inputs: Union[Dict, str]):
        """Dispatch on the exact input type: ``str`` -> QG, anything else -> QA."""
        if type(inputs) is not str:
            # do qa
            return self._extract_answer(inputs["question"], inputs["context"])
        # do qg
        return super().__call__(inputs)

    def _prepare_inputs_for_qa(self, question, context):
        """Format the QA prompt, appending the ``</s>`` marker for T5 models."""
        eos_suffix = " </s>" if self.model_type == "t5" else ""
        return f"question: {question}  context: {context}{eos_suffix}"

    def _extract_answer(self, question, context):
        """Generate (at most 16 tokens) and decode the answer to ``question``."""
        prompt = self._prepare_inputs_for_qa(question, context)
        encoded = self._tokenize([prompt], padding=False)
        target_device = self.device

        generated = self.model.generate(
            input_ids=encoded['input_ids'].to(target_device),
            attention_mask=encoded['attention_mask'].to(target_device),
            max_length=16,
        )

        return self.tokenizer.decode(generated[0], skip_special_tokens=True)


class E2EQGPipeline:
    """End-to-end question generation: one generation pass turns a context
    into a ``<sep>``-delimited list of questions."""

    def __init__(
        self,
        model: PreTrainedModel,
        tokenizer: PreTrainedTokenizer,
        use_cuda: bool
    ) :
        """Store the model/tokenizer, move the model to the target device and
        set the default generation arguments.

        Raises:
            AssertionError: If ``model`` is neither a T5 nor a BART
                conditional-generation model.
        """
        self.model = model
        self.tokenizer = tokenizer

        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        self.model.to(self.device)

        assert self.model.__class__.__name__ in ["T5ForConditionalGeneration", "BartForConditionalGeneration"]

        if "T5ForConditionalGeneration" in self.model.__class__.__name__:
            self.model_type = "t5"
        else:
            self.model_type = "bart"

        # Used by __call__ only when the caller passes no generation kwargs.
        self.default_generate_kwargs = {
            "max_length": 256,
            "num_beams": 4,
            "length_penalty": 1.5,
            "no_repeat_ngram_size": 3,
            "early_stopping": True,
        }

    def __call__(self, context: str, **generate_kwargs):
        """Generate questions for ``context``.

        Args:
            context: Source passage.
            **generate_kwargs: Passed to ``model.generate``. When any are
                given they replace the defaults entirely (no merging).

        Returns:
            The stripped pieces of the decoded output split on ``"<sep>"``,
            without the piece that follows the last separator.
        """
        inputs = self._prepare_inputs_for_e2e_qg(context)

        # TODO: when overriding default_generate_kwargs all other arguments need
        # to be passed; find a better way to do this
        if not generate_kwargs:
            generate_kwargs = self.default_generate_kwargs

        outs = self.model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            **generate_kwargs
        )

        prediction = self.tokenizer.decode(outs[0], skip_special_tokens=True)
        questions = prediction.split("<sep>")
        questions = [question.strip() for question in questions[:-1]]
        return questions

    def _prepare_inputs_for_e2e_qg(self, context):
        """Build the "generate questions:" prompt and tokenize it without padding."""
        source_text = f"generate questions: {context}"
        if self.model_type == "t5":
            source_text = source_text + " </s>"

        inputs = self._tokenize([source_text], padding=False)
        return inputs

    def _tokenize(
        self,
        inputs,
        padding=True,
        truncation=True,
        add_special_tokens=True,
        max_length=512
    ):
        """Batch-encode ``inputs`` with ``self.tokenizer`` into PyTorch tensors.

        ``padding=True`` pads every item to ``max_length``; ``padding=False``
        disables padding. The deprecated ``pad_to_max_length`` flag is not
        passed because ``padding`` already expresses the same choice.
        """
        inputs = self.tokenizer.batch_encode_plus(
            inputs,
            max_length=max_length,
            add_special_tokens=add_special_tokens,
            truncation=truncation,
            padding="max_length" if padding else False,
            return_tensors="pt"
        )
        return inputs


# Registry used by the `pipeline()` factory: maps each task name to the class
# implementing it ("impl") and to the default pretrained checkpoint names
# ("default") that are loaded when the caller does not supply a model.
SUPPORTED_TASKS = {
    "question-generation": {
        "impl": QGPipeline,
        "default": {
            "model": "valhalla/t5-small-qg-hl",
            # separate checkpoint used for the answer-extraction step
            "ans_model": "valhalla/t5-base-qa-qg-hl",
        }
    },
    "multitask-qa-qg": {
        "impl": MultiTaskQAQGPipeline,
        "default": {
            "model": "valhalla/t5-base-qa-qg-hl",
        }
    },
    "e2e-qg": {
        "impl": E2EQGPipeline,
        "default": {
            "model": "valhalla/t5-small-e2e-qg",
        }
    }
}

def pipeline(
    task: str,
    model = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    qg_format: Optional[str] = "highlight",
    ans_model = None,
    ans_tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    use_cuda: Optional[bool] = True,
    **kwargs,
):
    """Build the pipeline object registered for ``task`` in ``SUPPORTED_TASKS``.

    Args:
        task: One of the keys of ``SUPPORTED_TASKS``.
        model: Model instance or pretrained identifier; the task default is
            used when ``None``.
        tokenizer: Tokenizer instance, identifier, or ``(identifier, kwargs)``
            tuple; inferred from ``model`` when that is a string.
        qg_format: Forwarded to the pipeline class (not used for ``"e2e-qg"``).
        ans_model: Answer-extraction model for ``"question-generation"``; the
            task default (with its own tokenizer) is loaded when ``None``.
        ans_tokenizer: Tokenizer for ``ans_model``, resolved like ``tokenizer``.
        use_cuda: Forwarded to the pipeline class.
        **kwargs: Accepted but not used.

    Raises:
        KeyError: If ``task`` is not registered.
        Exception: If a tokenizer is missing and the matching model is not a string.
    """

    def _infer_tokenizer(tok, mdl):
        # Without an explicit tokenizer, reuse the model identifier (if it is one).
        if tok is not None:
            return tok
        if not isinstance(mdl, str):
            # Impossible to guess what is the right tokenizer here
            raise Exception(
                "Impossible to guess which tokenizer to use. "
                "Please provided a PretrainedTokenizer class or a path/identifier to a pretrained tokenizer."
            )
        return mdl

    def _materialize_tokenizer(tok):
        # A tuple is (tokenizer name, {kwargs}); a string is just the name;
        # anything else is assumed to be a ready tokenizer and passed through.
        if isinstance(tok, tuple):
            return AutoTokenizer.from_pretrained(tok[0], **tok[1])
        if isinstance(tok, str):
            return AutoTokenizer.from_pretrained(tok)
        return tok

    # Retrieve the task
    if task not in SUPPORTED_TASKS:
        raise KeyError("Unknown task {}, available tasks are {}".format(task, list(SUPPORTED_TASKS.keys())))

    task_spec = SUPPORTED_TASKS[task]
    task_class = task_spec["impl"]

    # Fall back to the task's default checkpoint when no model is given.
    if model is None:
        model = task_spec["default"]["model"]

    # Tokenizer first (it may be inferred from the model name), then the model.
    tokenizer = _materialize_tokenizer(_infer_tokenizer(tokenizer, model))
    if isinstance(model, str):
        model = AutoModelForSeq2SeqLM.from_pretrained(model)

    is_question_generation = task == "question-generation"
    if is_question_generation:
        if ans_model is None:
            # load default ans model together with its own tokenizer
            default_ans_name = task_spec["default"]["ans_model"]
            ans_tokenizer = AutoTokenizer.from_pretrained(default_ans_name)
            ans_model = AutoModelForSeq2SeqLM.from_pretrained(default_ans_name)
        else:
            ans_tokenizer = _materialize_tokenizer(_infer_tokenizer(ans_tokenizer, ans_model))
            if isinstance(ans_model, str):
                ans_model = AutoModelForSeq2SeqLM.from_pretrained(ans_model)

    if task == "e2e-qg":
        return task_class(model=model, tokenizer=tokenizer, use_cuda=use_cuda)

    if not is_question_generation:
        # Remaining tasks use one model/tokenizer pair for both roles.
        ans_model, ans_tokenizer = model, tokenizer

    return task_class(
        model=model,
        tokenizer=tokenizer,
        ans_model=ans_model,
        ans_tokenizer=ans_tokenizer,
        qg_format=qg_format,
        use_cuda=use_cuda,
    )
# Debug output: show the module-level `ans` list and its length.
print(ans)
print(len(ans))


In [None]:
nlp = pipeline("question-generation")

In [None]:
answers = [['<pad> Python'], ['<pad> Guido van Rossum']]
sents = ['Python is an interpreted, high-level, general-purpose programming language.', "Created by Guido van Rossum and first released in 1991, Python's design philosophy emphasizes code readability with its notable use of significant whitespace."]



In [None]:
# Scratch reproduction of the highlight-format input preparation: for every
# answer of sentence i, wrap the answer in <hl> markers inside that sentence and
# join it with the other, unchanged sentences.
inputs = []
pad_marker = "<pad>"
for i, answer in enumerate(answers):
    print(answer)
    if len(answer) == 0:
        continue
    for answer_text in answer:
        print(answer_text)
        sent = sents[i]
        sents_copy = sents[:]

        # Strip the leading pad marker but keep the whole answer; taking only
        # the second space-separated token would cut "Guido van Rossum" down to
        # "Guido".
        answer_text = answer_text.strip()
        if answer_text.startswith(pad_marker):
            answer_text = answer_text[len(pad_marker):]
        answer_text = answer_text.strip()
        print(answer_text)
        print(sent)

        # find() instead of index(): skip answers that are empty or absent
        # rather than aborting the whole cell with ValueError.
        ans_start_idx = sent.find(answer_text)
        if not answer_text or ans_start_idx == -1:
            continue

        sent = f"{sent[:ans_start_idx]} <hl> {answer_text} <hl> {sent[ans_start_idx + len(answer_text): ]}"
        sents_copy[i] = sent

        source_text = " ".join(sents_copy)
        source_text = f"generate question: {source_text}"
        # if self.model_type == "t5":
        #     source_text = source_text + " </s>"

        inputs.append({"answer": answer_text, "source_text": source_text})

To use the t5-base model instead of the default, pass its model identifier (or a local path) via the `model` parameter.

In [None]:
nlp = pipeline("question-generation", model="valhalla/t5-base-qg-hl")

In [None]:
'''from flask import Flask, render_template, request
from transformers import pipeline
from pyngrok import ngrok

# Initialize the question generation pipeline with the specified model
nlp = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")

# Initialize Flask app
app = Flask(__name__)

# Function to generate question
def generate_question(sentence, answer):
    ans_start_idx = sentence.lower().find(answer.lower())
    if ans_start_idx == -1:
        return "Answer not found in the sentence."

    sent = f"{sentence[:ans_start_idx]} <hl> {answer} <hl> {sentence[ans_start_idx + len(answer):]}"
    source_text = f"generate question: {sent}"
    question = nlp(source_text)

    return question[0]['generated_text']

@app.route("/", methods=["GET", "POST"])
def index():
    if request.method == "POST":
        sentence = request.form["sentence"]
        answer = request.form["answer"]
        question = generate_question(sentence, answer)
        return render_template("index.html", question=question)
    return render_template("index.html", question=None)

if __name__ == "__main__":
    # Open a tunnel to the Flask app on port 5000
    public_url = ngrok.connect(5000)
    print(f"Your ngrok URL is: {public_url}")

    # Run Flask app on 0.0.0.0 to make it publicly accessible
    app.run(host="0.0.0.0", port=5000)
'''

In [None]:
!pip install pyngrok

# Paste your ngrok auth token here



In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never store the ngrok auth token in the notebook. Read it from the
# NGROK_AUTHTOKEN environment variable, or prompt for it when that is unset.
# A token that was ever committed in this cell must be treated as compromised
# and revoked in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

In [None]:
from pyngrok import ngrok, conf
import logging  # NOTE(review): not used in this cell

# Print every ngrok log event handed to the callback (not only errors), so that
# tunnel problems show up in the cell output.
conf.get_default().log_event_callback = lambda log: print(log)


In [None]:
# Install the packages needed by the Flask web app below.
!pip install flask pyngrok transformers


In [None]:
!pip install PyPDF2


In [None]:
from flask import Flask, render_template_string, request
from werkzeug.utils import secure_filename
from transformers import pipeline
from pyngrok import ngrok
import os
import PyPDF2
import random

# Initialize the pipelines
# `pipeline` here is the transformers factory re-imported at the top of this
# cell; that import rebinds the name used by the custom `pipeline()` function
# defined in an earlier cell.
nlp_qg = pipeline("text2text-generation", model="valhalla/t5-base-qg-hl")
# aggregation_strategy="simple" replaces the deprecated grouped_entities=True:
# sub-word tokens of one entity are merged into a single result.
nlp_ner = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)

# Initialize Flask app
app = Flask(__name__)
# Uploaded PDFs are written to this folder; create it up front.
app.config["UPLOAD_FOLDER"] = "./uploads"
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at ``pdf_path``.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined in page order with no separator. A page that
        yields no text (``None`` or ``""``) contributes nothing.
    """
    page_texts = []
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            # Guard against a page without extractable text so that the
            # concatenation cannot fail on a non-string value.
            page_texts.append(page.extract_text() or "")
    return "".join(page_texts)

# Function to detect answers using NER
def detect_answers(sentence):
    """Run the module-level ``nlp_ner`` pipeline on ``sentence`` and return the
    ``'word'`` value of every entity it reports, in the reported order."""
    found_words = []
    for entity in nlp_ner(sentence):
        found_words.append(entity['word'])
    return found_words

# Function to generate MCQs
def generate_mcqs(text):
    """Generate multiple-choice questions from ``text``.

    The text is split on ``". "``; for every entity that ``detect_answers``
    reports in a sentence, the entity is wrapped in ``<hl>`` markers and the
    question model (``nlp_qg``) produces a question. The other distinct
    entities of the same sentence serve as distractors (at most three).

    Args:
        text: Source passage.

    Returns:
        A list of dicts with the keys ``"question"``, ``"options"`` (shuffled,
        includes the answer) and ``"answer"``. Entities that are empty or cannot
        be located in their sentence are skipped.
    """
    mcqs = []
    for sentence in text.split(". "):  # Split text into sentences
        answers = detect_answers(sentence)
        lowered_sentence = sentence.lower()
        for answer in answers:
            ans_start_idx = lowered_sentence.find(answer.lower()) if answer else -1
            if ans_start_idx == -1:
                # The entity text is not a literal substring of the sentence;
                # slicing with -1 would highlight the wrong span, so skip it.
                continue

            sent = f"{sentence[:ans_start_idx]} <hl> {answer} <hl> {sentence[ans_start_idx + len(answer):]}"
            source_text = f"generate question: {sent}"
            question = nlp_qg(source_text)[0]['generated_text']

            # Generate multiple-choice options from the other unique entities.
            distractors = list({other for other in answers if other != answer})
            random.shuffle(distractors)
            options = [answer] + distractors[:3]
            random.shuffle(options)  # Shuffle options

            mcqs.append({
                "question": question,
                "options": options,
                "answer": answer
            })
    return mcqs

# Function to generate only questions
def generate_questions(text):
    """Generate plain questions (no options) from ``text``.

    The text is split on ``". "``; for every entity that ``detect_answers``
    reports in a sentence, the entity is wrapped in ``<hl>`` markers and the
    question model (``nlp_qg``) produces one question.

    Args:
        text: Source passage.

    Returns:
        The generated question strings in processing order. Entities that are
        empty or cannot be located in their sentence are skipped.
    """
    questions = []
    for sentence in text.split(". "):  # Split text into sentences
        lowered_sentence = sentence.lower()
        for answer in detect_answers(sentence):
            ans_start_idx = lowered_sentence.find(answer.lower()) if answer else -1
            if ans_start_idx == -1:
                # The entity text is not a literal substring of the sentence;
                # slicing with -1 would highlight the wrong span, so skip it.
                continue

            sent = f"{sentence[:ans_start_idx]} <hl> {answer} <hl> {sentence[ans_start_idx + len(answer):]}"
            source_text = f"generate question: {sent}"
            questions.append(nlp_qg(source_text)[0]['generated_text'])
    return questions

# Flask route to handle text and PDF upload
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the generator page and, on POST, the generated output.

    On POST the form fields ``generation_option`` (``"mcqs"`` or anything else
    for plain questions) and ``input_text`` are read. When a PDF is uploaded
    in ``pdf_file`` it is saved under the upload folder and its extracted text
    replaces the typed text. The result of ``generate_mcqs`` or
    ``generate_questions`` is passed to the inline template.
    """
    output = None
    option = None
    if request.method == "POST":
        option = request.form.get("generation_option", "questions")
        input_text = request.form.get("input_text", "").strip()
        text = input_text

        # Handle PDF file upload
        pdf_file = request.files.get("pdf_file")
        if pdf_file is not None and pdf_file.filename:
            # secure_filename() can return an empty string (e.g. for a name
            # made only of unsafe characters); joining that with the folder
            # would yield the folder path itself, so fall back to a fixed name.
            safe_name = secure_filename(pdf_file.filename) or "upload.pdf"
            file_path = os.path.join(app.config["UPLOAD_FOLDER"], safe_name)
            pdf_file.save(file_path)
            text = extract_text_from_pdf(file_path)

        # Generate based on selected option
        if text:
            if option == "mcqs":
                output = generate_mcqs(text)
            else:
                output = generate_questions(text)

    # HTML Template
    html_template = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <title>Interactive Question Generator</title>
        <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet">
    </head>
    <body class="bg-light">
        <div class="container mt-5">
            <div class="card shadow">
                <div class="card-body">
                    <h2 class="text-center text-primary mb-4">Interactive Question Generator</h2>
                    <form method="POST" enctype="multipart/form-data">
                        <div class="mb-3">
                            <label for="generation_option" class="form-label">Select an Option:</label>
                            <div>
                                <input type="radio" id="mcqs" name="generation_option" value="mcqs" {% if option == 'mcqs' %}checked{% endif %}>
                                <label for="mcqs">Generate MCQs</label>
                                <input type="radio" id="questions" name="generation_option" value="questions" class="ms-3" {% if option == 'questions' %}checked{% endif %}>
                                <label for="questions">Only Generate Questions</label>
                            </div>
                        </div>
                        <div class="mb-3">
                            <label for="input_text" class="form-label">Enter Text:</label>
                            <textarea id="input_text" name="input_text" class="form-control" rows="4" placeholder="Type your text here..."></textarea>
                        </div>
                        <div class="mb-3">
                            <label for="pdf_file" class="form-label">Or Upload a PDF:</label>
                            <input type="file" id="pdf_file" name="pdf_file" class="form-control" accept=".pdf">
                        </div>
                        <button type="submit" class="btn btn-primary w-100">Generate</button>
                    </form>
                </div>
            </div>
            {% if output %}
                <div class="card mt-4 shadow">
                    <div class="card-body">
                        {% if option == 'mcqs' %}
                            <h3 class="text-success">Generated MCQs:</h3>
                            <ul class="list-group">
                                {% for mcq in output %}
                                    <li class="list-group-item">
                                        <strong>{{ mcq.question }}</strong>
                                        <ol>
                                        {% for option in mcq.options %}
                                            <li>{{ option }}</li>
                                        {% endfor %}
                                        </ol>
                                    </li>
                                {% endfor %}
                            </ul>
                        {% else %}
                            <h3 class="text-success">Generated Questions:</h3>
                            <ul class="list-group">
                                {% for question in output %}
                                    <li class="list-group-item">{{ question }}</li>
                                {% endfor %}
                            </ul>
                        {% endif %}
                    </div>
                </div>
            {% endif %}
        </div>
        <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js"></script>
    </body>
    </html>
    """
    return render_template_string(html_template, output=output, option=option)

# Start Flask server and ngrok tunnel
# The tunnel is opened first and its public URL printed, then the Flask app is
# started on the same port (5001) that the tunnel forwards to.
public_url = ngrok.connect(5001)
print("Access the app at:", public_url)

# NOTE(review): app.run() presumably blocks this cell until the server is
# stopped — confirm before adding cells that must run after it.
app.run(port=5001)


In [None]:
!ngrok tunnels list


ngrok - tunnel local ports to public URLs and inspect traffic

USAGE:
  ngrok [command] [flags]

AUTHOR:
  ngrok - <support@ngrok.com>

COMMANDS: 
  config          update or migrate ngrok's configuration file
  http            start an HTTP tunnel
  tcp             start a TCP tunnel
  tunnel          start a tunnel for use with a tunnel-group backend

EXAMPLES: 
  ngrok http 80                                                 # secure public URL for port 80 web server
  ngrok http --url baz.ngrok.dev 8080                           # port 8080 available at baz.ngrok.dev
  ngrok tcp 22                                                  # tunnel arbitrary TCP traffic to port 22
  ngrok http 80 --oauth=google --oauth-allow-email=foo@foo.com  # secure your app with oauth

Paid Features: 
  ngrok http 80 --url mydomain.com                              # run ngrok with your own custom domain
  ngrok http 80 --cidr-allow 2600:8c00::a03c:91ee:fe69:9695/32  # run ngrok with IP policy restrictions