Mount from google drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Check the location where model is saved

In [2]:
!ls "/content/drive/My Drive/MCQ Question Generation/saved_model"

config.json		model.safetensors	 tokenizer_config.json
generation_config.json	special_tokens_map.json  tokenizer.json


Install Necessary Libraries

In [3]:
!pip install transformers torch flask flask-cors accelerate

Collecting flask-cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Col

Load the model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Path to your fine-tuned model
model_path = "/content/drive/My Drive/MCQ Question Generation/saved_model"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, device_map="auto").cuda()

print("Model Loaded Successfully!")

Unrecognized keys in `rope_scaling` for 'rope_type'='llama3': {'name'}


Model Loaded Successfully!


Write mcq_server.py to generate questions using the model

In [4]:
%%writefile mcq_server.py
from flask import Flask, request, jsonify
from flask_cors import CORS
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re
import random

# Initialize Flask App
app = Flask(__name__)
CORS(app)

# Load Fine-Tuned Model from Google Drive
model_path = "/content/drive/My Drive/MCQ Question Generation/saved_model"

try:
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, device_map="auto").cuda()
    print("✅ Model Loaded Successfully!")
except Exception as e:
    print(f"🔥 Error Loading Model: {e}")

# 🔥 Function to Extract MCQ
def extract_mcq(prompt, generated_text):
    """ Extracts the MCQ content (Question, Options A-E, Correct Answer) from model output. """
    generated_text = generated_text.replace(prompt, "").strip()
    lines = [line.strip() for line in generated_text.split("\n") if line.strip()]

    question = None
    options = {}
    correct_answer = None

    for line in lines:
        # Extract question
        if not question and re.match(r"^Question:\s*(.*)", line, re.IGNORECASE):
            question = re.match(r"^Question:\s*(.*)", line, re.IGNORECASE).group(1).strip()

        # Extract options (A-E)
        elif re.match(r"^[A-E]\)", line):
            option_letter = line[0]  # Extracts "A", "B", etc.
            option_text = line[3:].strip()
            options[option_letter] = option_text

        # Extract correct answer
        elif re.search(r"Correct Answer:\s*([A-E])", line, re.IGNORECASE):
            correct_answer_match = re.search(r"Correct Answer:\s*([A-E])", line, re.IGNORECASE)
            if correct_answer_match:
                correct_answer = correct_answer_match.group(1).strip()

    # Ensure exactly 5 options
    if len(options) != 5:
        print("⚠ Invalid MCQ: Missing or duplicate answer choices.")
        return {"error": "Invalid MCQ format - Missing or duplicate options"}

    # Validate correct answer
    if correct_answer not in options:
        print("⚠ Correct answer not found in options.")
        correct_answer = "Unknown"

    if not question:
        return {"error": "Invalid MCQ format - Question missing"}

    return {
        "question": question,
        "options": options,
        "correct_answer": correct_answer
    }

# 🔥 Route to Generate MCQ
@app.route('/generate_mcq', methods=['POST'])
def generate_mcq():
    """Generates an MCQ using the fine-tuned model and ensures a valid format."""
    try:
        data = request.json
        prompt_base = data.get("prompt", "Generate a multiple-choice question.")

        # **Randomized prompt phrasing**
        prompt_variations = [
            f"{prompt_base}",
            f"{prompt_base} Ensure clear question and answer choices.",
            f"{prompt_base} Make sure the question is unique.",
            f"{prompt_base} Ensure all 5 answers are distinct.",
            f"{prompt_base} The correct answer must be one of the given choices."
        ]
        prompt = random.choice(prompt_variations)

        print(f"📩 Received Prompt: {prompt}")

        # **Generate output**
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            top_k=40,
            top_p=0.92,
            temperature=random.choice([0.7, 0.8, 0.9])  # ✅ Randomized temperature for unique output
        )
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # **Extract and validate MCQ**
        mcq_data = extract_mcq(prompt, generated_text)

        if "error" in mcq_data:
            print(f"⚠ Retrying due to error: {mcq_data['error']}")
            return generate_mcq()  # Retry if MCQ is invalid

        print(f"✅ Extracted MCQ:\n{mcq_data}")

        return jsonify({"mcq": mcq_data})

    except Exception as e:
        print(f"🔥 ERROR: {str(e)}")
        return jsonify({"error": str(e)}), 500

# **🔥 Start Flask Server**
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=True)


Writing mcq_server.py


Run the flask app inside the mcq_server.py file in the background

In [6]:
!nohup python3 mcq_server.py > flask_output.log 2>&1 &

Save the ngrok autentication token to secrets

In [9]:
!ngrok authtoken 2tA1v9sv3quWDHtzTUepVLlL6Ka_372iA7FzbW7bYaphJ6QNA

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


Install necessary ngrok library

In [8]:
!pip install pyngrok
from pyngrok import ngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


Kill the ngrok process if any existing process running. And start a new url to get an API to localhost

In [10]:
ngrok.kill()
public_url = ngrok.connect("5000", "http")
print(f"🌍 Your API is available at: {public_url}")

🌍 Your API is available at: NgrokTunnel: "https://dce8-34-16-168-90.ngrok-free.app" -> "http://localhost:5000"


Kill any existing process running

In [None]:
!kill -9 $(lsof -t -i:5000)


kill: usage: kill [-s sigspec | -n signum | -sigspec] pid | jobspec ... or kill -l [sigspec]


Check the output of the mcq_server.py upto last 50 lines

In [7]:
!tail -n 50 flask_output.log

Unrecognized keys in `rope_scaling` for 'rope_type'='llama3': {'name'}
✅ Model Loaded Successfully!
 * Serving Flask app 'mcq_server'
 * Debug mode: on
 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
[33mPress CTRL+C to quit[0m
 * Restarting with stat
Unrecognized keys in `rope_scaling` for 'rope_type'='llama3': {'name'}
 * Debugger is active!
 * Debugger PIN: 712-022-854


Display all the running processes

In [None]:
!ps aux | grep python

root          66  2.2  0.0      0     0 ?        Z    09:45   0:15 [python3] <defunct>
root          67  0.0  0.3  63772 50932 ?        S    09:45   0:00 python3 /usr/local/bin/colab-file
root         116  0.6  0.9 371152 127768 ?       Sl   09:45   0:04 /usr/bin/python3 /usr/local/bin/j
root         197  0.8  0.7 1188216 106032 ?      Ssl  09:46   0:05 /usr/bin/python3 -m colab_kernel_
root         232  0.2  0.1 544884 20424 ?        Sl   09:46   0:01 /usr/bin/python3 /usr/local/lib/p
root        1365  0.0  0.0  20884 12932 ?        S    09:49   0:00 python3 /opt/google/drive/drive-f
root        3468  0.0  0.0   7376  3440 ?        S    09:57   0:00 /bin/bash -c ps aux | grep python
root        3470  0.0  0.0   6484  2424 ?        S    09:57   0:00 grep python


In [None]:
!kill -9 1601

Display Tensorflow version

In [None]:
import tensorflow as tf
print(tf.__version__)
print(tf.sysconfig.get_build_info())


2.18.0
OrderedDict([('cpu_compiler', '/usr/lib/llvm-18/bin/clang'), ('cuda_compute_capabilities', ['sm_60', 'sm_70', 'sm_80', 'sm_89', 'compute_90']), ('cuda_version', '12.5.1'), ('cudnn_version', '9'), ('is_cuda_build', True), ('is_rocm_build', False), ('is_tensorrt_build', False)])


Unistall tensorflow so the model will efficiently load

In [5]:
!pip uninstall -y tensorflow tensorflow-gpu

Found existing installation: tensorflow 2.18.0
Uninstalling tensorflow-2.18.0:
  Successfully uninstalled tensorflow-2.18.0
[0m