In [None]:
# Import libraries
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, BloomForCausalLM, BloomTokenizerFast
from fastapi import FastAPI, Request, Form
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
import uvicorn
import nest_asyncio
from typing import Optional
import textwrap
import torch
from huggingface_hub import login

In [None]:
# Login to Hugging Face.
# The token is read from the HF_TOKEN environment variable and, if that is not
# set, requested interactively without echo, so no credential is ever stored
# in the notebook source or its saved outputs.
import os
from getpass import getpass

hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=hf_token)

In [6]:
# Load models (Flan-T5 for general questions, BLOOM for technical)
# Checkpoint identifiers on the Hugging Face Hub.
flan_model = "google/flan-t5-large"
bloom_model = "bigscience/bloom-560m"

# Flan-T5: sequence-to-sequence model and its tokenizer.
tokenizer_flan = AutoTokenizer.from_pretrained(
    flan_model,
)
model_flan = AutoModelForSeq2SeqLM.from_pretrained(
    flan_model,
    device_map="auto",
)

# BLOOM: causal language model and its fast tokenizer.
tokenizer_bloom = BloomTokenizerFast.from_pretrained(
    bloom_model,
)
model_bloom = BloomForCausalLM.from_pretrained(
    bloom_model,
    device_map="auto",
)

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/693 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

In [11]:
# Initialize pipelines
# No explicit `device` is passed to either pipeline: both models were loaded
# with device_map="auto", so their placement is already decided at load time.
flan_pipeline = pipeline("text2text-generation", model=model_flan, tokenizer=tokenizer_flan)

bloom_pipeline = pipeline("text-generation", model=model_bloom, tokenizer=tokenizer_bloom)

Device set to use cpu
Device set to use cpu


In [13]:
# FastAPI App
import os

# Ensure the static directory exists. exist_ok=True makes this a single
# idempotent call instead of a separate check followed by a create, so
# re-running the cell (or a concurrent creation) cannot fail in between.
os.makedirs("static", exist_ok=True)

# Application object, static file mount and Jinja2 template loader.
app = FastAPI()
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

In [14]:
# Prompt templates
# Both templates are filled in with str.format(); each begins and ends with a
# newline. Placeholders: num_questions, role, experience (and skills for the
# technical template).
GENERAL_PROMPT = (
    "\n"
    "Generate {num_questions} interview questions for a {role} with {experience} years of experience.\n"
    "Focus on general behavioral and situational questions.\n"
)

TECHNICAL_PROMPT = (
    "\n"
    "Generate {num_questions} technical interview questions for a {role} (focus: {skills}).\n"
    "Assume the candidate has {experience} years of experience.\n"
)

In [15]:
# FastAPI Endpoints
@app.post("/generate_questions")
async def generate_questions(
    role: str = Form(...),
    experience: int = Form(1),
    skills: str = Form(""),
    num_questions: int = Form(5),
    question_type: str = Form("general"),
):
    """Generate interview questions for a role and return them line by line.

    Form fields:
        role: Job title the questions are written for (required).
        experience: Years of experience inserted into the prompt.
        skills: Focus skills; only used by the technical prompt.
        num_questions: Number of questions requested in the prompt text.
        question_type: "general" (case-insensitive) selects the Flan-T5
            pipeline with GENERAL_PROMPT; any other value selects the BLOOM
            pipeline with TECHNICAL_PROMPT.

    Returns:
        A dict ``{"questions": [...]}`` holding the non-empty lines of the
        generated text, each stripped of surrounding whitespace.
    """
    # `temperature` only takes effect when sampling is enabled, so do_sample
    # is set explicitly. `max_new_tokens` bounds the generated tokens only;
    # `max_length` would also count the prompt tokens for the causal model.
    generation_kwargs = dict(
        max_new_tokens=200,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
    )

    # NOTE(review): the pipeline calls below are synchronous, so this
    # coroutine blocks the event loop for the duration of generation.
    if question_type.strip().lower() == "general":
        prompt = GENERAL_PROMPT.format(
            role=role, experience=experience, num_questions=num_questions
        )
        questions = flan_pipeline(prompt, **generation_kwargs)[0]["generated_text"]
    else:
        prompt = TECHNICAL_PROMPT.format(
            role=role, experience=experience, skills=skills, num_questions=num_questions
        )
        # return_full_text=False keeps the prompt itself out of the output;
        # by default the text-generation pipeline returns prompt + completion.
        questions = bloom_pipeline(
            prompt,
            return_full_text=False,
            **generation_kwargs,
        )[0]["generated_text"]

    # Drop blank lines so the response contains only actual content.
    return {
        "questions": [line.strip() for line in questions.splitlines() if line.strip()]
    }

In [None]:
# prompt: instead of streamlit use gradio and generate the code

!pip install -q gradio
import gradio as gr
import requests

def generate_questions(role, experience, skills, question_type, num_questions):
    """Request questions from the local FastAPI endpoint and format them for display.

    NOTE(review): this function reuses the name of the FastAPI endpoint
    function; the route stays registered on the app, but the module-level
    name now refers to this client function.

    Args:
        role: Job role text.
        experience: Years of experience; coerced to int before sending.
        skills: Comma-separated skills text.
        question_type: Question type label; sent lower-cased.
        num_questions: Number of questions; coerced to int before sending.

    Returns:
        The questions joined with newlines, or a message starting with
        "Error generating questions:" if the request fails or the response
        does not have the expected shape.
    """
    url = "http://localhost:8000/generate_questions"
    try:
        data = {
            "role": role,
            # The endpoint declares these form fields as int, so send whole numbers.
            "experience": int(experience),
            "skills": skills,
            "num_questions": int(num_questions),
            "question_type": question_type.lower(),
        }
        # Generous timeout: generation can be slow, but the UI must not hang forever.
        response = requests.post(url, data=data, timeout=300)
        # Surface HTTP errors (e.g. validation failures) instead of trying to
        # read "questions" from an error payload.
        response.raise_for_status()
        return "\n".join(response.json()["questions"])
    except requests.exceptions.RequestException as e:
        return f"Error generating questions: {e}"
    except (KeyError, TypeError, ValueError) as e:
        # Bad numeric input, a body that is not valid JSON, or JSON without a
        # usable "questions" list.
        return f"Error generating questions: unexpected data ({e!r})"

# Input widgets, listed in the same order as the parameters of
# generate_questions(role, experience, skills, question_type, num_questions).
role_input = gr.Textbox(label="Job Role (e.g., Data Scientist)", value="Data Scientist")
experience_input = gr.Slider(minimum=1, maximum=20, step=1, label="Years of Experience", value=3)
skills_input = gr.Textbox(label="Key Skills (comma-separated)", value="Python, SQL, Machine Learning")
question_type_input = gr.Radio(["General", "Technical"], label="Question Type", value="General")
num_questions_input = gr.Slider(minimum=1, maximum=10, step=1, label="Number of Questions", value=5)

# Single-function Gradio UI that shows the generated questions in a text box.
iface = gr.Interface(
    fn=generate_questions,
    inputs=[
        role_input,
        experience_input,
        skills_input,
        question_type_input,
        num_questions_input,
    ],
    outputs=gr.Textbox(label="Generated Questions"),
    title="AI Interview Question Generator",
    description="Get personalized questions based on role and experience",
)

# Run FastAPI server in background (keep the previous code for FastAPI)
import nest_asyncio
nest_asyncio.apply()
import uvicorn
import threading

def run_fastapi():
    """Serve the FastAPI app on port 8000; blocks until the server stops."""
    # NOTE(review): 0.0.0.0 listens on every network interface and the API has
    # no authentication; the Gradio client only needs localhost.
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Start the server only if a previous run of this cell has not already left a
# live server thread behind; a second server could not bind port 8000.
# daemon=True lets the interpreter exit without waiting for the server thread.
_previous_thread = globals().get("fastapi_thread")
if _previous_thread is None or not _previous_thread.is_alive():
    fastapi_thread = threading.Thread(target=run_fastapi, daemon=True)
    fastapi_thread.start()

# Launch Gradio interface
iface.launch(debug=True)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.2/54.2 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.1/323.1 kB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m93.3 MB/s[0m eta [36m0:00:00[0m
[?25hIt looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://ce4c7e485085a55d95.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/sp

Created dataset file at: .gradio/flagged/dataset1.csv


Generated Questions:

1. Explain how you’d deploy a PyTorch model on AWS SageMaker.

2. Design a pipeline for fine-tuning BERT on a custom NLP dataset.

3. How would you handle class imbalance in a multi-label text classification task?