<a href="https://colab.research.google.com/github/Preeti-2327/Project/blob/main/question_paper_interface.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

# -----------------------------
# Load + Clean CSV
# -----------------------------
def _pick_column(columns, canonical, keywords):
    """Return the column to treat as *canonical*, or None if nothing matches.

    A column already named exactly *canonical* wins, so renaming can never
    create a second column with that name. Otherwise the first column (in
    file order) whose name contains any of *keywords* is returned.
    """
    if canonical in columns:
        return canonical
    for name in columns:
        if any(key in name for key in keywords):
            return name
    return None


def load_and_clean(file):
    """Read a questions CSV and normalise it to a fixed set of columns.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload object that exposes the path as its ``name`` attribute.

    Returns:
        A DataFrame with all original columns (names stripped and
        lower-cased) in which these columns are guaranteed to exist:

        * ``question``   - the detected question text column, or the first
          column of the file when none is detected.
        * ``difficulty`` - string labels; random "1".."5" placeholders when
          the file has no difficulty-like column.
        * ``marks``      - integers; random 1..5 when the file has no marks
          column, and 2 for values that are not numeric.
        * ``topic``      - "General" when the file has no ``topic`` column.

    Raises:
        ValueError: If *file* is None (nothing was uploaded).
    """
    if file is None:
        raise ValueError("No CSV file was provided.")

    # A str / PathLike is used directly; `Path.name` would only be the last
    # path component, so it must not go through the `.name` branch.
    if isinstance(file, str) or hasattr(file, "__fspath__"):
        path = file
    else:
        path = file.name

    df = pd.read_csv(path, dtype=str)
    df.columns = [c.strip().lower() for c in df.columns]

    # Question column
    question_col = _pick_column(df.columns, "question", ("question", "text"))
    if question_col is None:
        question_col = df.columns[0]
    df = df.rename(columns={question_col: "question"})

    # Difficulty (BT level)
    # NOTE(review): the "bt" keyword also matches names such as "subtopic".
    diff_col = _pick_column(df.columns, "difficulty", ("bt", "difficulty", "level"))
    if diff_col is None:
        # Placeholder labels, stored as strings like the labels read from a file.
        df["difficulty"] = np.random.randint(1, 6, size=len(df)).astype(str)
    else:
        df = df.rename(columns={diff_col: "difficulty"})
        # Depending on the pandas version, a missing cell may come out of
        # astype(str) as the literal string "nan".
        df["difficulty"] = df["difficulty"].astype(str).str.strip()

    # Marks
    mark_col = _pick_column(df.columns, "marks", ("mark",))
    if mark_col is None:
        df["marks"] = np.random.choice([1, 2, 3, 4, 5], size=len(df))
    else:
        df = df.rename(columns={mark_col: "marks"})
        numeric = pd.to_numeric(df["marks"], errors="coerce")
        df["marks"] = numeric.fillna(2).astype(int)

    # Topic
    if "topic" not in df.columns:
        df["topic"] = "General"

    return df


# -----------------------------
# Train model
# -----------------------------
def train_model(df):
    """Fit a TF-IDF + logistic-regression classifier of question difficulty.

    Rows whose difficulty is missing (null, empty, or the "nan" placeholder
    that ``astype(str)`` can produce for a missing value) are left out of
    training so that "missing" is not learned as a class of its own. If the
    labelled rows contain fewer than two distinct classes, every row is used
    instead, which is the previous behaviour.

    Args:
        df: DataFrame with ``question`` and ``difficulty`` columns.

    Returns:
        ``(model, acc)``: the fitted pipeline and its accuracy measured on
        the same rows it was trained on (not a held-out estimate).
    """
    X = df["question"].astype(str)
    raw = df["difficulty"]
    y = raw.astype(str)

    labelled = raw.notnull() & ~y.str.strip().isin(["", "nan"])
    # Only narrow the training set when a classifier can still be fitted on it.
    if y[labelled].nunique() >= 2:
        X, y = X[labelled], y[labelled]

    model = make_pipeline(
        TfidfVectorizer(),
        LogisticRegression(max_iter=1000)
    )
    model.fit(X, y)
    acc = (model.predict(X) == y).mean()

    return model, acc


# -----------------------------
# Generate Question Paper
# -----------------------------
def generate_paper(df, model, total_marks, distribution=None):
    """Select questions whose marks add up to at most *total_marks*.

    Questions are shuffled with a fixed seed, then picked per difficulty
    label (highest marks first) until that label's share of the total is
    reached. Any marks still unassigned are filled from the remaining
    questions, lowest marks first.

    Args:
        df: DataFrame with ``question``, ``difficulty`` and ``marks`` columns.
        model: Object with a ``predict(list_of_texts)`` method. It is only
            called when some rows have no difficulty label.
        total_marks: Upper bound for the sum of marks; ``None`` is treated
            as 0.
        distribution: Optional mapping of difficulty label to the fraction
            of *total_marks* it should receive. Defaults to
            ``{"1": 0.2, "2": 0.25, "3": 0.25, "4": 0.2, "5": 0.1}``.

    Returns:
        A DataFrame of the selected rows with a fresh 0..n-1 index and the
        same columns as *df*, also when nothing could be selected.
    """
    if distribution is None:
        distribution = {"1": 0.2, "2": 0.25, "3": 0.25, "4": 0.2, "5": 0.1}
    if total_marks is None:
        total_marks = 0

    df2 = df.copy()

    # A label counts as missing when it is null or one of the placeholder
    # strings a missing cell can turn into after astype(str).
    raw = df2["difficulty"]
    unlabelled = raw.isnull() | raw.astype(str).str.strip().isin(["", "nan"])
    if unlabelled.any():
        texts = df2.loc[unlabelled, "question"].astype(str).tolist()
        predicted = [str(p) for p in model.predict(texts)]
        # Only the unlabelled rows are filled in; known labels are kept.
        df2["difficulty"] = raw.astype(object)
        df2.loc[unlabelled, "difficulty"] = predicted

    target_marks = {str(k): total_marks * v for k, v in distribution.items()}

    df2 = df2.sample(frac=1, random_state=42).reset_index(drop=True)
    labels = df2["difficulty"].astype(str)

    chosen = []  # index labels (unique after reset_index) of selected rows
    marks_sum = 0

    for label, target in target_marks.items():
        pool = df2[labels == label].sort_values(by="marks", ascending=False)
        curr = 0
        for idx, value in pool["marks"].items():
            m = int(value)
            if curr + m <= target and marks_sum + m <= total_marks:
                chosen.append(idx)
                curr += m
                marks_sum += m
            if curr >= target or marks_sum >= total_marks:
                break

    # Fill remaining marks
    missing = total_marks - marks_sum
    if missing > 0:
        pool = df2.drop(index=chosen).sort_values(by="marks")
        for idx, value in pool["marks"].items():
            if value > missing:
                # Sorted ascending: nothing further down can fit either.
                break
            chosen.append(idx)
            missing -= value
            if missing <= 0:
                break

    # Indexing the frame (rather than rebuilding it from row objects) keeps
    # the column set and dtypes even when nothing was selected.
    final_df = df2.loc[chosen].reset_index(drop=True)
    return final_df


# -----------------------------
# Gradio UI Logic
# -----------------------------
def interface(file, total_marks):
    """Run the whole pipeline for one upload.

    Loads and cleans the uploaded CSV, trains the difficulty model on it,
    generates a paper for *total_marks*, and writes that paper to
    ``generated_question_paper.csv`` in the current working directory.

    Returns:
        A 3-tuple of (status message, generated paper DataFrame, path of
        the written CSV).
    """
    questions = load_and_clean(file)
    classifier, train_acc = train_model(questions)
    result = generate_paper(questions, classifier, total_marks)

    out_path = "generated_question_paper.csv"
    result.to_csv(out_path, index=False)

    status = f"Model Trained Successfully!\nTraining Accuracy: {train_acc:.2f}"
    return status, result, out_path


# -----------------------------
# Gradio UI
# -----------------------------
# Input widgets, in the order `interface` receives them as arguments.
input_widgets = [
    gr.File(label="Upload Questions CSV"),
    gr.Number(label="Total Marks", value=50),
]

# Output widgets, in the order of the tuple returned by `interface`.
output_widgets = [
    gr.Textbox(label="Training Details"),
    gr.Dataframe(label="Generated Question Paper"),
    gr.File(label="Download Paper"),
]

demo = gr.Interface(
    title="AI-Based Question Paper Generator",
    description="Upload your CSV → Train Model → Generate Question Paper automatically.",
    fn=interface,
    inputs=input_widgets,
    outputs=output_widgets,
)

# Start the web UI as soon as this cell/module is executed.
demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3b92ca538d427b6e32.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


