<a href="https://colab.research.google.com/github/GayathriSanthakumar/IMPROVING-ONLINE-LEARNING-OUTCOMES_ML/blob/main/Untitled18.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
import matplotlib.pyplot as plt


In [11]:
DATASET_FILE = "udemy_courses.csv"
COMPLETED_THRESHOLD = 70.0
WEIGHT_USER = 0.6
WEIGHT_COURSE = 0.4
N_RECOMMENDATIONS = 10
N_USERS = 200


In [12]:
df_courses = None
course_embeddings = None
df_user_history = None


In [14]:
from google.colab import files
uploaded = files.upload()


Saving udemy_courses.csv to udemy_courses (1).csv


In [15]:
def init_data():
    global df_courses, course_embeddings, df_user_history

    # Get uploaded file name
    dataset_file = list(uploaded.keys())[0]

    # Load dataset
    df_courses = pd.read_csv(dataset_file)

    # Validate required columns
    required_cols = ["course_id", "course_title", "subject", "level", "num_subscribers"]
    for col in required_cols:
        if col not in df_courses.columns:
            raise ValueError(f"Missing column: {col}")

    # Combine text columns
    df_courses["text"] = (
        df_courses["course_title"].astype(str) + " " +
        df_courses["subject"].astype(str) + " " +
        df_courses["level"].astype(str)
    )

    # Load BERT model
    model = SentenceTransformer("all-MiniLM-L6-v2")
    course_embeddings = model.encode(df_courses["text"].tolist())

    # Intrinsic course dropout risk
    max_sub = df_courses["num_subscribers"].max()
    df_courses["Intrinsic_Dropout_Rate"] = 100 - (
        (df_courses["num_subscribers"] / max_sub) * 100
    )

    # -------------------------
    # Generate User History
    # -------------------------
    np.random.seed(42)
    history = []

    for user_id in range(1, N_USERS + 1):

        n = np.random.randint(8, 20)
        courses = np.random.choice(
            df_courses["course_id"], n, replace=False
        )

        for c in courses:

            if user_id % 2 == 0:
                completion = np.random.uniform(65,100)
            else:
                completion = np.random.uniform(10,70)

            history.append({
                "User_ID": user_id,
                "Course_ID": c,
                "Actual_Completion (%)": completion
            })

    df_user_history = pd.DataFrame(history)
    df_user_history["Dropout (%)"] = 100 - df_user_history["Actual_Completion (%)"]

    print("Dataset Loaded Successfully!")
    print("Courses:", len(df_courses))
    print("Users:", N_USERS)

    return True


In [16]:
def classify_learner(user_avg_risk):
    if user_avg_risk < 30:
        return "Low Risk Learner"
    elif user_avg_risk < 60:
        return "Moderate Risk Learner"
    else:
        return "High Risk Learner"


In [17]:
def recommend_course_with_risk(user_id):

    user_history = df_user_history[df_user_history["User_ID"] == user_id]

    if user_history.empty:
        return "User not found",0,0,pd.DataFrame(),pd.DataFrame()

    user_avg_risk = user_history["Dropout (%)"].mean()

    # ðŸ”¥ NEW LINE (Learner Type)
    learner_type = classify_learner(user_avg_risk)

    completed = user_history[
        user_history["Actual_Completion (%)"] >= COMPLETED_THRESHOLD
    ]

    if completed.empty:
        return "User has no completed courses",0,0,user_history,pd.DataFrame()

    vectors = []
    for cid in completed["Course_ID"]:
        idx = df_courses.index[df_courses["course_id"]==cid][0]
        vectors.append(course_embeddings[idx])

    user_vector = np.mean(vectors,axis=0).reshape(1,-1)

    sims = cosine_similarity(user_vector,course_embeddings)[0]
    sim_series = pd.Series(sims,index=df_courses["course_id"])

    taken = user_history["Course_ID"].tolist()
    sim_series = sim_series.drop(taken,errors="ignore")

    top_ids = sim_series.nlargest(N_RECOMMENDATIONS*5).index

    recs = df_courses[df_courses["course_id"].isin(top_ids)].copy()
    recs["Similarity_Score"] = sim_series.loc[recs["course_id"]].values

    recs["Predicted_Dropout_Risk (%)"] = (
        WEIGHT_USER * user_avg_risk +
        WEIGHT_COURSE * recs["Intrinsic_Dropout_Rate"]
    )

    recs["Recommendation_Score"] = (
        recs["Similarity_Score"] -
        recs["Predicted_Dropout_Risk (%)"]/100
    )

    recs = recs.sort_values("Recommendation_Score",ascending=False)

    best = recs.iloc[0]
    predicted = best["Predicted_Dropout_Risk (%)"]

    message = f"""
### ðŸ¥‡ Best Personalized Course: {best['course_title']}

**Learner Profile:** {learner_type}

| Metric | Value |
|------|------|
| Subject | {best['subject']} |
| Level | {best['level']} |
| Similarity | {best['Similarity_Score']:.4f} |
| Predicted Risk | {predicted:.2f}% |
"""

    history_display = user_history[["Course_ID","Dropout (%)"]]

    top10 = recs.head(10)[
        ["course_title","subject","level",
         "Similarity_Score","Predicted_Dropout_Risk (%)"]
    ]

    return message,predicted,user_avg_risk,history_display,top10


In [18]:
def create_risk_meter(predicted,user_risk):

    fig,ax = plt.subplots(figsize=(4,1))
    ax.barh([0],[100],color="lightgray")
    ax.barh([0],[predicted],color="orange")
    ax.set_xlim(0,100)
    ax.set_yticks([])
    ax.set_title("Predicted Dropout Risk")
    ax.text(predicted,0,f"{predicted:.1f}%")

    return fig


In [19]:
def handle(uid):

    uid = int(uid)
    msg,pr,ur,hist,top10 = recommend_course_with_risk(uid)
    fig = create_risk_meter(pr,ur)

    return msg,ur,hist,top10,fig


In [20]:
if init_data():

    with gr.Blocks(theme=gr.themes.Soft(),
                   title="Risk-Adjusted Recommender") as demo:

        gr.Markdown("""
# Risk-Adjusted Course Recommendation Engine
### Advanced Content-Based Filtering with Dropout Propensity
Enter User ID (1-200)
""")

        with gr.Row():
            uid = gr.Textbox(value="1",label="Enter User ID")
            btn = gr.Button("Analyze User & Recommend Course")

        gr.HTML("<hr>")

        with gr.Tabs():

            with gr.TabItem("ðŸ¥‡ Final Recommendation & Risk Dashboard"):
                with gr.Row():
                    main_out = gr.Markdown()
                    risk_plot = gr.Plot()
                    user_prop = gr.Number(label="User Historical Dropout (%)")

            with gr.TabItem("ðŸ“œ User Course History"):
                hist_out = gr.DataFrame()

            with gr.TabItem("ðŸ“Š Top 10 Risk-Adjusted List"):
                top10_out = gr.DataFrame()

        btn.click(
            fn=handle,
            inputs=uid,
            outputs=[main_out,user_prop,hist_out,top10_out,risk_plot]
        )

    demo.launch(share=True)




Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Dataset Loaded Successfully!
Courses: 3678
Users: 200


  with gr.Blocks(theme=gr.themes.Soft(),


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9c300d58c2de4fa255.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
