<a href="https://colab.research.google.com/github/LuminousMayank/sih-timetable-db/blob/main/Timetable_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files

uploaded = files.upload()  # This will open a file chooser


In [None]:
!pip -q install fastapi uvicorn pyngrok nest_asyncio

# ----(set your ngrok token; get it free from dashboard.ngrok.com)----
# SECURITY: never paste the token into the notebook source -- it ends up in
# version control and in shared Colab copies. Read it from the environment,
# or prompt for it interactively so it is not stored in the file or outputs.
# (A token that was previously committed here should be revoked in the
# ngrok dashboard.)
import os
from getpass import getpass

NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")

from pyngrok import ngrok, conf
conf.get_default().auth_token = NGROK_AUTH_TOKEN



In [None]:
# === Cell B: Define the FastAPI model server (Pydantic v2 safe) ===
import nest_asyncio, json
from fastapi import FastAPI, HTTPException
from typing import Dict, Any, List

nest_asyncio.apply()

app = FastAPI(title="Timetable AI Model", version="1.0.0")

@app.get("/")
def root():
    """Liveness endpoint: report that the service is up and list its routes."""
    status = {
        "ok": True,
        "msg": "AI model is alive",
        "endpoints": ["/plan"],
    }
    return status

@app.post("/plan")
def plan(payload: Dict[str, Any]):
    """
    Build a greedy course plan for one student.

    Expects the payload your backend sends from /timetable/preview.
    Returns: {"plan":[{"course_code":..., "weight":..., "reason":...}], "warnings":[...]}

    Payload keys read here (all optional; default in parentheses):
      - student.semester (1), student.credit_min (18), student.credit_max (24)
      - student.prefs ([]): keywords matched case-insensitively against each
        course's "title" and "basket"
      - catalog.courses ([]): dicts with a "code" key and optional
        "semester", "credits", "title", "basket"
      - catalog.rules.mandatory ([]): course codes

    Selection order:
      1. mandatory courses whose semester matches (or is unset),
      2. same-semester electives, best preference match first, while they
         fit under credit_max,
      3. any remaining course, until credit_min is reached.
    A course code is never selected more than once.

    Raises:
        HTTPException(400): on any error while processing the payload.
    """
    try:
        student = payload.get("student", {})
        catalog = payload.get("catalog", {})
        courses: List[Dict[str, Any]] = catalog.get("courses", [])
        rules = catalog.get("rules", {})
        mandatory_codes: List[str] = rules.get("mandatory", [])
        # Set copy for O(1) membership tests; the list keeps the caller's order.
        mandatory_set = set(mandatory_codes)

        sem = student.get("semester", 1)
        credit_min = int(student.get("credit_min", 18))
        credit_max = int(student.get("credit_max", 24))
        prefs = {p.lower() for p in student.get("prefs", [])}

        by_code = {c["code"]: c for c in courses}
        warnings = []

        selected = []
        selected_codes = set()  # guards against picking the same code twice
        total_credits = 0

        def in_semester(c):
            """True when the course is offered in `sem` or has no semester set."""
            return c.get("semester") == sem or c.get("semester") is None

        def pref_score(c):
            """Number of student preferences found in the course title/basket."""
            text = f'{c.get("title","")} {c.get("basket","")}'.lower()
            return sum(1 for p in prefs if p in text)

        # 1) Add mandatory for this semester (not limited by credit_max)
        for code in mandatory_codes:
            c = by_code.get(code)
            if c and in_semester(c) and code not in selected_codes:
                selected.append(c)
                selected_codes.add(code)
                total_credits += int(c.get("credits", 0))

        # 2) Electives: same semester, prefer matching prefs
        electives = [
            c for c in courses
            if c["code"] not in mandatory_set and in_semester(c)
        ]
        # reverse=True: highest preference score first; among equal scores the
        # negated credits put lower-credit courses first.
        electives.sort(key=lambda c: (pref_score(c), -int(c.get("credits", 0))), reverse=True)

        for c in electives:
            if c["code"] in selected_codes:
                continue
            cr = int(c.get("credits", 0))
            if total_credits + cr <= credit_max:
                selected.append(c)
                selected_codes.add(c["code"])
                total_credits += cr

        # 3) If still below min, fill from other semesters
        if total_credits < credit_min:
            others = [c for c in courses if c["code"] not in selected_codes]
            for c in others:
                if c["code"] in selected_codes:
                    continue
                cr = int(c.get("credits", 0))
                if total_credits + cr <= credit_max:
                    selected.append(c)
                    selected_codes.add(c["code"])
                    total_credits += cr
                if total_credits >= credit_min:
                    break

        if total_credits < credit_min:
            warnings.append(f"Could not reach credit_min={credit_min}; total={total_credits}")

        # Named plan_items so the local does not shadow this endpoint function.
        plan_items = []
        for c in selected:
            is_mandatory = c["code"] in mandatory_set
            sc = pref_score(c)

            reason_bits = []
            if is_mandatory:
                reason_bits.append("mandatory")
            if sc > 0:
                reason_bits.append(f"matches {sc} preference(s)")
            if not reason_bits:
                reason_bits.append("fits credit window")

            plan_items.append({
                "course_code": c["code"],
                "weight": 1.0 if is_mandatory else 0.5 + 0.1*sc,
                "reason": ", ".join(reason_bits)
            })

        return {"plan": plan_items, "warnings": warnings}

    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Bad payload or server error: {e}") from e


In [None]:
!pip install PyPDF2


Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m225.3/232.6 kB[0m [31m7.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [None]:
import uvicorn, threading, time
from pyngrok import ngrok

# Start FastAPI with uvicorn in a background thread.
# NOTE: host="0.0.0.0" binds all network interfaces, not only localhost.
def run_uvicorn():
    """Serve the module-level `app` with uvicorn on port 8000."""
    uvicorn.run(app, host="0.0.0.0", port=8000)

# daemon=True so the server thread does not keep the interpreter alive on exit.
thread = threading.Thread(target=run_uvicorn, daemon=True)
thread.start()

# Give server a moment to spin up
# NOTE(review): a fixed 2 s sleep is a heuristic, not a readiness check.
time.sleep(2)

# Create public URL via ngrok (tunnels to the local port 8000 used above)
public_url = ngrok.connect(addr=8000, proto="http")
print("PUBLIC BASE URL:", public_url.public_url)
print("POST endpoint:", public_url.public_url + "/plan")


INFO:     Started server process [739]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


PUBLIC BASE URL: https://unmaternally-nontangible-matteo.ngrok-free.dev
POST endpoint: https://unmaternally-nontangible-matteo.ngrok-free.dev/plan


NameError: name 'parse_pdf_to_csv' is not defined

In [None]:
import pdfplumber
import pandas as pd

pdf_path = "/content/BTech_CSE_AIML_2022-23_19-09-25.pdf"

def parse_pdf_to_csv_structured(pdf_path, csv_path="structured_output.csv", max_pages=None):
    """Extract table rows and font-size-classified words from a PDF into a CSV.

    For every processed page this emits:
      * one record per non-empty table row (Type "Table Row"), with the
        cells stored in Col_1, Col_2, ...;
      * one record per extracted word, classified by font size:
        size > 15 -> "Heading", 12 < size <= 15 -> "Subheading",
        otherwise "Paragraph" (text stored in "Content").

    Args:
        pdf_path: path of the PDF to read.
        csv_path: where the resulting CSV is written.
        max_pages: if truthy, process at most this many leading pages;
            otherwise process the whole document.

    Returns:
        The pandas DataFrame that was written to `csv_path`.
    """
    parsed_data = []

    with pdfplumber.open(pdf_path) as pdf:
        total_pages = len(pdf.pages)
        if max_pages:
            total_pages = min(total_pages, max_pages)

        for page_num in range(total_pages):
            page = pdf.pages[page_num]

            # Try extracting tables (fast mode)
            try:
                tables = page.extract_tables(table_settings={
                    "vertical_strategy": "lines",
                    "horizontal_strategy": "lines",
                    "snap_tolerance": 3,
                    "join_tolerance": 3,
                    "edge_min_length": 3,
                    "min_words_vertical": 3,
                    "min_words_horizontal": 1,
                })
            except Exception:
                # Best effort: a page whose tables cannot be parsed still
                # contributes its words below. `Exception` (not a bare
                # except) lets KeyboardInterrupt/SystemExit propagate.
                tables = []

            if tables:
                for table in tables:
                    for row in table:
                        if not any(row):  # skip empty rows
                            continue
                        row_dict = {
                            "Page": page_num + 1,
                            "Type": "Table Row",
                            "Heading": None,
                            "Subheading": None,
                        }
                        for i, col in enumerate(row):
                            row_dict[f"Col_{i+1}"] = col
                        parsed_data.append(row_dict)

            # Extract words for headings, subheadings, paragraphs.
            # Each word becomes its own record; words are not merged into lines.
            words = page.extract_words(extra_attrs=["size"])
            for w in words:
                text = w["text"].strip()
                size = w["size"]

                if not text:
                    continue

                if size > 15:  # Large font → Heading
                    parsed_data.append({
                        "Page": page_num + 1,
                        "Type": "Heading",
                        "Heading": text,
                        "Subheading": None,
                        "Content": None
                    })
                elif 12 < size <= 15:  # Medium font → Subheading
                    parsed_data.append({
                        "Page": page_num + 1,
                        "Type": "Subheading",
                        "Heading": None,
                        "Subheading": text,
                        "Content": None
                    })
                else:  # Normal font → Paragraph
                    parsed_data.append({
                        "Page": page_num + 1,
                        "Type": "Paragraph",
                        "Heading": None,
                        "Subheading": None,
                        "Content": text
                    })

    df = pd.DataFrame(parsed_data)
    df.to_csv(csv_path, index=False)
    return df

# Example: Limit to first 10 pages for testing speed
df_structured = parse_pdf_to_csv_structured(pdf_path, max_pages=10)
print(df_structured.head(20))


    Page     Type        Heading Subheading Content Col_1 Col_2 Col_3 Col_4  \
0      1  Heading         GANDHI       None    None   NaN   NaN   NaN   NaN   
1      1  Heading      INSTITUTE       None    None   NaN   NaN   NaN   NaN   
2      1  Heading             OF       None    None   NaN   NaN   NaN   NaN   
3      1  Heading     TECHNOLOGY       None    None   NaN   NaN   NaN   NaN   
4      1  Heading            AND       None    None   NaN   NaN   NaN   NaN   
5      1  Heading     MANAGEMENT       None    None   NaN   NaN   NaN   NaN   
6      1  Heading        (GITAM)       None    None   NaN   NaN   NaN   NaN   
7      1  Heading        (Deemed       None    None   NaN   NaN   NaN   NaN   
8      1  Heading             to       None    None   NaN   NaN   NaN   NaN   
9      1  Heading             be       None    None   NaN   NaN   NaN   NaN   
10     1  Heading    University)       None    None   NaN   NaN   NaN   NaN   
11     1  Heading  VISAKHAPATNAM       None    None 

In [None]:
"""
Updated Course Recommender System - Matching New CSV Structure
------------------------------------------------------------
- Works with new CSV column structure
- Smart mandatory/optional course selection
- Interest-based recommendations with variety
- Realistic 8-semester progression
- Prerequisite awareness
- Faculty and infrastructure considerations

Requirements:
pip install pandas scikit-learn openpyxl
"""

import pandas as pd
import numpy as np
import re
import sys
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# ---------- Config ----------
DATA_PATH = "courses_fully_filled.xlsx"
OUTPUT_PATH = "generated_roadmap.xlsx"

MIN_CREDITS_PER_SEM = 24
MIN_COURSES_PER_SEM = 5
MAX_COURSES_PER_SEM = 8
SEMESTERS = 8
DEFAULT_CREDITS = 4

# Target distribution per semester
TARGET_MANDATORY_PER_SEM = 4  # Target mandatory courses per semester
TARGET_ELECTIVES_PER_SEM = 2  # Target elective courses per semester
MAX_ELECTIVES_PER_SEM = 3     # Max elective courses per semester

# Semester progression - when to introduce course types
SEMESTER_PROGRESSION = {
    "foundation": [1, 2, 3, 4],    # Basic, intro, foundation courses
    "intermediate": [3, 4, 5, 6],  # Intermediate, application courses
    "advanced": [5, 6, 7, 8],      # Advanced, specialized, research courses
    "project": [6, 7, 8]           # Major projects in final semesters
}

# Career-specific core subjects that should be prioritized as mandatory
CAREER_CORE_SUBJECTS = {
    "ai engineer": [
        "machine learning", "deep learning", "artificial intelligence", "data structures",
        "algorithms", "linear algebra", "statistics", "python", "mathematics", "neural networks",
        "computer vision", "natural language processing", "robotics", "database", "operating systems"
    ],
    "data scientist": [
        "machine learning", "statistics", "data analysis", "data mining", "database",
        "python", "r programming", "linear algebra", "probability", "data visualization",
        "big data", "sql", "analytics", "mathematics", "algorithms"
    ],
    "full stack developer": [
        "web development", "database", "javascript", "html", "css", "node.js", "react",
        "angular", "python", "java", "sql", "operating systems", "computer networks",
        "software engineering", "algorithms", "data structures"
    ],
    "software engineer": [
        "programming", "algorithms", "data structures", "software engineering",
        "operating systems", "database", "computer networks", "object oriented programming",
        "system design", "testing", "debugging", "version control"
    ],
    "cyber security": [
        "cybersecurity", "network security", "cryptography", "ethical hacking",
        "computer networks", "operating systems", "database security", "forensics",
        "risk management", "security protocols", "penetration testing"
    ],
    "mobile developer": [
        "mobile development", "android", "ios", "java", "kotlin", "swift",
        "react native", "flutter", "database", "api development", "user interface"
    ],
    "web developer": [
        "web development", "html", "css", "javascript", "react", "angular", "vue",
        "node.js", "database", "sql", "api development", "responsive design"
    ]
}

SKILL_KEYWORDS = {
    "Beginner": ["intro", "introduction", "basics", "basic", "foundation", "foundations", "fundamentals", "survey", "overview", "principles"],
    "Intermediate": ["intermediate", "applications", "application", "implementation", "design", "systems", "practical", "project", "analysis", "development"],
    "Advanced": ["advanced", "deep", "specialized", "research", "optimization", "complex", "expert", "professional"],
}

def get_career_keywords(career_goal):
    """Get relevant keywords for the given career goal.

    Matching is done in two passes over CAREER_CORE_SUBJECTS:
      1. a career whose full name occurs in the goal wins outright
         (so "Software Engineer" is not captured by "ai engineer" just
         because both contain "engineer");
      2. otherwise the career sharing the most words with the goal is
         chosen; ties go to the entry defined first.

    Args:
        career_goal: free-text career goal; None or empty is allowed.

    Returns:
        The keyword list of the best matching career, or a generic
        computer-science list when nothing matches.
    """
    career_lower = str(career_goal or "").lower()

    # Pass 1: exact career phrase anywhere in the goal
    for career_key, keywords in CAREER_CORE_SUBJECTS.items():
        if career_key in career_lower:
            return keywords

    # Pass 2: best partial match (substring test per word, as before, but
    # ranked by number of matching words instead of first-hit-wins)
    best_keywords, best_hits = None, 0
    for career_key, keywords in CAREER_CORE_SUBJECTS.items():
        hits = sum(1 for word in career_key.split() if word in career_lower)
        if hits > best_hits:
            best_keywords, best_hits = keywords, hits
    if best_keywords is not None:
        return best_keywords

    # Default keywords if no specific match
    return ["programming", "algorithms", "data structures", "mathematics", "computer science"]

# Course complexity keywords
COMPLEXITY_KEYWORDS = {
    "foundation": ["intro", "introduction", "basic", "basics", "foundation", "fundamentals", "overview", "survey", "principles", "engineering"],
    "intermediate": ["intermediate", "systems", "design", "implementation", "practical", "applications", "development", "management", "analysis"],
    "advanced": ["advanced", "deep", "research", "optimization", "specialized", "expert", "machine learning", "artificial intelligence", "neural", "blockchain", "cyber", "forensics"]
}

# ---------- Helpers ----------
def normalize_text(s):
    """Return `s` lower-cased with punctuation removed and whitespace collapsed.

    Missing values (as detected by ``pd.isna``) yield an empty string.
    Every character other than a-z, 0-9 or whitespace is replaced by a
    space before runs of whitespace are squeezed into single spaces.
    """
    if pd.isna(s):
        return ""
    lowered = str(s).lower()
    alnum_only = re.sub(r"[^a-z0-9\s]", " ", lowered)
    return re.sub(r"\s+", " ", alnum_only).strip()

def load_dataset(path=DATA_PATH):
    """Load the course catalogue from the first sheet of an Excel workbook.

    Columns listed in `expected_cols` that are missing from the sheet are
    added as NaN, then the text, code, credit, semester and prerequisite
    columns are cleaned so that downstream code can rely on them:

      * course_title / course_description: NaN -> "", cast to str
      * search_text: normalized title + description (new column)
      * basket_name: NaN -> "Other"; BTECH_Classification: NaN -> "Optional"
      * course_code: cast to str, missing codes replaced by "COURSE_<row index>"
      * credits: non-numeric or missing -> DEFAULT_CREDITS
      * allowed_semesters: NaN -> "1,2,3,4,5,6,7,8"
      * prereq_codes / coreq_codes: NaN -> ""

    Args:
        path: path of the workbook to read.

    Returns:
        The cleaned pandas DataFrame.

    Exits the process (``sys.exit(1)``) when `path` does not exist.
    """
    try:
        df = pd.read_excel(path, sheet_name=0)
    except FileNotFoundError:
        print(f"❌ File not found: {path}")
        # Report the file that was actually requested, not a stale name.
        print(f"Please ensure the file '{path}' is in the current directory")
        sys.exit(1)

    # Expected columns based on new structure
    expected_cols = [
        "basket_code", "basket_name", "course_code", "course_title", "course_type",
        "credits", "allowed_semesters", "prereq_codes", "coreq_codes",
        "sessions_per_week", "session_minutes", "course_description", "max_enrol",
        "infrastructure_notes", "semester_policy", "basket_min_credits",
        "basket_max_credits", "BTECH_Classification", "Faculty_ID", "Faculty_Name", "Class_Strength"
    ]

    # Add missing columns with default values
    for c in expected_cols:
        if c not in df.columns:
            df[c] = np.nan

    # Clean and process data
    df["course_title"] = df["course_title"].fillna("").astype(str)
    df["course_description"] = df["course_description"].fillna("").astype(str)
    df["search_text"] = (df["course_title"] + " " + df["course_description"]).apply(normalize_text)
    df["basket_name"] = df["basket_name"].fillna("Other")
    df["BTECH_Classification"] = df["BTECH_Classification"].fillna("Optional")

    # Handle course codes: after astype(str) a missing code reads "nan"
    df["course_code"] = df["course_code"].astype(str)
    missing_mask = (df["course_code"].isna()) | (df["course_code"] == "nan") | (df["course_code"] == "")
    df.loc[missing_mask, "course_code"] = df.loc[missing_mask].index.map(lambda x: f"COURSE_{x}")

    # Handle credits
    df["credits"] = pd.to_numeric(df["credits"], errors="coerce").fillna(DEFAULT_CREDITS)

    # Handle allowed semesters
    df["allowed_semesters"] = df["allowed_semesters"].fillna("1,2,3,4,5,6,7,8")

    # Handle prerequisites
    df["prereq_codes"] = df["prereq_codes"].fillna("")
    df["coreq_codes"] = df["coreq_codes"].fillna("")

    print(f"✅ Loaded dataset with {len(df)} courses")
    return df

def parse_allowed_semesters(sem_str):
    """Parse an allowed-semesters value into a list of integers.

    Accepts a comma-separated string such as "1, 2,3". A bare number is
    also accepted, including the float form ("3.0") that a spreadsheet
    cell holding a single semester can be read as. Tokens that are not
    whole numbers are ignored.

    Args:
        sem_str: the raw cell value (string, number, None or NaN).

    Returns:
        The parsed semester numbers in input order, or all semesters 1-8
        when the value is missing, empty, or contains no usable number.
    """
    all_semesters = list(range(1, 9))  # All semesters if not specified

    if pd.isna(sem_str) or sem_str == "":
        return all_semesters

    sems = []
    for token in str(sem_str).split(','):
        # Whole number, optionally written as a float with zero fraction.
        match = re.fullmatch(r"\s*(\d+)(?:\.0+)?\s*", token)
        if match:
            sems.append(int(match.group(1)))

    return sems if sems else all_semesters

def build_vectorizer(corpus):
    """Fit a TF-IDF vectorizer on `corpus` and return it with the document matrix.

    The returned matrix has exactly one row per input document, in input
    order, so its rows can be assigned back to the DataFrame the corpus
    came from. Empty or non-string documents are kept as empty strings
    (all-zero rows) instead of being dropped, which would shift every
    later row.

    Args:
        corpus: iterable of document strings.

    Returns:
        (vectorizer, X) where X is the TF-IDF matrix of the corpus.
    """
    docs = [text if isinstance(text, str) else "" for text in corpus]

    def _new_vectorizer():
        return TfidfVectorizer(ngram_range=(1,2), min_df=1, max_features=2000, stop_words='english')

    vect = _new_vectorizer()
    try:
        X = vect.fit_transform(docs)
    except ValueError:
        # Raised when no usable term is left (all documents empty or made
        # of stop words only). Fit on a neutral token instead; the word
        # "empty" is avoided because the English stop-word filter may
        # remove it, leaving the vocabulary empty again.
        vect = _new_vectorizer()
        vect.fit(["placeholder"])
        X = vect.transform(docs if docs else ["placeholder"])
    return vect, X

def interest_to_query(interests, career_goal):
    """Build the free-text search query used for interest matching.

    The query is made of the lower-cased interests and career-goal words,
    plus a few related technical terms when AI, data, web or mobile topics
    are mentioned. Duplicates are removed while keeping first-seen order,
    so the same inputs always give the same string (a `set` would reorder
    tokens between runs and change which word pairs end up adjacent).

    Args:
        interests: list of interest strings, or one string separated by
            commas, semicolons or whitespace; None means no interests.
        career_goal: free-text career goal; falsy values are ignored.

    Returns:
        The space-joined query string (possibly empty).
    """
    tokens = []

    # Process interests
    if isinstance(interests, list):
        tokens.extend(t.strip().lower() for t in interests if t and t.strip())
    elif interests is not None:  # str(None) would add a bogus "none" token
        interest_parts = str(interests).replace(",", " ").replace(";", " ").split()
        tokens.extend(t.lower() for t in interest_parts)

    # Process career goal
    if career_goal:
        career_parts = str(career_goal).replace(",", " ").split()
        tokens.extend(t.lower() for t in career_parts)

    # Add related technical terms for better matching
    query_text = " ".join(tokens)
    # "ai" must match as a whole word: as a substring it also fires on
    # unrelated words such as "maintain" or "training".
    mentions_ai = (
        re.search(r"\bai\b", query_text) is not None
        or any(term in query_text for term in ["artificial intelligence", "machine learning"])
    )
    if mentions_ai:
        tokens.extend(["machine learning", "neural networks", "deep learning", "algorithms", "data science"])
    if "data" in query_text:
        tokens.extend(["statistics", "analytics", "mining", "visualization", "database"])
    if any(term in query_text for term in ["web", "frontend", "backend"]):
        tokens.extend(["javascript", "html", "css", "web development", "programming"])
    if any(term in query_text for term in ["mobile", "android", "ios"]):
        tokens.extend(["mobile development", "app development", "android", "ios"])

    # Remove duplicates, preserving first-seen order
    return " ".join(dict.fromkeys(tokens))

def compute_similarity(vectorizer, X_matrix, query):
    """Cosine similarity between `query` and every row of `X_matrix`.

    Args:
        vectorizer: fitted vectorizer whose ``transform`` maps text into
            the same feature space as `X_matrix`.
        X_matrix: document matrix with one row per course.
        query: search text; None or blank yields all-zero similarities.

    Returns:
        1-D numpy array with one score per row of `X_matrix`. If the
        query cannot be vectorized or compared, zeros are returned
        (best-effort fallback).
    """
    if not query or not query.strip():
        return np.zeros(X_matrix.shape[0])

    try:
        q_vec = vectorizer.transform([query])
        return cosine_similarity(q_vec, X_matrix).flatten()
    except Exception:
        # Deliberate best effort, but without a bare `except:` so that
        # KeyboardInterrupt/SystemExit still propagate.
        return np.zeros(X_matrix.shape[0])

# Academic progression structure - what should be taught when
ACADEMIC_PROGRESSION = {
    "year_1": {  # Semesters 1-2: Foundation
        "priority_subjects": ["mathematics", "calculus", "linear algebra", "programming", "basics", "foundation", "introduction"],
        "avoid_subjects": ["advanced", "deep", "specialized", "research"],
        "max_specialization": 1  # Max 1 career-specific course per semester
    },
    "year_2": {  # Semesters 3-4: Core CS
        "priority_subjects": ["data structures", "algorithms", "discrete mathematics", "statistics", "probability", "programming"],
        "avoid_subjects": ["advanced", "deep", "research"],
        "max_specialization": 1
    },
    "year_3": {  # Semesters 5-6: System & Theory + Intro to specialization
        "priority_subjects": ["operating systems", "database", "computer networks", "software engineering", "systems"],
        "avoid_subjects": ["advanced", "deep"],
        "max_specialization": 2  # Can start introducing more specialized courses
    },
    "year_4": {  # Semesters 7-8: Advanced topics and specialization
        "priority_subjects": ["advanced", "specialized", "research", "project"],
        "avoid_subjects": ["introduction", "basic", "foundation"],
        "max_specialization": 3  # Full specialization allowed
    }
}

def get_academic_year(semester):
    """Map a semester number to its academic year (two semesters per year)."""
    completed_years, _ = divmod(semester - 1, 2)
    return completed_years + 1

def get_year_key(semester):
    """Return the "year_<n>" key that corresponds to the given semester."""
    return "year_{}".format(get_academic_year(semester))

def calculate_specialization_saturation(current_courses, career_keywords):
    """Count the courses that mention at least one of the first eight career keywords.

    A course is counted once, however many keywords it matches. Keywords
    are looked up as substrings of the lower-cased title + description.
    """
    top_keywords = career_keywords[:8]  # Check top career keywords

    def mentions_career(course):
        haystack = (course.get("course_title", "") + " " + course.get("course_description", "")).lower()
        return any(keyword in haystack for keyword in top_keywords)

    return sum(1 for course in current_courses if mentions_career(course))

def should_prioritize_course_for_semester(course_text, semester, career_keywords):
    """Score how well a course fits the academic year of `semester`.

    The score is the number of the year's priority subjects found in the
    course text minus the number of its avoid subjects, floored at -2.0.
    Semesters without an ACADEMIC_PROGRESSION entry use the "year_4"
    rules. `career_keywords` is accepted for interface compatibility and
    is not used here.
    """
    year_key = get_year_key(semester)
    progression = ACADEMIC_PROGRESSION.get(year_key, ACADEMIC_PROGRESSION["year_4"])

    course_text = course_text.lower()

    # Check priority subjects for this academic year
    priority_score = sum(1 for subject in progression["priority_subjects"] if subject in course_text)

    # Check avoid subjects for this academic year
    avoid_score = sum(1 for subject in progression["avoid_subjects"] if subject in course_text)

    # Calculate final appropriateness
    appropriateness = priority_score - avoid_score

    return max(appropriateness, -2.0)  # Cap negative impact


def calculate_career_relevance_score(course_text, career_keywords):
    """Calculate how relevant a course is to the career goal.

    This body previously sat, unreachable, after the `return` of the
    function above because its `def` line was missing; it is restored as
    a function of its own so that callers can resolve the name.

    Args:
        course_text: course title/description text.
        career_keywords: keywords to look for (substring match,
            case-insensitive on the course text).

    Returns:
        2.0 per matching keyword, capped at 10.0.
    """
    text = course_text.lower()
    score = 0

    for keyword in career_keywords:
        if keyword in text:
            score += 2.0  # High bonus for career-relevant keywords

    return min(score, 10.0)  # Cap at 10.0

def is_core_subject(course_text, career_keywords):
    """Check if a course is a core subject for the career"""
    text = course_text.lower()

    # Core CS subjects that are almost always mandatory
    core_cs_subjects = ["data structures", "algorithms", "operating systems", "database",
                       "computer networks", "software engineering", "programming"]

    # Check for core CS subjects
    for subject in core_cs_subjects:
        if subject in text:
            return True

    # Check for career-specific core subjects
    essential_career_keywords = career_keywords[:5]  # Top 5 most important
    matches = sum(1 for keyword in essential_career_keywords if keyword in text)

    return matches >= 2  # At least 2 career keywords
    """Determine if course is foundation, intermediate, or advanced"""
    text = text.lower()

    foundation_score = sum(1 for kw in COMPLEXITY_KEYWORDS["foundation"] if kw in text)
    intermediate_score = sum(1 for kw in COMPLEXITY_KEYWORDS["intermediate"] if kw in text)
    advanced_score = sum(1 for kw in COMPLEXITY_KEYWORDS["advanced"] if kw in text)

    if advanced_score > 0 and advanced_score >= foundation_score:
        return "advanced"
    elif intermediate_score > foundation_score:
        return "intermediate"
    else:
        return "foundation"

def semester_appropriateness_score(course_complexity, semester):
    """Rate how well a course of the given complexity suits a semester.

    Returns 2.0 when the semester is listed for that complexity in
    SEMESTER_PROGRESSION. Otherwise a per-level fallback applies:
    foundation  -> 1.0 up to semester 4, else -1.0;
    intermediate -> 1.0 for semesters 2-7, else 0.0;
    advanced    -> 1.0 from semester 5 on, else -2.0.
    Unknown complexity labels score 0.0.
    """
    if course_complexity not in ("foundation", "intermediate", "advanced"):
        return 0.0

    if semester in SEMESTER_PROGRESSION[course_complexity]:
        return 2.0

    if course_complexity == "foundation":
        # Foundation courses shouldn't be in final semesters
        return 1.0 if semester <= 4 else -1.0

    if course_complexity == "intermediate":
        return 1.0 if 2 <= semester <= 7 else 0.0

    # Advanced courses shouldn't be in early semesters
    return 1.0 if semester >= 5 else -2.0

def get_basket_category(basket_name):
    """Map a free-form basket name onto one of the standard categories.

    Rules are tried in order and the first one with a matching substring
    (case-insensitive) wins; names matching no rule are "General".
    """
    label = str(basket_name).lower()

    ordered_rules = (
        (("core", "foundation"), "Core"),
        (("minor",), "Minor"),
        (("project",), "Project"),
        (("elective",), "Elective"),
        (("lab", "laboratory"), "Lab"),
    )
    for needles, category in ordered_rules:
        if any(needle in label for needle in needles):
            return category
    return "General"

def can_take_course(course, semester, completed_courses):
    """Tell whether `course` may be scheduled in `semester`.

    The semester must be among the course's allowed semesters, and every
    comma-separated code in "prereq_codes" must already be present in
    `completed_courses`. A blank or "nan" prerequisite field means there
    are no prerequisites.
    """
    # Semester window first
    if semester not in parse_allowed_semesters(course.get("allowed_semesters", "")):
        return False

    # Then prerequisites
    raw_prereqs = str(course.get("prereq_codes", "")).strip()
    if not raw_prereqs or raw_prereqs == "nan":
        return True

    required = (code.strip() for code in raw_prereqs.split(','))
    return all(code in completed_courses for code in required if code)

def calculate_semester_stats(courses):
    """Return ``(total_credits, course_count)`` for one semester's courses.

    A course without a "credits" entry counts as DEFAULT_CREDITS.
    """
    credit_values = [course.get("credits", DEFAULT_CREDITS) for course in courses]
    return sum(credit_values), len(courses)

def meets_semester_requirements(courses):
    """True when the semester has enough credits and an acceptable course count.

    Requires at least MIN_CREDITS_PER_SEM credits and a course count
    between MIN_COURSES_PER_SEM and MAX_COURSES_PER_SEM (inclusive).
    """
    total_credits, course_count = calculate_semester_stats(courses)
    if total_credits < MIN_CREDITS_PER_SEM:
        return False
    return MIN_COURSES_PER_SEM <= course_count <= MAX_COURSES_PER_SEM

def count_mandatory_in_semester(courses):
    """Number of courses whose BTECH_Classification contains "mandatory" (any case)."""
    labels = (str(course.get("BTECH_Classification", "")).lower() for course in courses)
    return len([label for label in labels if "mandatory" in label])

def count_optional_in_semester(courses):
    """Number of courses whose BTECH_Classification contains "optional" (any case)."""
    labels = (str(course.get("BTECH_Classification", "")).lower() for course in courses)
    return len([label for label in labels if "optional" in label])

# ---------- Smart Roadmap Generation ----------
def _add_course_to_semester(course, current_courses, completed_courses):
    """Append one DataFrame row to the semester list and mark its code as scheduled."""
    course_dict = course.to_dict()
    # Guarantee the derived fields exist even if a scoring step did not add them
    course_dict.setdefault("skill_alignment", 0.0)
    course_dict.setdefault("career_relevance", 0.0)
    course_dict.setdefault("is_core_subject", False)
    course_dict.setdefault("academic_appropriateness", 0.0)

    current_courses.append(course_dict)
    completed_courses.add(course["course_code"])


def generate_roadmap(df, career_goal, interests, skill_level="Intermediate", semesters=SEMESTERS):
    """Build a semester-by-semester course roadmap.

    Args:
        df: Course catalogue. The code reads the columns "basket_name",
            "search_text", "allowed_semesters", "course_code", "course_title",
            "course_description" and "BTECH_Classification"; the input frame
            is copied, not modified.
        career_goal: Free-text career goal used to pick career keywords.
        interests: Interests (string or list) used to build the similarity query.
        skill_level: Key of SKILL_KEYWORDS; matched exactly first, then
            case-insensitively ("beginner" -> "Beginner"). Unknown levels
            contribute no skill keywords.
        semesters: Number of semesters to plan.

    Returns:
        A DataFrame with one row per semester: "semester_num", "num_courses",
        "total_credits", "meets_requirements", "mandatory_count",
        "optional_count" and "courses" (a list of per-course dicts).
    """
    df = df.copy()
    print("\n🎯 Starting roadmap generation...")
    print(f"Career goal: {career_goal}")
    print(f"Interests: {interests}")
    print(f"Skill level: {skill_level}")

    # Get career-specific keywords
    career_keywords = get_career_keywords(career_goal)
    print(f"Career-relevant keywords: {', '.join(career_keywords[:8])}")

    # Add course analysis
    df["basket_category"] = df["basket_name"].apply(get_basket_category)
    df["course_complexity"] = df["search_text"].apply(get_course_complexity)
    df["allowed_sems_list"] = df["allowed_semesters"].apply(parse_allowed_semesters)

    # Add career relevance scoring
    df["career_relevance"] = df["search_text"].apply(lambda x: calculate_career_relevance_score(x, career_keywords))
    df["is_core_subject"] = df["search_text"].apply(lambda x: is_core_subject(x, career_keywords))

    # Build interest matching
    print("🔍 Building interest similarity scores...")
    vectorizer, X = build_vectorizer(df["search_text"].tolist())
    query = interest_to_query(interests, career_goal)
    print(f"Search query: '{query}'")

    df["interest_similarity"] = compute_similarity(vectorizer, X, query)
    print(f"Interest similarity range: {df['interest_similarity'].min():.3f} to {df['interest_similarity'].max():.3f}")

    # Skill alignment. SKILL_KEYWORDS keys are capitalised, so fall back to a
    # normalised lookup when the caller passes e.g. "beginner" or " ADVANCED ".
    skill_keywords = SKILL_KEYWORDS.get(skill_level)
    if skill_keywords is None:
        skill_keywords = SKILL_KEYWORDS.get(str(skill_level).strip().capitalize(), [])
    df["skill_alignment"] = df["search_text"].apply(
        lambda t: len([kw for kw in skill_keywords if kw in t.lower()])
    )

    # Initialize semester mapping and completed courses tracker
    semesters_map = {s: [] for s in range(1, semesters+1)}
    completed_courses = set()

    # Process each semester
    for sem in range(1, semesters+1):
        print(f"\n--- 📚 Processing Semester {sem} ---")

        # Get available courses for this semester
        df_available = df[
            (~df["course_code"].isin(completed_courses)) &
            (df["allowed_sems_list"].apply(lambda x: sem in x))
        ].copy()

        if df_available.empty:
            print(f"No more courses available for semester {sem}")
            continue

        # Filter by prerequisites (only courses scheduled in earlier semesters count,
        # because the filter runs before anything is added for this semester)
        df_available = df_available[
            df_available.apply(lambda course: can_take_course(course, sem, completed_courses), axis=1)
        ].copy()

        if df_available.empty:
            print(f"No courses available after prerequisite filtering for semester {sem}")
            continue

        # Calculate semester-specific scores
        df_available["semester_appropriateness"] = df_available["course_complexity"].apply(
            lambda c: semester_appropriateness_score(c, sem)
        )

        # Separate mandatory and optional courses
        is_mandatory = df_available["BTECH_Classification"].str.lower().str.contains("mandatory", na=False)
        mandatory_courses = df_available[is_mandatory].copy()
        optional_courses = df_available[~is_mandatory].copy()

        print(f"Available: {len(mandatory_courses)} mandatory, {len(optional_courses)} optional courses")

        # Enhanced scoring for mandatory courses - prioritize core subjects AND academic progression
        if not mandatory_courses.empty:
            mandatory_courses["academic_appropriateness"] = mandatory_courses["search_text"].apply(
                lambda x: should_prioritize_course_for_semester(x, sem, career_keywords)
            )

            mandatory_courses["total_score"] = (
                2.0 * mandatory_courses["academic_appropriateness"] +  # Academic progression priority
                2.0 * mandatory_courses["is_core_subject"].astype(float) +  # Core subjects
                1.5 * mandatory_courses["career_relevance"] +     # Reduced weight for career relevance
                1.0 * mandatory_courses["interest_similarity"] +
                0.5 * mandatory_courses["skill_alignment"] +
                1.0 * mandatory_courses["semester_appropriateness"] +
                1.0  # Base score for being mandatory
            )
            mandatory_courses = mandatory_courses.sort_values("total_score", ascending=False)

        # Enhanced scoring for optional courses - balance interests and academic progression
        if not optional_courses.empty:
            optional_courses["academic_appropriateness"] = optional_courses["search_text"].apply(
                lambda x: should_prioritize_course_for_semester(x, sem, career_keywords)
            )

            optional_courses["total_score"] = (
                2.5 * optional_courses["interest_similarity"] +    # Keep high weight for interests
                2.0 * optional_courses["academic_appropriateness"] + # Academic progression
                1.5 * optional_courses["career_relevance"] +      # Reduced weight
                1.0 * optional_courses["skill_alignment"] +
                1.0 * optional_courses["semester_appropriateness"]
            )
            optional_courses = optional_courses.sort_values("total_score", ascending=False)

        # Build semester course list with saturation control
        current_courses = semesters_map[sem]
        year_key = get_year_key(sem)
        max_specialization = ACADEMIC_PROGRESSION.get(year_key, ACADEMIC_PROGRESSION["year_4"])["max_specialization"]

        # Phase 1: Add mandatory courses - core subjects first, then the remaining
        # mandatory courses, each group already sorted by total_score.
        mandatory_added = 0
        core_mandatory = mandatory_courses[mandatory_courses["is_core_subject"] == True]
        other_mandatory = mandatory_courses[mandatory_courses["is_core_subject"] == False]

        for label, candidates in (("core mandatory", core_mandatory), ("mandatory", other_mandatory)):
            for _, course in candidates.iterrows():
                # Both limits only grow, so nothing further can be added once one is hit
                if (mandatory_added >= TARGET_MANDATORY_PER_SEM or
                        len(current_courses) >= MAX_COURSES_PER_SEM):
                    break

                # The catalogue may list one course code on several rows; schedule it once
                if course["course_code"] in completed_courses:
                    continue

                _add_course_to_semester(course, current_courses, completed_courses)
                mandatory_added += 1
                print(f"  Added {label}: {course['course_code']} - {course['course_title'][:40]} (acad: {course.get('academic_appropriateness', 0):.1f})")

        # Phase 2: Add optional courses with specialization saturation control
        optional_added = 0
        specialization_count = calculate_specialization_saturation(current_courses, career_keywords)

        for _, course in optional_courses.iterrows():
            _, course_count = calculate_semester_stats(current_courses)

            # Stricter limits on optional courses. The semester state only changes
            # when a course is added, so once there is no room the remaining
            # candidates cannot be added either - stop scanning.
            has_room = (course_count < MAX_COURSES_PER_SEM and
                        optional_added < MAX_ELECTIVES_PER_SEM and
                        (not meets_semester_requirements(current_courses) or optional_added < TARGET_ELECTIVES_PER_SEM))
            if not has_room:
                break

            # Skip duplicate catalogue rows of a course that is already scheduled
            if course["course_code"] in completed_courses:
                continue

            # Check if this course would exceed specialization limit
            course_text = (course.get("course_title", "") + " " + course.get("course_description", "")).lower()
            is_specialized = any(keyword in course_text for keyword in career_keywords[:8])

            # Specialization saturation check
            if is_specialized and specialization_count >= max_specialization:
                print(f"  Skipped {course['course_code']} - specialization saturation ({specialization_count}/{max_specialization})")
                continue

            _add_course_to_semester(course, current_courses, completed_courses)
            optional_added += 1

            # Update specialization count
            if is_specialized:
                specialization_count += 1

            print(f"  Added elective: {course['course_code']} - {course['course_title'][:40]} (acad: {course.get('academic_appropriateness', 0):.1f})")

        # Update semester mapping
        semesters_map[sem] = current_courses

        # Print semester summary
        total_credits, course_count = calculate_semester_stats(current_courses)
        mand_count = count_mandatory_in_semester(current_courses)
        opt_count = count_optional_in_semester(current_courses)
        print(f"  📊 Semester {sem} summary: {course_count} courses ({mand_count} mandatory, {opt_count} optional), {total_credits} credits")

    # Create output dataframe
    rows = []
    for sem in range(1, semesters+1):
        courses_list = semesters_map[sem]
        total_credits, course_count = calculate_semester_stats(courses_list)

        rows.append({
            "semester_num": sem,
            "num_courses": course_count,
            "total_credits": total_credits,
            "meets_requirements": meets_semester_requirements(courses_list),
            "mandatory_count": count_mandatory_in_semester(courses_list),
            "optional_count": count_optional_in_semester(courses_list),
            "courses": [
                {
                    "course_code": c.get("course_code"),
                    "course_title": c.get("course_title"),
                    "basket_name": c.get("basket_name"),
                    "basket_category": get_basket_category(c.get("basket_name")),
                    "BTECH_Classification": c.get("BTECH_Classification"),
                    "credits": c.get("credits", DEFAULT_CREDITS),
                    "course_type": c.get("course_type", "Theory"),
                    "complexity": c.get("course_complexity"),
                    "faculty_name": c.get("Faculty_Name", "TBA"),
                    "interest_similarity": float(c.get("interest_similarity", 0.0)),
                    # Keep the scores that the exporter and the console report read back
                    "skill_alignment": float(c.get("skill_alignment", 0.0)),
                    "academic_appropriateness": float(c.get("academic_appropriateness", 0.0)),
                    "career_relevance": float(c.get("career_relevance", 0.0)),
                    "is_core_subject": bool(c.get("is_core_subject", False)),
                    "total_score": float(c.get("total_score", 0.0))
                } for c in courses_list
            ]
        })

    return pd.DataFrame(rows)

def export_roadmap_to_excel(roadmap_df, out_path=OUTPUT_PATH):
    """Flatten the roadmap to one row per scheduled course and write it to an Excel file."""
    # (column name, value used when the course dict has no such key), in output order
    course_fields = (
        ("course_code", ""),
        ("course_title", ""),
        ("basket_name", ""),
        ("basket_category", ""),
        ("BTECH_Classification", ""),
        ("credits", DEFAULT_CREDITS),
        ("course_type", "Theory"),
        ("complexity", "foundation"),
        ("faculty_name", "TBA"),
        ("interest_similarity", 0.0),
        ("skill_alignment", 0.0),
        ("career_relevance", 0.0),
        ("is_core_subject", False),
        ("total_score", 0.0),
    )

    flat_records = []
    for _, sem_row in roadmap_df.iterrows():
        for entry in sem_row['courses']:
            record = {"semester_num": sem_row['semester_num']}
            for field, fallback in course_fields:
                record[field] = entry.get(field, fallback)
            # Semester-level totals are repeated on every course row
            record["semester_total_credits"] = sem_row['total_credits']
            record["semester_mandatory_count"] = sem_row['mandatory_count']
            record["semester_optional_count"] = sem_row['optional_count']
            flat_records.append(record)

    pd.DataFrame(flat_records).to_excel(out_path, index=False)
    print(f"\nRoadmap exported to {out_path}")

# ---------- Main ----------
def main():
    """Interactive entry point.

    Loads the catalogue from DATA_PATH, asks for career goal, interests and
    skill level on stdin, generates the roadmap, prints it with a summary and
    exports it to OUTPUT_PATH.
    """
    print("🎓 Advanced Course Recommender System - Updated Version\n")
    print("=" * 60)

    # Load dataset
    df = load_dataset(DATA_PATH)

    # Show dataset statistics
    mand_count = df[df["BTECH_Classification"].str.lower().str.contains("mandatory", na=False)].shape[0]
    opt_count = df[~df["BTECH_Classification"].str.lower().str.contains("mandatory", na=False)].shape[0]
    print(f"📊 Dataset contains: {mand_count} mandatory courses, {opt_count} optional courses")

    # Show unique basket categories
    baskets = df["basket_name"].value_counts().head(10)
    print(f"🗂️  Top course baskets: {', '.join(baskets.index[:5].tolist())}")

    print("\n" + "=" * 60)
    print("🔥 Let's build your personalized academic roadmap!")
    print("=" * 60)

    # Get user inputs
    career_goal = input("\n🎯 What's your career goal? (e.g., 'AI Engineer', 'Data Scientist', 'Full Stack Developer'): ").strip()
    interests = input("💡 What are your interests? (comma-separated, e.g., 'machine learning, web development, mobile apps'): ").strip()
    # SKILL_KEYWORDS keys are capitalised; normalise so "beginner" / "ADVANCED" are recognised
    skill_level = input("📈 What's your current skill level? (Beginner/Intermediate/Advanced) [Intermediate]: ").strip().capitalize() or "Intermediate"
    if skill_level not in SKILL_KEYWORDS:
        print(f"⚠️ Unknown skill level '{skill_level}', using 'Intermediate' instead")
        skill_level = "Intermediate"

    print(f"\n🎯 Target per semester:")
    print(f"   • Target {TARGET_MANDATORY_PER_SEM} mandatory courses (prioritizing core subjects)")
    print(f"   • Target {TARGET_ELECTIVES_PER_SEM} elective courses (max {MAX_ELECTIVES_PER_SEM})")
    print(f"   • {MIN_CREDITS_PER_SEM}+ credits total")
    print(f"   • Courses selected based on career relevance and interests: {interests}")

    # Generate roadmap
    roadmap = generate_roadmap(df, career_goal, interests, skill_level=skill_level, semesters=SEMESTERS)

    # Display roadmap
    print("\n" + "=" * 100)
    print(f"🎓 YOUR PERSONALIZED {SEMESTERS}-SEMESTER ACADEMIC ROADMAP")
    print("=" * 100)

    for _, r in roadmap.iterrows():
        status_icon = "✅" if r['meets_requirements'] else "⚠️"
        year = get_academic_year(r['semester_num'])

        print(f"\n{status_icon} Semester {r['semester_num']} (Year {year}) | {r['num_courses']} courses | {r['total_credits']} credits | "
              f"Mandatory: {r['mandatory_count']}, Elective: {r['optional_count']}")
        print("-" * 100)

        for c in r['courses']:
            mand_flag = ("MAND" if "mandatory" in str(c['BTECH_Classification']).lower() else "ELEC")
            title_short = c['course_title'][:35] + "..." if len(c['course_title']) > 35 else c['course_title']
            core_flag = " [CORE]" if c.get('is_core_subject', False) else ""
            acad_score = c.get('academic_appropriateness', 0)

            print(f"  • {c['course_code']:<10} | {title_short:<38} | {c['basket_category']:<8} | {mand_flag:<4} | "
                  f"{c['credits']}cr | {c['complexity']:<12} | Acad:{acad_score:.1f} Int:{c['interest_similarity']:.2f}{core_flag}")

    # Summary
    total_credits = sum(r['total_credits'] for _, r in roadmap.iterrows())
    total_courses = sum(r['num_courses'] for _, r in roadmap.iterrows())
    total_mandatory = sum(r['mandatory_count'] for _, r in roadmap.iterrows())
    total_optional = sum(r['optional_count'] for _, r in roadmap.iterrows())
    sems_meeting_req = sum(1 for _, r in roadmap.iterrows() if r['meets_requirements'])

    print("\n" + "=" * 100)
    print("📊 ROADMAP SUMMARY")
    print("=" * 100)
    print(f"🎯 Total Credits: {total_credits}")
    print(f"📚 Total Courses: {total_courses} (Mandatory: {total_mandatory}, Elective: {total_optional})")
    print(f"✅ Semesters Meeting Requirements: {sems_meeting_req}/{SEMESTERS}")
    # An empty roadmap (no course could be scheduled) must not crash the summary
    if total_courses:
        print(f"⚖️  Mandatory/Elective Ratio: {total_mandatory/total_courses:.1%} / {total_optional/total_courses:.1%}")
    else:
        print("⚖️  Mandatory/Elective Ratio: n/a (no courses were scheduled)")

    # Export to Excel
    export_roadmap_to_excel(roadmap, OUTPUT_PATH)

    print(f"\n🎉 Your personalized roadmap is ready!")
    print(f"📁 Check '{OUTPUT_PATH}' for the detailed Excel version.")

# Entry-point guard: start the interactive flow only when this code runs as the main module.
if __name__ == "__main__":
    main()

🎓 Advanced Course Recommender System - Updated Version

✅ Loaded dataset with 773 courses
📊 Dataset contains: 227 mandatory courses, 546 optional courses
🗂️  Top course baskets: Open Elective, Foundation Core - Basic Sciences and Mathematics, Discipline Core, Specialization Elective, Non-graded Core Requirement

🔥 Let's build your personalized academic roadmap!

🎯 What's your career goal? (e.g., 'AI Engineer', 'Data Scientist', 'Full Stack Developer'): AI Engineer 
💡 What are your interests? (comma-separated, e.g., 'machine learning, web development, mobile apps'): machine learning, AI, psychology , entrepreneurship 
📈 What's your current skill level? (Beginner/Intermediate/Advanced) [Intermediate]: beginner

🎯 Target per semester:
   • Target 4 mandatory courses (prioritizing core subjects)
   • Target 2 elective courses (max 3)
   • 24+ credits total
   • Courses selected based on career relevance and interests: machine learning, AI, psychology , entrepreneurship

🎯 Starting roadmap 

In [None]:
import pandas as pd
import numpy as np
import re
import sys
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Input workbook (course catalogue) and output workbook (generated roadmap)
DATA_PATH = "courses_fully_filled.xlsx"
OUTPUT_PATH = "generated_roadmap.xlsx"

# Planning horizon and the credit value used when a course has none
SEMESTERS = 8
DEFAULT_CREDITS = 4

# Per-semester requirements
MIN_CREDITS_PER_SEM = 24
MIN_COURSES_PER_SEM = 5
MAX_COURSES_PER_SEM = 8

# Target distribution per semester
TARGET_MANDATORY_PER_SEM = 4  # mandatory courses aimed for in each semester
TARGET_ELECTIVES_PER_SEM = 2  # elective courses aimed for in each semester
MAX_ELECTIVES_PER_SEM = 3     # hard cap on electives in each semester

# Semester progression - the semesters in which each course type is introduced
SEMESTER_PROGRESSION = {
    "foundation": list(range(1, 5)),    # basic, intro, foundation courses
    "intermediate": list(range(3, 7)),  # intermediate, application courses
    "advanced": list(range(5, 9)),      # advanced, specialized, research courses
    "project": list(range(6, 9)),       # major projects in final semesters
}

# Career-specific core subjects that should be prioritized as mandatory.
# Both the order of the careers and the order of the keywords inside each list
# matter: lookups walk the careers in order and callers slice the leading keywords.
CAREER_CORE_SUBJECTS = {
    "ai engineer": [
        "machine learning", "deep learning", "artificial intelligence",
        "data structures", "algorithms", "linear algebra",
        "statistics", "python", "mathematics",
        "neural networks", "computer vision", "natural language processing",
        "robotics", "database", "operating systems",
    ],
    "data scientist": [
        "machine learning", "statistics", "data analysis",
        "data mining", "database", "python",
        "r programming", "linear algebra", "probability",
        "data visualization", "big data", "sql",
        "analytics", "mathematics", "algorithms",
    ],
    "full stack developer": [
        "web development", "database", "javascript", "html",
        "css", "node.js", "react", "angular",
        "python", "java", "sql", "operating systems",
        "computer networks", "software engineering", "algorithms", "data structures",
    ],
    "software engineer": [
        "programming", "algorithms", "data structures",
        "software engineering", "operating systems", "database",
        "computer networks", "object oriented programming", "system design",
        "testing", "debugging", "version control",
    ],
    "cyber security": [
        "cybersecurity", "network security", "cryptography",
        "ethical hacking", "computer networks", "operating systems",
        "database security", "forensics", "risk management",
        "security protocols", "penetration testing",
    ],
    "mobile developer": [
        "mobile development", "android", "ios",
        "java", "kotlin", "swift",
        "react native", "flutter", "database",
        "api development", "user interface",
    ],
    "web developer": [
        "web development", "html", "css",
        "javascript", "react", "angular",
        "vue", "node.js", "database",
        "sql", "api development", "responsive design",
    ],
}

# Words that signal a course suits a given self-reported skill level.
# Keys are capitalised level names.
SKILL_KEYWORDS = {
    "Beginner": [
        "intro", "introduction", "basics", "basic", "foundation",
        "foundations", "fundamentals", "survey", "overview", "principles",
    ],
    "Intermediate": [
        "intermediate", "applications", "application", "implementation", "design",
        "systems", "practical", "project", "analysis", "development",
    ],
    "Advanced": [
        "advanced", "deep", "specialized", "research",
        "optimization", "complex", "expert", "professional",
    ],
}

def get_career_keywords(career_goal, career_map=None):
    """Get relevant keywords for the given career goal.

    Matching happens in two passes so that a specific career is never shadowed
    by an earlier entry that merely shares a word with it (e.g. "Software
    Engineer" must not resolve to "ai engineer" via the word "engineer"):

    1. The first career whose full name occurs in the goal wins.
    2. Otherwise the career sharing the most words with the goal wins; ties go
       to the earlier entry. Words of up to three letters (such as "ai") must
       appear as whole words so they do not match inside unrelated words.

    Args:
        career_goal: Free-text career goal; None is treated as empty.
        career_map: Optional mapping of career name -> keyword list. Defaults
            to CAREER_CORE_SUBJECTS.

    Returns:
        The keyword list of the matched career, or a generic default list.
    """
    if career_map is None:
        career_map = CAREER_CORE_SUBJECTS
    career_lower = str(career_goal or "").lower()

    # Pass 1: the whole career name appears in the goal
    for career_key, keywords in career_map.items():
        if career_key in career_lower:
            return keywords

    # Pass 2: best partial overlap
    goal_words = set(re.findall(r"[a-z0-9]+", career_lower))

    def _word_matches(word):
        # Short words are only trusted as whole words ("ai" vs. "painter")
        if len(word) <= 3:
            return word in goal_words
        return word in career_lower

    best_keywords = None
    best_score = 0
    for career_key, keywords in career_map.items():
        score = sum(1 for word in career_key.split() if _word_matches(word))
        if score > best_score:  # strict '>' keeps the earlier entry on ties
            best_keywords, best_score = keywords, score

    if best_keywords is not None:
        return best_keywords

    # Default keywords if no specific match
    return ["programming", "algorithms", "data structures", "mathematics", "computer science"]

# Course complexity keywords: words whose presence in a course text votes for a level
COMPLEXITY_KEYWORDS = {
    "foundation": [
        "intro", "introduction", "basic", "basics", "foundation",
        "fundamentals", "overview", "survey", "principles", "engineering",
    ],
    "intermediate": [
        "intermediate", "systems", "design", "implementation", "practical",
        "applications", "development", "management", "analysis",
    ],
    "advanced": [
        "advanced", "deep", "research", "optimization", "specialized", "expert",
        "machine learning", "artificial intelligence", "neural", "blockchain", "cyber", "forensics",
    ],
}

# ---------- Helpers ----------
def normalize_text(s):
    """Return a lower-cased, whitespace-collapsed copy of `s` for keyword search.

    Every character that is not a lowercase ASCII letter, a digit or whitespace
    is replaced by a space. Missing values (as reported by `pd.isna`) become "".
    """
    if pd.isna(s):
        return ""
    without_punctuation = re.sub(r"[^a-z0-9\s]", " ", str(s).lower())
    return re.sub(r"\s+", " ", without_punctuation).strip()

def load_dataset(path=DATA_PATH):
    """Load the course catalogue from an Excel workbook and normalise its columns.

    Reads the first sheet of `path`, adds any expected column that is missing
    (filled with NaN), fills defaults for text, classification, credits,
    allowed-semester and prerequisite fields, and builds the normalised
    "search_text" column from title and description.

    Args:
        path: Location of the Excel workbook.

    Returns:
        The cleaned DataFrame.

    Exits the process with status 1 (after printing a hint) when the file does
    not exist.
    """
    try:
        df = pd.read_excel(path, sheet_name=0)
    except FileNotFoundError:
        print(f"❌ File not found: {path}")
        # Name the file that was actually requested rather than a hard-coded one
        print(f"Please ensure the file '{path}' is in the current directory")
        sys.exit(1)

    # Expected columns based on new structure
    expected_cols = [
        "basket_code", "basket_name", "course_code", "course_title", "course_type",
        "credits", "allowed_semesters", "prereq_codes", "coreq_codes",
        "sessions_per_week", "session_minutes", "course_description", "max_enrol",
        "infrastructure_notes", "semester_policy", "basket_min_credits",
        "basket_max_credits", "BTECH_Classification", "Faculty_ID", "Faculty_Name", "Class_Strength"
    ]

    # Add missing columns with default values
    for c in expected_cols:
        if c not in df.columns:
            df[c] = np.nan

    # Clean and process data
    df["course_title"] = df["course_title"].fillna("").astype(str)
    df["course_description"] = df["course_description"].fillna("").astype(str)
    df["search_text"] = (df["course_title"] + " " + df["course_description"]).apply(normalize_text)
    df["basket_name"] = df["basket_name"].fillna("Other")
    df["BTECH_Classification"] = df["BTECH_Classification"].fillna("Optional")

    # Handle course codes. Codes are stripped because prerequisite codes are
    # compared in stripped form; stray spreadsheet whitespace would otherwise
    # make a prerequisite impossible to satisfy.
    df["course_code"] = df["course_code"].astype(str).str.strip()
    missing_mask = (df["course_code"].isna()) | (df["course_code"] == "nan") | (df["course_code"] == "")
    df.loc[missing_mask, "course_code"] = df.loc[missing_mask].index.map(lambda x: f"COURSE_{x}")

    # Handle credits
    df["credits"] = pd.to_numeric(df["credits"], errors="coerce").fillna(DEFAULT_CREDITS)

    # Handle allowed semesters
    df["allowed_semesters"] = df["allowed_semesters"].fillna("1,2,3,4,5,6,7,8")

    # Handle prerequisites
    df["prereq_codes"] = df["prereq_codes"].fillna("")
    df["coreq_codes"] = df["coreq_codes"].fillna("")

    print(f"✅ Loaded dataset with {len(df)} courses")
    return df

def parse_allowed_semesters(sem_str):
    """Parse an allowed-semesters value into a list of integers.

    Accepts a comma-separated string such as "1,2,3" as well as a single
    numeric cell. Spreadsheet readers often deliver a lone semester as a float
    (5.0), so tokens of the form "5.0" are read as 5 instead of being discarded.

    Args:
        sem_str: String, number, None or NaN.

    Returns:
        The parsed semesters in input order, or all semesters 1-8 when the
        value is missing, empty or contains no usable token.
    """
    all_semesters = list(range(1, 9))  # All semesters if not specified

    if pd.isna(sem_str) or sem_str == "":
        return all_semesters

    sems = []
    for token in str(sem_str).split(','):
        token = token.strip()
        # Plain digits, optionally followed by ".0" (float rendering of an integer)
        if re.fullmatch(r"\d+(?:\.0+)?", token):
            try:
                sems.append(int(token.split('.')[0]))
            except ValueError:
                # A digit-like token that int() cannot read is ignored
                continue

    return sems if sems else all_semesters

def build_vectorizer(corpus):
    """Fit a TF-IDF vectorizer on `corpus` and return ``(vectorizer, matrix)``.

    The matrix keeps exactly one row per input document, in input order, so
    similarity scores computed from it can be assigned straight back to the
    rows the corpus came from. Empty or non-string documents stay in place as
    empty documents instead of being dropped (dropping them would make the
    matrix shorter than the source table).
    """
    docs = [text if isinstance(text, str) else "" for text in corpus]

    # The vectorizer cannot fit on a corpus without any text; use a placeholder
    # token for every row (at least one) so fitting still succeeds.
    if not any(doc.strip() for doc in docs):
        docs = ["empty"] * max(len(docs), 1)

    vect = TfidfVectorizer(ngram_range=(1,2), min_df=1, max_features=2000, stop_words='english')
    X = vect.fit_transform(docs)
    return vect, X

def interest_to_query(interests, career_goal):
    """Build the text query used for interest matching.

    The query consists of the lower-cased interest and career-goal words plus a
    few related technical terms for recognised topics. Duplicates are removed
    while keeping first-seen order, so the same inputs always produce the same
    query (word order matters once the query is split into bigrams).

    Args:
        interests: A list of interest strings, or one string separated by
            commas, semicolons or whitespace.
        career_goal: Free-text career goal; falsy values are ignored.

    Returns:
        The space-joined query string.
    """
    tokens = []

    # Process interests
    if isinstance(interests, list):
        tokens.extend(t.strip().lower() for t in interests if t and t.strip())
    else:
        interest_parts = str(interests).replace(",", " ").replace(";", " ").split()
        tokens.extend(part.lower() for part in interest_parts)

    # Process career goal
    if career_goal:
        career_parts = str(career_goal).replace(",", " ").split()
        tokens.extend(part.lower() for part in career_parts)

    # Add related technical terms for better matching
    query_text = " ".join(tokens)
    # "ai" must be a word of its own; as a substring it also occurs in words
    # such as "painting" or "training"
    mentions_ai = (
        re.search(r"\bai\b", query_text) is not None
        or any(term in query_text for term in ["artificial intelligence", "machine learning"])
    )
    if mentions_ai:
        tokens.extend(["machine learning", "neural networks", "deep learning", "algorithms", "data science"])
    if "data" in query_text:
        tokens.extend(["statistics", "analytics", "mining", "visualization", "database"])
    if any(term in query_text for term in ["web", "frontend", "backend"]):
        tokens.extend(["javascript", "html", "css", "web development", "programming"])
    if any(term in query_text for term in ["mobile", "android", "ios"]):
        tokens.extend(["mobile development", "app development", "android", "ios"])

    # Remove duplicates, preserving order for reproducible queries
    return " ".join(dict.fromkeys(tokens))

def compute_similarity(vectorizer, X_matrix, query):
    """Return the cosine similarity between `query` and every row of `X_matrix`.

    Args:
        vectorizer: Fitted vectorizer exposing ``transform``.
        X_matrix: Document matrix produced by that vectorizer.
        query: Query text.

    Returns:
        A 1-D array with one score per matrix row. A blank query, or a failure
        while scoring, yields all zeros; failures are reported on stdout
        instead of being swallowed silently.
    """
    if not query.strip():
        return np.zeros(X_matrix.shape[0])

    try:
        q_vec = vectorizer.transform([query])
        return cosine_similarity(q_vec, X_matrix).flatten()
    except Exception as exc:
        # Best effort: fall back to neutral scores, but never trap
        # KeyboardInterrupt/SystemExit the way a bare except would
        print(f"⚠️ Could not compute interest similarity ({exc}); using zero scores")
        return np.zeros(X_matrix.shape[0])

# Academic progression structure - what should be taught when.
# Per academic year: subjects to favour, subjects to hold back, and the maximum
# number of career-specific courses allowed in one semester.
ACADEMIC_PROGRESSION = {
    # Semesters 1-2: Foundation
    "year_1": {
        "priority_subjects": [
            "mathematics", "calculus", "linear algebra", "programming",
            "basics", "foundation", "introduction",
        ],
        "avoid_subjects": ["advanced", "deep", "specialized", "research"],
        "max_specialization": 1,
    },
    # Semesters 3-4: Core CS
    "year_2": {
        "priority_subjects": [
            "data structures", "algorithms", "discrete mathematics",
            "statistics", "probability", "programming",
        ],
        "avoid_subjects": ["advanced", "deep", "research"],
        "max_specialization": 1,
    },
    # Semesters 5-6: System & Theory + Intro to specialization
    "year_3": {
        "priority_subjects": [
            "operating systems", "database", "computer networks",
            "software engineering", "systems",
        ],
        "avoid_subjects": ["advanced", "deep"],
        "max_specialization": 2,
    },
    # Semesters 7-8: Advanced topics and specialization
    "year_4": {
        "priority_subjects": ["advanced", "specialized", "research", "project"],
        "avoid_subjects": ["introduction", "basic", "foundation"],
        "max_specialization": 3,
    },
}

def get_academic_year(semester):
    """Map a 1-based semester number to its academic year (1-2 -> 1, 3-4 -> 2, ...)."""
    completed_years = (semester - 1) // 2
    return completed_years + 1

def get_year_key(semester):
    """Build the ACADEMIC_PROGRESSION lookup key ("year_<n>") for a semester."""
    return "year_{}".format(get_academic_year(semester))

def calculate_specialization_saturation(current_courses, career_keywords):
    """Count the courses in the semester that mention a leading career keyword.

    Only the first eight career keywords are considered, and each course is
    counted at most once no matter how many keywords it mentions.
    """
    top_keywords = career_keywords[:8]
    specialized = 0
    for entry in current_courses:
        haystack = (entry.get("course_title", "") + " " + entry.get("course_description", "")).lower()
        if any(keyword in haystack for keyword in top_keywords):
            specialized += 1
    return specialized

def should_prioritize_course_for_semester(course_text, semester, career_keywords):
    """Score how well a course fits the academic year of `semester`.

    The score is the number of the year's priority subjects mentioned in the
    course text minus the number of its avoid subjects, floored at -2.0.
    `career_keywords` is accepted for call-site symmetry but is not used.
    """
    year_key = get_year_key(semester)
    # Semesters beyond the configured years fall back to the final-year rules
    progression = ACADEMIC_PROGRESSION.get(year_key, ACADEMIC_PROGRESSION["year_4"])

    course_text = course_text.lower()

    # Check priority subjects for this academic year
    priority_score = sum(1 for subject in progression["priority_subjects"] if subject in course_text)

    # Check avoid subjects for this academic year
    avoid_score = sum(1 for subject in progression["avoid_subjects"] if subject in course_text)

    # Calculate final appropriateness
    appropriateness = priority_score - avoid_score

    return max(appropriateness, -2.0)  # Cap negative impact


def calculate_career_relevance_score(course_text, career_keywords):
    """Calculate how relevant a course is to the career goal.

    Each career keyword found in the (lower-cased) course text adds 2.0; the
    total is capped at 10.0.
    """
    text = course_text.lower()
    score = 0.0

    for keyword in career_keywords:
        if keyword in text:
            score += 2.0  # High bonus for career-relevant keywords

    return min(score, 10.0)  # Cap at 10.0

def is_core_subject(course_text, career_keywords):
    """Check if a course is a core subject for the career.

    A course is core when its text mentions one of the general core CS
    subjects, or at least two of the first five career keywords.
    """
    text = course_text.lower()

    # Core CS subjects that are almost always mandatory
    core_cs_subjects = ["data structures", "algorithms", "operating systems", "database",
                       "computer networks", "software engineering", "programming"]

    # Check for core CS subjects
    for subject in core_cs_subjects:
        if subject in text:
            return True

    # Check for career-specific core subjects
    essential_career_keywords = career_keywords[:5]  # Top 5 most important
    matches = sum(1 for keyword in essential_career_keywords if keyword in text)

    return matches >= 2  # At least 2 career keywords


def get_course_complexity(text):
    """Determine if course is foundation, intermediate, or advanced.

    Counts how many keywords of each COMPLEXITY_KEYWORDS level occur in the
    text. "advanced" wins when it has at least one hit and is not outnumbered
    by foundation hits; "intermediate" wins when it outnumbers foundation;
    everything else is "foundation".
    """
    text = text.lower()

    foundation_score = sum(1 for kw in COMPLEXITY_KEYWORDS["foundation"] if kw in text)
    intermediate_score = sum(1 for kw in COMPLEXITY_KEYWORDS["intermediate"] if kw in text)
    advanced_score = sum(1 for kw in COMPLEXITY_KEYWORDS["advanced"] if kw in text)

    if advanced_score > 0 and advanced_score >= foundation_score:
        return "advanced"
    elif intermediate_score > foundation_score:
        return "intermediate"
    else:
        return "foundation"

def semester_appropriateness_score(course_complexity, semester):
    """Rate how well a course's complexity level fits a semester.

    A semester listed in SEMESTER_PROGRESSION for the given complexity scores
    2.0. Outside that list a wider "acceptable" window scores 1.0, and the
    remaining semesters get a complexity-specific fallback (negative for
    foundation courses taken late and advanced courses taken early).
    Unknown complexity labels score 0.0.
    """
    if course_complexity not in ("foundation", "intermediate", "advanced"):
        return 0.0

    # Ideal placement is the same check for every known complexity level
    if semester in SEMESTER_PROGRESSION[course_complexity]:
        return 2.0

    if course_complexity == "foundation":
        # Foundation courses shouldn't be in final semesters
        return 1.0 if semester <= 4 else -1.0

    if course_complexity == "intermediate":
        return 1.0 if 2 <= semester <= 7 else 0.0

    # Advanced courses shouldn't be in early semesters
    return 1.0 if semester >= 5 else -2.0

def get_basket_category(basket_name):
    """Map a raw basket name onto one of the standard category labels.

    The name is stringified and lower-cased, then checked against the rules
    below in order; the first rule with a matching substring wins. Names that
    match no rule are reported as "General".
    """
    label = str(basket_name).lower()

    # Order matters: e.g. "Minor Elective" must resolve to "Minor"
    ordered_rules = (
        (("core", "foundation"), "Core"),
        (("minor",), "Minor"),
        (("project",), "Project"),
        (("elective",), "Elective"),
        (("lab", "laboratory"), "Lab"),
    )

    for needles, category in ordered_rules:
        for needle in needles:
            if needle in label:
                return category

    return "General"

def can_take_course(course, semester, completed_courses):
    """Check if a course can be taken in a given semester.

    Args:
        course: Mapping-like course record (dict or pandas Series) that is read
            through ``.get`` for "allowed_semesters" and "prereq_codes".
        semester: Semester number being planned.
        completed_courses: Container of course codes already scheduled.

    Returns:
        False when the semester is not among the course's allowed semesters or
        any listed prerequisite is missing from ``completed_courses``;
        True otherwise.
    """
    # Check allowed semesters
    allowed_sems = parse_allowed_semesters(course.get("allowed_semesters", ""))
    if semester not in allowed_sems:
        return False

    # Check prerequisites. Missing values can arrive as None, NaN or pd.NA;
    # their string forms must not be mistaken for a prerequisite code.
    raw_prereqs = course.get("prereq_codes", "")
    if raw_prereqs is None:
        return True

    prereqs = str(raw_prereqs).strip()
    if not prereqs or prereqs.lower() in ("nan", "none", "<na>"):
        return True

    prereq_list = [p.strip() for p in prereqs.split(',') if p.strip()]
    return all(prereq in completed_courses for prereq in prereq_list)

def calculate_semester_stats(courses):
    """Return ``(total_credits, course_count)`` for a list of course dicts.

    Courses without a "credits" entry contribute DEFAULT_CREDITS.
    """
    credit_total = 0
    for course in courses:
        credit_total += course.get("credits", DEFAULT_CREDITS)
    return credit_total, len(courses)

def meets_semester_requirements(courses):
    """Tell whether a semester satisfies the credit and course-count limits.

    The semester must reach MIN_CREDITS_PER_SEM credits and hold between
    MIN_COURSES_PER_SEM and MAX_COURSES_PER_SEM courses (inclusive).
    """
    credit_total, n_courses = calculate_semester_stats(courses)

    enough_credits = credit_total >= MIN_CREDITS_PER_SEM
    if not enough_credits:
        return enough_credits

    return MIN_COURSES_PER_SEM <= n_courses <= MAX_COURSES_PER_SEM

def count_mandatory_in_semester(courses):
    """Count the courses whose BTECH_Classification mentions "mandatory" (case-insensitive)."""
    tally = 0
    for course in courses:
        classification = str(course.get("BTECH_Classification", "")).lower()
        if "mandatory" in classification:
            tally += 1
    return tally

def count_optional_in_semester(courses):
    """Count the courses whose BTECH_Classification mentions "optional" (case-insensitive)."""
    optional_courses = [
        course
        for course in courses
        if "optional" in str(course.get("BTECH_Classification", "")).lower()
    ]
    return len(optional_courses)

# ---------- Smart Roadmap Generation ----------
def _text_or_empty(value):
    """Return ``value`` as text, mapping missing values (None/NaN/pd.NA) to ""."""
    if value is None:
        return ""
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return ""
    return str(value)


def _roadmap_course_dict(course):
    """Convert a scored course row into a dict with every score field present."""
    course_dict = course.to_dict()
    course_dict.setdefault("skill_alignment", 0.0)
    course_dict.setdefault("career_relevance", 0.0)
    course_dict.setdefault("is_core_subject", False)
    course_dict.setdefault("academic_appropriateness", 0.0)
    return course_dict


def _resolve_skill_keywords(skill_level):
    """Look up SKILL_KEYWORDS for a skill level, tolerating case/whitespace differences."""
    keywords = SKILL_KEYWORDS.get(skill_level)
    if keywords is None:
        wanted = str(skill_level).strip().lower()
        keywords = next(
            (kws for level, kws in SKILL_KEYWORDS.items() if str(level).strip().lower() == wanted),
            [],
        )
    return keywords


def generate_roadmap(df, career_goal, interests, skill_level="Intermediate", semesters=SEMESTERS):
    """Build a semester-by-semester course roadmap.

    The input frame is copied and enriched with scoring columns (basket
    category, complexity, career relevance, interest similarity, skill
    alignment). Semesters are then filled one at a time: mandatory courses
    first (core subjects before the rest), followed by optional courses subject
    to the elective limits and the per-year specialization cap.

    Args:
        df: Course dataset. Reads the columns "basket_name", "search_text",
            "allowed_semesters", "course_code", "course_title" and
            "BTECH_Classification"; other columns are read with defaults.
        career_goal: Free-text career goal used to derive career keywords.
        interests: Free-text interests used for the similarity query.
        skill_level: Key into SKILL_KEYWORDS; matched case-insensitively when
            there is no exact key.
        semesters: Number of semesters to plan.

    Returns:
        pandas.DataFrame with one row per semester holding "semester_num",
        "num_courses", "total_credits", "meets_requirements",
        "mandatory_count", "optional_count" and "courses" (a list of per-course
        dicts including all score fields).
    """
    df = df.copy()
    print("\n🎯 Starting roadmap generation...")
    print(f"Career goal: {career_goal}")
    print(f"Interests: {interests}")
    print(f"Skill level: {skill_level}")

    # Get career-specific keywords
    career_keywords = get_career_keywords(career_goal)
    print(f"Career-relevant keywords: {', '.join(career_keywords[:8])}")

    # Add course analysis
    df["basket_category"] = df["basket_name"].apply(get_basket_category)
    df["course_complexity"] = df["search_text"].apply(get_course_complexity)
    df["allowed_sems_list"] = df["allowed_semesters"].apply(parse_allowed_semesters)

    # Add career relevance scoring
    df["career_relevance"] = df["search_text"].apply(lambda x: calculate_career_relevance_score(x, career_keywords))
    df["is_core_subject"] = df["search_text"].apply(lambda x: is_core_subject(x, career_keywords))

    # Build interest matching
    print("🔍 Building interest similarity scores...")
    vectorizer, X = build_vectorizer(df["search_text"].tolist())
    query = interest_to_query(interests, career_goal)
    print(f"Search query: '{query}'")

    df["interest_similarity"] = compute_similarity(vectorizer, X, query)
    print(f"Interest similarity range: {df['interest_similarity'].min():.3f} to {df['interest_similarity'].max():.3f}")

    # Skill alignment. The keyword list is resolved once; a user-typed level such
    # as "beginner" still finds its entry through the case-insensitive fallback.
    skill_keywords = _resolve_skill_keywords(skill_level)
    df["skill_alignment"] = df["search_text"].apply(
        lambda t: len([kw for kw in skill_keywords if kw in t.lower()])
    )

    # Initialize semester mapping and completed courses tracker
    semesters_map = {s: [] for s in range(1, semesters + 1)}
    completed_courses = set()

    # Process each semester
    for sem in range(1, semesters + 1):
        print(f"\n--- 📚 Processing Semester {sem} ---")

        # Get available courses for this semester
        df_available = df[
            (~df["course_code"].isin(completed_courses)) &
            (df["allowed_sems_list"].apply(lambda x: sem in x))
        ].copy()

        if df_available.empty:
            print(f"No more courses available for semester {sem}")
            continue

        # Filter by prerequisites
        df_available = df_available[
            df_available.apply(lambda course: can_take_course(course, sem, completed_courses), axis=1)
        ].copy()

        if df_available.empty:
            print(f"No courses available after prerequisite filtering for semester {sem}")
            continue

        # Calculate semester-specific scores
        df_available["semester_appropriateness"] = df_available["course_complexity"].apply(
            lambda c: semester_appropriateness_score(c, sem)
        )

        # Separate mandatory and optional courses
        is_mandatory = df_available["BTECH_Classification"].str.lower().str.contains("mandatory", na=False)
        mandatory_courses = df_available[is_mandatory].copy()
        optional_courses = df_available[~is_mandatory].copy()

        print(f"Available: {len(mandatory_courses)} mandatory, {len(optional_courses)} optional courses")

        # Enhanced scoring for mandatory courses - prioritize core subjects AND academic progression
        if not mandatory_courses.empty:
            mandatory_courses["academic_appropriateness"] = mandatory_courses["search_text"].apply(
                lambda x: should_prioritize_course_for_semester(x, sem, career_keywords)
            )

            mandatory_courses["total_score"] = (
                2.0 * mandatory_courses["academic_appropriateness"] +  # Academic progression priority
                2.0 * mandatory_courses["is_core_subject"].astype(float) +  # Core subjects
                1.5 * mandatory_courses["career_relevance"] +     # Reduced weight for career relevance
                1.0 * mandatory_courses["interest_similarity"] +
                0.5 * mandatory_courses["skill_alignment"] +
                1.0 * mandatory_courses["semester_appropriateness"] +
                1.0  # Base score for being mandatory
            )
            mandatory_courses = mandatory_courses.sort_values("total_score", ascending=False)

        # Enhanced scoring for optional courses - balance interests and academic progression
        if not optional_courses.empty:
            optional_courses["academic_appropriateness"] = optional_courses["search_text"].apply(
                lambda x: should_prioritize_course_for_semester(x, sem, career_keywords)
            )

            optional_courses["total_score"] = (
                2.5 * optional_courses["interest_similarity"] +    # Keep high weight for interests
                2.0 * optional_courses["academic_appropriateness"] +  # Academic progression
                1.5 * optional_courses["career_relevance"] +      # Reduced weight
                1.0 * optional_courses["skill_alignment"] +
                1.0 * optional_courses["semester_appropriateness"]
            )
            optional_courses = optional_courses.sort_values("total_score", ascending=False)

        # Build semester course list with saturation control
        current_courses = semesters_map[sem]
        year_key = get_year_key(sem)
        max_specialization = ACADEMIC_PROGRESSION.get(year_key, ACADEMIC_PROGRESSION["year_4"])["max_specialization"]

        # Phase 1: Add mandatory courses - core subjects first, then the other
        # high-scoring mandatory courses, both under the same per-semester caps.
        mandatory_added = 0
        core_mask = mandatory_courses["is_core_subject"] == True  # noqa: E712
        other_mask = mandatory_courses["is_core_subject"] == False  # noqa: E712
        mandatory_phases = (
            ("core mandatory", mandatory_courses[core_mask]),
            ("mandatory", mandatory_courses[other_mask]),
        )
        for label, subset in mandatory_phases:
            for _, course in subset.iterrows():
                if (mandatory_added >= TARGET_MANDATORY_PER_SEM or
                        len(current_courses) >= MAX_COURSES_PER_SEM):
                    break  # caps reached; remaining rows would be skipped anyway

                current_courses.append(_roadmap_course_dict(course))
                completed_courses.add(course["course_code"])
                mandatory_added += 1
                print(f"  Added {label}: {course['course_code']} - {_text_or_empty(course['course_title'])[:40]} (acad: {course.get('academic_appropriateness', 0):.1f})")

        # Phase 2: Add optional courses with specialization saturation control
        optional_added = 0
        specialization_count = calculate_specialization_saturation(current_courses, career_keywords)

        for _, course in optional_courses.iterrows():
            total_credits, course_count = calculate_semester_stats(current_courses)

            # Check if this course would exceed specialization limit.
            # Title/description may be NaN, so coerce before concatenating.
            course_text = (
                _text_or_empty(course.get("course_title", "")) + " " +
                _text_or_empty(course.get("course_description", ""))
            ).lower()
            is_specialized = any(keyword in course_text for keyword in career_keywords[:8])

            # Specialization saturation check
            if is_specialized and specialization_count >= max_specialization:
                print(f"  Skipped {course['course_code']} - specialization saturation ({specialization_count}/{max_specialization})")
                continue

            # Stricter limits on optional courses
            if (course_count < MAX_COURSES_PER_SEM and
                optional_added < MAX_ELECTIVES_PER_SEM and
                (not meets_semester_requirements(current_courses) or optional_added < TARGET_ELECTIVES_PER_SEM)):

                current_courses.append(_roadmap_course_dict(course))
                completed_courses.add(course["course_code"])
                optional_added += 1

                # Update specialization count
                if is_specialized:
                    specialization_count += 1

                print(f"  Added elective: {course['course_code']} - {_text_or_empty(course['course_title'])[:40]} (acad: {course.get('academic_appropriateness', 0):.1f})")

                # Check if semester is now complete
                if meets_semester_requirements(current_courses) and optional_added >= TARGET_ELECTIVES_PER_SEM:
                    break

        # Print semester summary (current_courses is the list stored in semesters_map)
        total_credits, course_count = calculate_semester_stats(current_courses)
        mand_count = count_mandatory_in_semester(current_courses)
        opt_count = count_optional_in_semester(current_courses)
        print(f"  📊 Semester {sem} summary: {course_count} courses ({mand_count} mandatory, {opt_count} optional), {total_credits} credits")

    # Create output dataframe
    rows = []
    for sem in range(1, semesters + 1):
        courses_list = semesters_map[sem]
        total_credits, course_count = calculate_semester_stats(courses_list)

        rows.append({
            "semester_num": sem,
            "num_courses": course_count,
            "total_credits": total_credits,
            "meets_requirements": meets_semester_requirements(courses_list),
            "mandatory_count": count_mandatory_in_semester(courses_list),
            "optional_count": count_optional_in_semester(courses_list),
            "courses": [
                {
                    "course_code": c.get("course_code"),
                    "course_title": c.get("course_title"),
                    "basket_name": c.get("basket_name"),
                    "basket_category": get_basket_category(c.get("basket_name")),
                    "BTECH_Classification": c.get("BTECH_Classification"),
                    "credits": c.get("credits", DEFAULT_CREDITS),
                    "course_type": c.get("course_type", "Theory"),
                    "complexity": c.get("course_complexity"),
                    "faculty_name": c.get("Faculty_Name", "TBA"),
                    "interest_similarity": float(c.get("interest_similarity", 0.0)),
                    # These two are read by the Excel export and the roadmap
                    # display, so they must survive into the output dict.
                    "skill_alignment": float(c.get("skill_alignment", 0.0)),
                    "career_relevance": float(c.get("career_relevance", 0.0)),
                    "is_core_subject": bool(c.get("is_core_subject", False)),
                    "academic_appropriateness": float(c.get("academic_appropriateness", 0.0)),
                    "total_score": float(c.get("total_score", 0.0))
                } for c in courses_list
            ]
        })

    return pd.DataFrame(rows)

def export_roadmap_to_excel(roadmap_df, out_path=OUTPUT_PATH):
    """Flatten the roadmap to one row per scheduled course and write it to Excel.

    Every output row carries the course fields plus the totals of the semester
    it belongs to. A confirmation line is printed after the file is written.
    """
    def flatten(semester_row, course):
        # One spreadsheet row: course fields followed by semester-level totals
        return {
            "semester_num": semester_row['semester_num'],
            "course_code": course.get("course_code", ""),
            "course_title": course.get("course_title", ""),
            "basket_name": course.get("basket_name", ""),
            "basket_category": course.get("basket_category", ""),
            "BTECH_Classification": course.get("BTECH_Classification", ""),
            "credits": course.get("credits", DEFAULT_CREDITS),
            "course_type": course.get("course_type", "Theory"),
            "complexity": course.get("complexity", "foundation"),
            "faculty_name": course.get("faculty_name", "TBA"),
            "interest_similarity": course.get("interest_similarity", 0.0),
            "skill_alignment": course.get("skill_alignment", 0.0),
            "career_relevance": course.get("career_relevance", 0.0),
            "is_core_subject": course.get("is_core_subject", False),
            "total_score": course.get("total_score", 0.0),
            "semester_total_credits": semester_row['total_credits'],
            "semester_mandatory_count": semester_row['mandatory_count'],
            "semester_optional_count": semester_row['optional_count'],
        }

    flat_rows = [
        flatten(semester_row, course)
        for _, semester_row in roadmap_df.iterrows()
        for course in semester_row['courses']
    ]

    pd.DataFrame(flat_rows).to_excel(out_path, index=False)
    print(f"\nRoadmap exported to {out_path}")

# ---------- Main ----------
def main():
    """Run the interactive roadmap workflow end to end.

    Loads the dataset from DATA_PATH, prints dataset statistics, asks the user
    for a career goal, interests and skill level, generates and displays the
    roadmap with a summary, and then either opens the interactive editor or
    exports the roadmap to OUTPUT_PATH.
    """
    print("🎓 Advanced Course Recommender System - Updated Version\n")
    print("=" * 60)

    # Load dataset
    df = load_dataset(DATA_PATH)

    # Show dataset statistics (compute the mandatory mask once, reuse its inverse)
    is_mandatory = df["BTECH_Classification"].str.lower().str.contains("mandatory", na=False)
    mand_count = int(is_mandatory.sum())
    opt_count = int((~is_mandatory).sum())
    print(f"📊 Dataset contains: {mand_count} mandatory courses, {opt_count} optional courses")

    # Show unique basket categories
    baskets = df["basket_name"].value_counts().head(10)
    print(f"🗂️  Top course baskets: {', '.join(baskets.index[:5].tolist())}")

    print("\n" + "=" * 60)
    print("🔥 Let's build your personalized academic roadmap!")
    print("=" * 60)

    # Get user inputs
    career_goal = input("\n🎯 What's your career goal? (e.g., 'AI Engineer', 'Data Scientist', 'Full Stack Developer'): ").strip()
    interests = input("💡 What are your interests? (comma-separated, e.g., 'machine learning, web development, mobile apps'): ").strip()
    skill_level = input("📈 What's your current skill level? (Beginner/Intermediate/Advanced) [Intermediate]: ").strip() or "Intermediate"

    print("\n🎯 Target per semester:")
    print(f"   • Target {TARGET_MANDATORY_PER_SEM} mandatory courses (prioritizing core subjects)")
    print(f"   • Target {TARGET_ELECTIVES_PER_SEM} elective courses (max {MAX_ELECTIVES_PER_SEM})")
    print(f"   • {MIN_CREDITS_PER_SEM}+ credits total")
    print(f"   • Courses selected based on career relevance and interests: {interests}")

    # Generate initial roadmap
    roadmap = generate_roadmap(df, career_goal, interests, skill_level=skill_level, semesters=SEMESTERS)

    # First, display the generated roadmap
    display_roadmap_interactive(roadmap)

    # Show initial summary
    total_credits = roadmap['total_credits'].sum()
    total_courses = int(roadmap['num_courses'].sum())
    total_mandatory = int(roadmap['mandatory_count'].sum())
    total_optional = int(roadmap['optional_count'].sum())
    sems_meeting_req = int(roadmap['meets_requirements'].astype(bool).sum())

    print("\n" + "=" * 100)
    print("ROADMAP SUMMARY")
    print("=" * 100)
    print(f"Total Credits: {total_credits}")
    print(f"Total Courses: {total_courses} (Mandatory: {total_mandatory}, Elective: {total_optional})")
    print(f"Semesters Meeting Requirements: {sems_meeting_req}/{SEMESTERS}")
    if total_courses:
        print(f"Mandatory/Elective Ratio: {total_mandatory/total_courses:.1%} / {total_optional/total_courses:.1%}")
    else:
        # Nothing was scheduled; a ratio would divide by zero
        print("Mandatory/Elective Ratio: n/a (no courses scheduled)")

    # Now ask if user wants to customize
    print("\n" + "=" * 60)
    print("ROADMAP CUSTOMIZATION")
    print("=" * 60)
    customize = input("Would you like to customize your roadmap by adding/removing courses? (y/n): ").strip().lower()

    if customize in ['y', 'yes']:
        # Launch interactive editor
        career_keywords = get_career_keywords(career_goal)
        interactive_roadmap_editor(roadmap, df, career_keywords)
    else:
        # Export the original roadmap
        export_roadmap_to_excel(roadmap, OUTPUT_PATH)
        print("\nYour personalized roadmap is ready!")
        print(f"Check '{OUTPUT_PATH}' for the detailed Excel version.")

def display_roadmap_interactive(roadmap_df):
    """Print the roadmap as a text table, one section per semester.

    Each semester gets a header line (status icon, academic year, counts)
    followed by one numbered line per course.
    """
    banner = "=" * 100
    print("\n" + banner)
    print("YOUR PERSONALIZED 8-SEMESTER ACADEMIC ROADMAP")
    print(banner)

    for _, sem_row in roadmap_df.iterrows():
        if sem_row['meets_requirements']:
            status_icon = "✅"
        else:
            status_icon = "⚠️"
        year = get_academic_year(sem_row['semester_num'])

        header = (
            f"\n{status_icon} Semester {sem_row['semester_num']} (Year {year}) | {sem_row['num_courses']} courses | {sem_row['total_credits']} credits | "
            f"Mandatory: {sem_row['mandatory_count']}, Elective: {sem_row['optional_count']}"
        )
        print(header)
        print("-" * 100)

        for position, course in enumerate(sem_row['courses'], 1):
            classification = str(course['BTECH_Classification']).lower()
            mand_flag = "MAND" if "mandatory" in classification else "ELEC"

            # Titles longer than 35 characters are cut and marked with "..."
            title = course['course_title']
            title_short = title[:35] + "..." if len(title) > 35 else title

            core_flag = " [CORE]" if course.get('is_core_subject', False) else ""
            acad_score = course.get('academic_appropriateness', 0)

            line = (
                f"  {position}. {course['course_code']:<10} | {title_short:<38} | {course['basket_category']:<8} | {mand_flag:<4} | "
                f"{course['credits']}cr | {course['complexity']:<12} | Acad:{acad_score:.1f} Int:{course['interest_similarity']:.2f}{core_flag}"
            )
            print(line)

def get_available_courses_for_addition(df, roadmap_df, semester):
    """Get courses that can be added to a specific semester.

    Args:
        df: Course dataset. Must contain "course_code" and either a
            pre-computed "allowed_sems_list" column or the raw
            "allowed_semesters" column.
        roadmap_df: Roadmap frame whose "courses" column holds lists of course
            dicts with a "course_code" key.
        semester: Semester number to check.

    Returns:
        A copy of the rows of ``df`` that are not yet assigned anywhere in the
        roadmap and whose allowed semesters include ``semester``.
    """
    # Get already assigned courses
    assigned_courses = {
        course['course_code']
        for courses in roadmap_df.get('courses', [])
        for course in courses
    }

    # "allowed_sems_list" is only added to the working copy inside
    # generate_roadmap, so a raw dataset may not have it; derive it on demand.
    if "allowed_sems_list" in df.columns:
        allowed_sems = df["allowed_sems_list"]
    else:
        allowed_sems = df["allowed_semesters"].apply(parse_allowed_semesters)

    # Get available courses for this semester
    available = df[
        (~df["course_code"].isin(assigned_courses)) &
        (allowed_sems.apply(lambda sems: semester in sems))
    ].copy()

    return available

def add_course_to_roadmap(roadmap_df, df, semester, course_code):
    """Add a specific course to a semester.

    The roadmap is modified in place (the semester's course list is appended
    to and its summary columns are recomputed) and also returned. Nothing is
    changed, and a message is printed, when the course code is not in the
    dataset, the semester is not in the roadmap, or the course is already
    scheduled somewhere in the roadmap.

    Args:
        roadmap_df: Roadmap frame with "semester_num" and "courses" columns.
        df: Course dataset with a "course_code" column.
        semester: Semester number to add the course to.
        course_code: Code of the course to add.

    Returns:
        The same ``roadmap_df`` object.
    """
    # Find the course in the dataset
    course_match = df[df["course_code"] == course_code]
    if course_match.empty:
        print(f"Course {course_code} not found in dataset.")
        return roadmap_df

    # Locate the semester row; an unknown semester must not raise IndexError
    semester_rows = roadmap_df.index[roadmap_df['semester_num'] == semester]
    if len(semester_rows) == 0:
        print(f"Semester {semester} not found in roadmap.")
        return roadmap_df
    semester_idx = semester_rows[0]

    # Refuse to schedule the same course twice
    already_scheduled = any(
        existing.get("course_code") == course_code
        for courses in roadmap_df['courses']
        for existing in courses
    )
    if already_scheduled:
        print(f"Course {course_code} is already in the roadmap.")
        return roadmap_df

    course = course_match.iloc[0]

    # Create course dictionary with all required fields
    course_dict = {
        "course_code": course.get("course_code"),
        "course_title": course.get("course_title"),
        "basket_name": course.get("basket_name"),
        "basket_category": get_basket_category(course.get("basket_name")),
        "BTECH_Classification": course.get("BTECH_Classification"),
        "credits": course.get("credits", DEFAULT_CREDITS),
        "course_type": course.get("course_type", "Theory"),
        "complexity": course.get("course_complexity", "intermediate"),
        "faculty_name": course.get("Faculty_Name", "TBA"),
        "interest_similarity": float(course.get("interest_similarity", 0.0)),
        "skill_alignment": float(course.get("skill_alignment", 0.0)),
        "career_relevance": float(course.get("career_relevance", 0.0)),
        "is_core_subject": bool(course.get("is_core_subject", False)),
        "academic_appropriateness": float(course.get("academic_appropriateness", 0.0)),
        "total_score": float(course.get("total_score", 0.0))
    }

    # Add to the specific semester (the list stored in the cell is mutated in place)
    courses_list = roadmap_df.loc[semester_idx, 'courses']
    courses_list.append(course_dict)

    # Update semester stats
    total_credits, course_count = calculate_semester_stats(courses_list)
    roadmap_df.loc[semester_idx, 'num_courses'] = course_count
    roadmap_df.loc[semester_idx, 'total_credits'] = total_credits
    roadmap_df.loc[semester_idx, 'meets_requirements'] = meets_semester_requirements(courses_list)
    roadmap_df.loc[semester_idx, 'mandatory_count'] = count_mandatory_in_semester(courses_list)
    roadmap_df.loc[semester_idx, 'optional_count'] = count_optional_in_semester(courses_list)

    print(f"Added {course_code} - {course.get('course_title')} to Semester {semester}")
    return roadmap_df

def remove_course_from_roadmap(roadmap_df, semester, course_index):
    """Remove a course from a semester by its position.

    The roadmap is modified in place and also returned. A message is printed
    and nothing is changed when the semester is not in the roadmap or the
    index is out of range.

    Args:
        roadmap_df: Roadmap frame with "semester_num" and "courses" columns.
        semester: Semester number to remove the course from.
        course_index: Zero-based position of the course in that semester's list.

    Returns:
        The same ``roadmap_df`` object.
    """
    # An unknown semester must not raise IndexError
    semester_rows = roadmap_df.index[roadmap_df['semester_num'] == semester]
    if len(semester_rows) == 0:
        print(f"Semester {semester} not found in roadmap.")
        return roadmap_df
    semester_idx = semester_rows[0]

    courses_list = roadmap_df.loc[semester_idx, 'courses']

    # Negative indices are rejected rather than counted from the end
    if not 0 <= course_index < len(courses_list):
        print("Invalid course number.")
        return roadmap_df

    removed_course = courses_list.pop(course_index)

    # Update semester stats
    total_credits, course_count = calculate_semester_stats(courses_list)
    roadmap_df.loc[semester_idx, 'num_courses'] = course_count
    roadmap_df.loc[semester_idx, 'total_credits'] = total_credits
    roadmap_df.loc[semester_idx, 'meets_requirements'] = meets_semester_requirements(courses_list)
    roadmap_df.loc[semester_idx, 'mandatory_count'] = count_mandatory_in_semester(courses_list)
    roadmap_df.loc[semester_idx, 'optional_count'] = count_optional_in_semester(courses_list)

    print(f"Removed {removed_course['course_code']} - {removed_course['course_title']} from Semester {semester}")

    return roadmap_df

def _rank_by_keyword_similarity(available, career_keywords):
    """Return a copy of ``available`` ranked by similarity to the joined career keywords."""
    query = " ".join(career_keywords)
    vectorizer, X = build_vectorizer(available["search_text"].tolist())
    ranked = available.copy()
    ranked["temp_similarity"] = compute_similarity(vectorizer, X, query)
    return ranked.sort_values(["temp_similarity", "credits"], ascending=[False, True])


def interactive_roadmap_editor(roadmap_df, df, career_keywords):
    """Interactive editor for modifying the roadmap.

    Repeatedly displays the roadmap and a menu that lets the user add or
    remove courses, list available courses, view a summary, export to
    OUTPUT_PATH, or exit without saving.

    Args:
        roadmap_df: Roadmap frame as produced by generate_roadmap; edited in place.
        df: Course dataset. It may be the raw dataset: the semester list column
            is derived here when absent.
        career_keywords: Keywords used to rank available courses.
    """
    # The raw dataset lacks "allowed_sems_list" (generate_roadmap adds it to a
    # private copy), so derive it here instead of failing with KeyError later.
    if "allowed_sems_list" not in df.columns:
        df = df.copy()
        df["allowed_sems_list"] = df["allowed_semesters"].apply(parse_allowed_semesters)

    # Valid semesters come from the roadmap itself rather than a fixed 1-8
    valid_semesters = {int(s) for s in roadmap_df["semester_num"]}
    max_semester = max(valid_semesters, default=0)

    while True:
        display_roadmap_interactive(roadmap_df)

        print("\n" + "=" * 100)
        print("ROADMAP CUSTOMIZATION OPTIONS")
        print("=" * 100)
        print("1. Add a course to a semester")
        print("2. Remove a course from a semester")
        print("3. View available courses for a semester")
        print("4. View roadmap summary")
        print("5. Export and finish")
        print("6. Exit without saving")

        choice = input("\nEnter your choice (1-6): ").strip()

        if choice == "1":
            # Add course
            try:
                semester = int(input(f"Which semester to add to (1-{max_semester})? "))
                if semester not in valid_semesters:
                    print(f"Invalid semester. Please enter 1-{max_semester}.")
                    continue

                # Show available courses
                available = get_available_courses_for_addition(df, roadmap_df, semester)
                if available.empty:
                    print("No courses available to add to this semester.")
                    continue

                print(f"\nAvailable courses for Semester {semester}:")
                print("-" * 80)
                # Show top 20 most relevant courses. The score columns only exist
                # on an enriched dataset; otherwise rank by keyword similarity.
                score_cols = [col for col in ("career_relevance", "interest_similarity")
                              if col in available.columns]
                if score_cols:
                    available_sorted = available.sort_values(score_cols, ascending=False)
                else:
                    available_sorted = _rank_by_keyword_similarity(available, career_keywords)

                for i, (_, course) in enumerate(available_sorted.head(20).iterrows(), 1):
                    title_short = course['course_title'][:40] + "..." if len(course['course_title']) > 40 else course['course_title']
                    mand_flag = ("MAND" if "mandatory" in str(course['BTECH_Classification']).lower() else "ELEC")
                    print(f"  {i:2}. {course['course_code']:<10} | {title_short:<43} | {mand_flag} | {course['credits']}cr")

                course_code = input("\nEnter course code to add: ").strip().upper()
                roadmap_df = add_course_to_roadmap(roadmap_df, df, semester, course_code)

            except ValueError:
                print("Invalid input. Please enter a valid semester number.")

        elif choice == "2":
            # Remove course
            try:
                semester = int(input(f"Which semester to remove from (1-{max_semester})? "))
                if semester not in valid_semesters:
                    print(f"Invalid semester. Please enter 1-{max_semester}.")
                    continue

                # Displayed numbering is 1-based; the list index is 0-based
                course_num = int(input("Enter course number to remove: ")) - 1
                roadmap_df = remove_course_from_roadmap(roadmap_df, semester, course_num)

            except ValueError:
                print("Invalid input. Please enter valid numbers.")

        elif choice == "3":
            # View available courses
            try:
                semester = int(input(f"Which semester to view available courses for (1-{max_semester})? "))
                if semester not in valid_semesters:
                    print(f"Invalid semester. Please enter 1-{max_semester}.")
                    continue

                available = get_available_courses_for_addition(df, roadmap_df, semester)
                if available.empty:
                    print("No courses available for this semester.")
                    continue

                # Add scoring for better recommendations
                available_sorted = _rank_by_keyword_similarity(available, career_keywords)

                print(f"\nTop 30 available courses for Semester {semester}:")
                print("-" * 90)
                for i, (_, course) in enumerate(available_sorted.head(30).iterrows(), 1):
                    title_short = course['course_title'][:45] + "..." if len(course['course_title']) > 45 else course['course_title']
                    mand_flag = ("MAND" if "mandatory" in str(course['BTECH_Classification']).lower() else "ELEC")
                    print(f"  {i:2}. {course['course_code']:<10} | {title_short:<48} | {mand_flag} | {course['credits']}cr | Rel:{course['temp_similarity']:.2f}")

                input("\nPress Enter to continue...")

            except ValueError:
                print("Invalid input. Please enter a valid semester number.")

        elif choice == "4":
            # Show summary
            total_credits = sum(r['total_credits'] for _, r in roadmap_df.iterrows())
            total_courses = sum(r['num_courses'] for _, r in roadmap_df.iterrows())
            total_mandatory = sum(r['mandatory_count'] for _, r in roadmap_df.iterrows())
            total_optional = sum(r['optional_count'] for _, r in roadmap_df.iterrows())
            sems_meeting_req = sum(1 for _, r in roadmap_df.iterrows() if r['meets_requirements'])

            print("\n" + "=" * 60)
            print("ROADMAP SUMMARY")
            print("=" * 60)
            print(f"Total Credits: {total_credits}")
            print(f"Total Courses: {total_courses} (Mandatory: {total_mandatory}, Elective: {total_optional})")
            print(f"Semesters Meeting Requirements: {sems_meeting_req}/{len(roadmap_df)}")
            if total_courses:
                print(f"Mandatory/Elective Ratio: {total_mandatory/total_courses:.1%} / {total_optional/total_courses:.1%}")
            else:
                # Every course may have been removed; avoid dividing by zero
                print("Mandatory/Elective Ratio: n/a (no courses scheduled)")

            input("\nPress Enter to continue...")

        elif choice == "5":
            # Export and finish
            export_roadmap_to_excel(roadmap_df, OUTPUT_PATH)
            print("\nYour customized roadmap has been saved!")
            break

        elif choice == "6":
            # Exit without saving
            print("Exiting without saving changes.")
            break

        else:
            print("Invalid choice. Please enter 1-6.")

# Entry point: start the interactive roadmap workflow when this code is executed
# with __name__ set to "__main__" (script run or notebook cell).
if __name__ == "__main__":
    main()

🎓 Advanced Course Recommender System - Updated Version

✅ Loaded dataset with 773 courses
📊 Dataset contains: 227 mandatory courses, 546 optional courses
🗂️  Top course baskets: Open Elective, Foundation Core - Basic Sciences and Mathematics, Discipline Core, Specialization Elective, Non-graded Core Requirement

🔥 Let's build your personalized academic roadmap!

🎯 What's your career goal? (e.g., 'AI Engineer', 'Data Scientist', 'Full Stack Developer'): Data Scientist 
💡 What are your interests? (comma-separated, e.g., 'machine learning, web development, mobile apps'): machine learning, psychology, mathematics, AI 
📈 What's your current skill level? (Beginner/Intermediate/Advanced) [Intermediate]: beginner 

🎯 Target per semester:
   • Target 4 mandatory courses (prioritizing core subjects)
   • Target 2 elective courses (max 3)
   • 24+ credits total
   • Courses selected based on career relevance and interests: machine learning, psychology, mathematics, AI

🎯 Starting roadmap generation...

In [None]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import json

# Modified Timetable Configuration - 9 AM to 5 PM, Monday to Friday
# Eight consecutive one-hour slot labels in "HH:MM-HH:MM" form. The scheduler
# indexes into this list by position, so the order must stay chronological.
TIME_SLOTS = [
    "09:00-10:00", "10:00-11:00", "11:00-12:00", "12:00-13:00",
    "13:00-14:00", "14:00-15:00", "15:00-16:00", "16:00-17:00"
]

# Teaching days, in the column order used when the timetable grid is rendered.
DAYS = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"]

# Default session settings if not specified in dataset
# Keys are lower-case course-type names; values are sessions per week.
DEFAULT_SESSIONS_PER_WEEK = {
    "theory": 3,
    "lab": 2,
    "theory + lab": 4,
    "project": 2
}

# Same keys as above; values are the length of one session in minutes.
DEFAULT_SESSION_DURATION = {
    "theory": 60,
    "lab": 120,  # Labs typically longer
    "theory + lab": 60,
    "project": 90
}

class TimetableGenerator:
    """Place the courses of one roadmap semester onto a weekly grid.

    The grid is DAYS x TIME_SLOTS (module-level constants). A *schedule* is a
    nested dict ``{day: {slot_label: session_info}}`` where every occupied
    one-hour slot points at the ``session_info`` dict of the session that
    occupies it (a multi-hour session appears under several slot labels).

    Attributes:
        roadmap_df: DataFrame with one row per semester; each row carries a
            ``semester_num`` and a ``courses`` list of course dicts.
        courses_df: DataFrame of the course catalogue, looked up by
            ``course_code`` for session counts, durations and faculty.
        timetable: Empty dict created at construction; not used by the
            methods of this class.
    """

    # Last-resort values, used only when the module-level default tables are
    # missing the corresponding key.
    _FALLBACK_SESSIONS = {'lab': 2, 'project': 2, 'theory': 3}
    _FALLBACK_MINUTES = {'lab': 120, 'project': 90, 'theory': 60}

    def __init__(self, roadmap_df, courses_df):
        self.roadmap_df = roadmap_df
        self.courses_df = courses_df
        self.timetable = {}

    @staticmethod
    def _clean_text(value, default=''):
        """Return ``value`` as a string, or ``default`` when it is None/NaN.

        Spreadsheet cells that are empty arrive as float NaN; passing those on
        would later break ``.lower()`` / slicing and print as ``nan``.
        """
        if value is None:
            return default
        try:
            if pd.isna(value):
                return default
        except (TypeError, ValueError):
            # Non-scalar values (lists, arrays) are not "missing".
            pass
        return str(value)

    @staticmethod
    def _default_for_type(course_type, table, fallback):
        """Pick the default from ``table`` that best matches ``course_type``.

        An exact key match wins (so "theory + lab" gets its own entry);
        otherwise the type is classified by substring as lab, project or
        theory, in that order.
        """
        if course_type in table:
            return table[course_type]
        if 'lab' in course_type:
            family = 'lab'
        elif 'project' in course_type:
            family = 'project'
        else:
            family = 'theory'
        return table.get(family, fallback[family])

    @staticmethod
    def _unique_course_sessions(schedule):
        """Map each course code in ``schedule`` to its first session_info."""
        unique_courses = {}
        for day_slots in schedule.values():
            for session in day_slots.values():
                unique_courses.setdefault(session['course_code'], session)
        return unique_courses

    def get_course_details(self, course_code):
        """Get detailed course information from the dataset.

        Args:
            course_code: Value to match against the ``course_code`` column of
                ``self.courses_df``.

        Returns:
            A dict with keys ``code``, ``title``, ``type`` (lower-cased),
            ``sessions_per_week``, ``session_minutes`` (both ``int`` or
            ``None`` when the cell is missing), ``faculty``, ``credits`` and
            ``infrastructure``; or ``None`` when no row matches. When several
            rows match, the first one is used.
        """
        course_match = self.courses_df[self.courses_df['course_code'] == course_code]
        if course_match.empty:
            return None

        course = course_match.iloc[0]

        def optional_int(column):
            value = course.get(column)
            return int(value) if pd.notna(value) else None

        credits = course.get('credits', 4)
        if pd.isna(credits):
            credits = 4

        return {
            'code': course.get('course_code', course_code),
            'title': self._clean_text(course.get('course_title'), 'Unknown Course'),
            'type': self._clean_text(course.get('course_type'), 'Theory').lower(),
            'sessions_per_week': optional_int('sessions_per_week'),
            'session_minutes': optional_int('session_minutes'),
            'faculty': self._clean_text(course.get('Faculty_Name'), 'TBA'),
            'credits': credits,
            'infrastructure': self._clean_text(course.get('infrastructure_notes'), '')
        }

    def determine_sessions_and_duration(self, course_details):
        """Determine sessions per week and duration based on course type.

        Values present in ``course_details`` win; a ``None`` value is replaced
        by the matching entry of DEFAULT_SESSIONS_PER_WEEK /
        DEFAULT_SESSION_DURATION.

        Returns:
            ``(sessions_per_week, session_minutes)``.
        """
        course_type = course_details['type']

        # Use dataset values if available
        sessions_per_week = course_details['sessions_per_week']
        session_minutes = course_details['session_minutes']

        # Fall back to defaults based on course type
        if sessions_per_week is None:
            sessions_per_week = self._default_for_type(
                course_type, DEFAULT_SESSIONS_PER_WEEK, self._FALLBACK_SESSIONS)

        if session_minutes is None:
            session_minutes = self._default_for_type(
                course_type, DEFAULT_SESSION_DURATION, self._FALLBACK_MINUTES)

        return sessions_per_week, session_minutes

    def calculate_time_slots_needed(self, session_minutes):
        """Calculate how many one-hour time slots a session needs.

        The duration is rounded *up* to whole hours (61 minutes -> 2 slots) so
        a session never overruns the slots reserved for it; at least one slot
        is always returned.
        """
        # -(-a // b) is ceiling division without importing math.
        return max(1, int(-(-session_minutes // 60)))

    def find_available_slots(self, day, slots_needed, existing_schedule):
        """Find available consecutive time slots on a given day.

        Returns:
            List of start indices into TIME_SLOTS from which ``slots_needed``
            consecutive slots are all free on ``day`` (empty when none fit).
        """
        occupied = existing_schedule.get(day, {})
        last_start = len(TIME_SLOTS) - slots_needed

        return [
            start_idx
            for start_idx in range(last_start + 1)
            if all(TIME_SLOTS[start_idx + offset] not in occupied
                   for offset in range(slots_needed))
        ]

    def schedule_course(self, course_details, existing_schedule):
        """Schedule a single course in the timetable.

        For every weekly session a random day (and a random free start time on
        that day) is chosen. ``existing_schedule`` is mutated in place: every
        slot the session occupies is mapped to the session's info dict.

        Returns:
            List of ``{'day', 'time', 'info'}`` dicts, one per session that
            could be placed. ``time`` is ``"HH:MM-HH:MM"`` from the start of
            the first slot to the end of the last. Sessions that do not fit
            anywhere are reported with a printed warning and omitted.
        """
        sessions_per_week, session_minutes = self.determine_sessions_and_duration(course_details)
        slots_needed = self.calculate_time_slots_needed(session_minutes)

        scheduled_sessions = []

        for session_num in range(sessions_per_week):
            scheduled = False

            # Try to schedule this session
            days_tried = DAYS.copy()
            random.shuffle(days_tried)  # Randomize to avoid always using Monday first

            for day in days_tried:
                available_starts = self.find_available_slots(day, slots_needed, existing_schedule)
                if not available_starts:
                    continue

                # Choose a random available start time
                start_idx = random.choice(available_starts)
                end_idx = start_idx + slots_needed - 1

                session_info = {
                    'course_code': course_details['code'],
                    'course_title': course_details['title'],
                    'faculty': course_details['faculty'],
                    'type': course_details['type'],
                    'session_num': session_num + 1,
                    'duration_minutes': session_minutes,
                    'infrastructure': course_details['infrastructure']
                }

                # Occupy all required time slots
                day_slots = existing_schedule.setdefault(day, {})
                for slot_idx in range(start_idx, end_idx + 1):
                    day_slots[TIME_SLOTS[slot_idx]] = session_info

                # Start of the first slot to end of the last slot.
                start_time = TIME_SLOTS[start_idx].split('-')[0]
                end_time = TIME_SLOTS[end_idx].split('-')[1]
                scheduled_sessions.append({
                    'day': day,
                    'time': f"{start_time}-{end_time}",
                    'info': session_info
                })

                scheduled = True
                break

            if not scheduled:
                print(f"Warning: Could not schedule session {session_num + 1} for {course_details['code']}")

        return scheduled_sessions

    def generate_semester_timetable(self, semester_num):
        """Generate timetable for a specific semester.

        Returns:
            ``(schedule, sessions)`` where ``schedule`` is the nested
            ``{day: {slot: session_info}}`` dict and ``sessions`` the flat
            list returned by ``schedule_course`` for all courses; or
            ``(None, None)`` when the roadmap has no row for ``semester_num``.
        """
        print(f"\nGenerating timetable for Semester {semester_num}...")

        # Get courses for this semester
        semester_data = self.roadmap_df[self.roadmap_df['semester_num'] == semester_num]
        if semester_data.empty:
            print(f"No courses found for semester {semester_num}")
            return None, None

        courses_list = semester_data.iloc[0]['courses']

        # Initialize empty schedule
        schedule = {}
        all_scheduled_sessions = []

        # Schedule each course
        for course in courses_list:
            course_code = course['course_code']
            course_details = self.get_course_details(course_code)

            if course_details is None:
                # Fallback for courses not in detailed dataset; roadmap cells
                # may be NaN, so normalise them before calling str methods.
                course_details = {
                    'code': course_code,
                    'title': self._clean_text(course.get('course_title'), 'Unknown Course'),
                    'type': self._clean_text(course.get('course_type'), 'theory').lower(),
                    'sessions_per_week': None,
                    'session_minutes': None,
                    'faculty': self._clean_text(course.get('faculty_name'), 'TBA'),
                    'credits': course.get('credits', 4),
                    'infrastructure': ''
                }

            scheduled_sessions = self.schedule_course(course_details, schedule)
            all_scheduled_sessions.extend(scheduled_sessions)

        return schedule, all_scheduled_sessions

    def display_timetable(self, schedule, semester_num):
        """Display timetable in a readable format.

        Prints a slot-by-day grid of course codes (lab sessions are suffixed
        with " (L)") followed by one legend line per distinct course.
        """
        print(f"\n" + "=" * 100)
        print(f"SEMESTER {semester_num} WEEKLY TIMETABLE (Monday-Friday, 9 AM - 5 PM)")
        print("=" * 100)

        # Create timetable grid
        print(f"{'Time':<12} | {'Monday':<18} | {'Tuesday':<18} | {'Wednesday':<18} | {'Thursday':<18} | {'Friday':<18}")
        print("-" * 100)

        for time_slot in TIME_SLOTS:
            row = f"{time_slot:<12} |"

            for day in DAYS:
                session = schedule.get(day, {}).get(time_slot)
                if session is not None:
                    cell_text = f"{session['course_code']}"
                    if session.get('type') and 'lab' in session['type']:
                        cell_text += " (L)"
                else:
                    cell_text = ""

                # 18-wide cells so the pipes line up with the header columns.
                row += f" {cell_text:<18} |"

            print(row)

        print("-" * 100)

        # Course legend
        print("\nCOURSE DETAILS:")
        print("-" * 80)

        for course_code, session in self._unique_course_sessions(schedule).items():
            faculty = self._clean_text(session.get('faculty'), 'TBA')
            course_type = self._clean_text(session.get('type'), 'theory').title()
            duration = session.get('duration_minutes', 60)
            title = self._clean_text(session.get('course_title'), '')[:50]

            print(f"{course_code:<12} | {title:<50} | {faculty:<15} | {course_type} ({duration}min)")

    def export_timetable_to_excel(self, schedule, sessions, semester_num, filename=None):
        """Export semester timetable to Excel.

        Writes two sheets: ``Timetable`` (slot-by-day grid) and
        ``Course_Details`` (one row per distinct course).

        Args:
            schedule: Nested ``{day: {slot: session_info}}`` dict, or ``None``.
            sessions: Accepted for interface compatibility; not written.
            semester_num: Used in messages and in the default file name.
            filename: Target path; defaults to
                ``semester_<n>_timetable.xlsx``.
        """
        # Bail out before touching the filesystem: closing a writer that has
        # no sheets would fail and leave a broken file behind.
        if schedule is None:
            print(f"No schedule data for semester {semester_num}")
            return

        if filename is None:
            filename = f"semester_{semester_num}_timetable.xlsx"

        # Create timetable grid for Excel
        timetable_data = []
        for time_slot in TIME_SLOTS:
            row = {'Time': time_slot}
            for day in DAYS:
                session = schedule.get(day, {}).get(time_slot)
                if session is not None:
                    title = self._clean_text(session.get('course_title'), '')
                    cell_value = f"{session['course_code']} - {title[:20]}"
                    if 'lab' in self._clean_text(session.get('type'), '').lower():
                        cell_value += " (LAB)"
                else:
                    cell_value = ""
                row[day] = cell_value
            timetable_data.append(row)

        # Also create a detailed course list
        course_details = [
            {
                'Course Code': course_code,
                'Course Title': session.get('course_title', ''),
                'Faculty': session.get('faculty', 'TBA'),
                'Type': self._clean_text(session.get('type'), 'theory').title(),
                'Duration (min)': session.get('duration_minutes', 60),
                'Infrastructure': session.get('infrastructure', '')
            }
            for course_code, session in self._unique_course_sessions(schedule).items()
        ]

        with pd.ExcelWriter(filename, engine='openpyxl') as writer:
            pd.DataFrame(timetable_data).to_excel(writer, sheet_name='Timetable', index=False)
            pd.DataFrame(course_details).to_excel(writer, sheet_name='Course_Details', index=False)

        print(f"\nTimetable exported to {filename}")

def get_user_semester_choice():
    """Prompt until the user enters a semester in 1-8 or asks to quit.

    Returns:
        The chosen semester as an ``int`` between 1 and 8, or ``None`` when
        the user types ``quit``, ``exit`` or ``q`` (case-insensitive).
    """
    quit_words = ('quit', 'exit', 'q')

    while True:
        try:
            print("\nAvailable Semesters: 1-8")
            answer = input("Enter the semester number for which you want to generate timetable (1-8): ").strip()

            if answer.lower() in quit_words:
                return None

            # int() raises ValueError for non-numeric text; handled below.
            picked = int(answer)
            if picked < 1 or picked > 8:
                print("Invalid input! Please enter a number between 1 and 8.")
                continue

            return picked
        except ValueError:
            print("Invalid input! Please enter a valid semester number (1-8) or 'quit' to exit.")

def generate_specific_semester_timetable(roadmap_file="generated_roadmap.xlsx", courses_file="courses_with_filled_baskets_types.xlsx"):
    """
    Generate timetable for user-specified semester

    Loads the roadmap and course workbooks, asks the user for a semester,
    builds and prints that semester's timetable and optionally exports it.

    Parameters:
    - roadmap_file: Excel file with generated roadmap (one row per course,
      with at least ``semester_num``, ``course_code`` and ``course_title``)
    - courses_file: Original courses dataset with session details

    Returns:
    A 3-tuple on every path, so callers can always unpack it:
    - ``(generator, (schedule, sessions), semester)`` on success
    - ``(generator, None, semester)`` when nothing could be scheduled
    - ``(None, None, None)`` when the user cancels, a file is missing, or
      any other error occurs (the error is printed, not raised)
    """

    try:
        # Load roadmap data
        print("Loading roadmap data...")
        roadmap_df = pd.read_excel(roadmap_file)

        # Reconstruct the roadmap structure: one record per semester holding
        # the list of course dicts that TimetableGenerator expects.
        roadmap_structured = []
        for semester_num in range(1, 9):
            semester_courses = roadmap_df[roadmap_df['semester_num'] == semester_num]
            if semester_courses.empty:
                continue

            courses_list = [
                {
                    'course_code': row['course_code'],
                    'course_title': row['course_title'],
                    'basket_category': row.get('basket_category', ''),
                    'BTECH_Classification': row.get('BTECH_Classification', ''),
                    'credits': row.get('credits', 4),
                    'course_type': row.get('course_type', 'Theory'),
                    'faculty_name': row.get('faculty_name', 'TBA')
                }
                for _, row in semester_courses.iterrows()
            ]

            # The per-course lookup above tolerates a missing 'credits'
            # column, so the total must as well.
            if 'credits' in semester_courses.columns:
                total_credits = semester_courses['credits'].sum()
            else:
                total_credits = sum(course['credits'] for course in courses_list)

            roadmap_structured.append({
                'semester_num': semester_num,
                'courses': courses_list,
                'total_credits': total_credits
            })

        roadmap_structured_df = pd.DataFrame(roadmap_structured)

        # Load courses dataset
        print("Loading courses dataset...")
        courses_df = pd.read_excel(courses_file)

        # Get user's semester choice
        chosen_semester = get_user_semester_choice()
        if chosen_semester is None:
            print("Timetable generation cancelled.")
            return None, None, None

        # Generate timetable for chosen semester
        generator = TimetableGenerator(roadmap_structured_df, courses_df)
        schedule, sessions = generator.generate_semester_timetable(chosen_semester)

        if not schedule:
            print(f"Could not generate timetable for semester {chosen_semester}")
            return generator, None, chosen_semester

        # Display the timetable
        generator.display_timetable(schedule, chosen_semester)

        # Ask if user wants to export to Excel
        export_choice = input("\nWould you like to export this timetable to Excel? (y/n): ").strip().lower()
        if export_choice in ['y', 'yes']:
            generator.export_timetable_to_excel(schedule, sessions, chosen_semester)

        print(f"\nTimetable generation completed successfully for Semester {chosen_semester}!")

        return generator, (schedule, sessions), chosen_semester

    except FileNotFoundError as e:
        print(f"Error: File not found - {e}")
        print("Make sure you have generated a roadmap first using the course recommender system.")
        # Same shape as every other exit so the caller's tuple-unpack works.
        return None, None, None
    except Exception as e:
        print(f"Error generating timetable: {e}")
        return None, None, None

def interactive_timetable_generator():
    """Interactive timetable generator with options.

    Runs a text menu until the user picks "3". Options "1" and "2" both
    generate a timetable for a semester the user is then asked for; after a
    successful generation a sub-menu allows re-printing that timetable or
    returning to the main menu.
    """
    print("=" * 60)
    print("INTERACTIVE WEEKLY TIMETABLE GENERATOR")
    print("Schedule: Monday-Friday, 9 AM - 5 PM")
    print("=" * 60)

    while True:
        print("\nOptions:")
        print("1. Generate timetable for a specific semester")
        print("2. Generate timetable for another semester")
        print("3. Exit")

        choice = input("\nEnter your choice (1-3): ").strip()

        if choice in ('1', '2'):
            result = generate_specific_semester_timetable()

            # Defensive: a failed load may yield None rather than a 3-tuple,
            # and unpacking None would raise TypeError and kill the menu.
            if result is None:
                continue
            generator, timetable_data, semester = result

            if generator and timetable_data:
                # Option to view timetable again or modify
                while True:
                    view_choice = input(f"\nWould you like to:\n1. View semester {semester} timetable again\n2. Generate another semester's timetable\n3. Go back to main menu\nEnter choice (1-3): ").strip()

                    if view_choice == '1':
                        schedule, sessions = timetable_data
                        generator.display_timetable(schedule, semester)
                    elif view_choice in ('2', '3'):
                        # Both leave the sub-menu; the main menu is shown next.
                        break
                    else:
                        print("Invalid choice. Please enter 1, 2, or 3.")

        elif choice == '3':
            print("Thank you for using the Timetable Generator!")
            break
        else:
            print("Invalid choice. Please enter 1, 2, or 3.")

# Example usage: start the interactive menu when this code is run directly.
# In a notebook cell __name__ is "__main__", so the menu (which blocks on
# input()) starts as soon as the cell is executed.
if __name__ == "__main__":
    interactive_timetable_generator()

INTERACTIVE WEEKLY TIMETABLE GENERATOR
Schedule: Monday-Friday, 9 AM - 5 PM

Options:
1. Generate timetable for a specific semester
2. Generate timetable for another semester
3. Exit

Enter your choice (1-3): 1
Loading roadmap data...
Loading courses dataset...

Available Semesters: 1-8
Enter the semester number for which you want to generate timetable (1-8): 3

Generating timetable for Semester 3...

SEMESTER 3 WEEKLY TIMETABLE (Monday-Friday, 9 AM - 5 PM)
Time         | Monday             | Tuesday            | Wednesday          | Thursday           | Friday            
----------------------------------------------------------------------------------------------------
09:00-10:00  |                   | CFOC309M          |                   |                   | CFOC107M          |
10:00-11:00  | CFOC469M          | CFOC544M          | CFOC503M          | CFOC118M          |                   |
11:00-12:00  | CFOC107M          | CFOC469M          |                   | CFOC107M      

In [None]:
import pickle
import os
from google.colab import files

# Save your trained models
def save_models(course_recommender=None, timetable_generator=None, download=True):
    """Pickle the recommender and timetable generator, then download them.

    Args:
        course_recommender: Object written to ``course_recommender.pkl``.
            When omitted, the global ``your_course_recommender_instance`` is
            used, which keeps the original zero-argument call working.
        timetable_generator: Object written to ``timetable_generator.pkl``.
            When omitted, the global ``your_timetable_generator_instance``
            is used.
        download: When true (default), hand both files to Colab's
            ``files.download`` after writing them.

    Raises:
        NameError: If an object is neither passed in nor defined as the
            corresponding global. Raised before any file is opened, so no
            empty ``.pkl`` files are left behind.
    """
    namespace = globals()
    targets = [
        ('course_recommender.pkl', course_recommender, 'your_course_recommender_instance'),
        ('timetable_generator.pkl', timetable_generator, 'your_timetable_generator_instance'),
    ]

    # Resolve every object first; opening a file with 'wb' truncates it, so
    # nothing may be opened until we know there is something to write.
    resolved = []
    for filename, obj, placeholder in targets:
        if obj is None:
            if placeholder not in namespace:
                raise NameError(
                    f"name '{placeholder}' is not defined; pass the object to "
                    f"save_models() or assign it to that name first"
                )
            obj = namespace[placeholder]
        resolved.append((filename, obj))

    for filename, obj in resolved:
        with open(filename, 'wb') as f:
            pickle.dump(obj, f)

    # Download the files
    if download:
        for filename, _ in resolved:
            files.download(filename)

save_models()