In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and echo the full path of every file found.
input_root = '/kaggle/input'
for dirname, _, filenames in os.walk(input_root):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import pandas as pd
import re
import os

# Locations of the three departmental syllabus workbooks inside the Kaggle input dataset
dataset_dir = '/kaggle/input/syllabus-data-new/'
cse_file, ece_file, it_file = (
    os.path.join(dataset_dir, workbook_name)
    for workbook_name in (
        'CSE_Syllabus_Complete.xlsx',
        'ECE_Syllabus_Final.xlsx',
        'IT_Complete_Syllabus.xlsx',
    )
)

# Read one department's syllabus workbook and keep only rows with a numeric semester
def load_df(file_path, dept):
    """Load a syllabus workbook and tag it with its department.

    Args:
        file_path: Path of the Excel workbook; it must contain a 'Semester' column.
        dept: Department label written into a new 'Department' column on every row.

    Returns:
        A DataFrame in which 'Semester' has been coerced to numeric and rows whose
        semester could not be parsed have been dropped.
    """
    sheet = pd.read_excel(file_path).assign(Department=dept)
    # errors='coerce' turns unparseable semester cells into NaN so they can be dropped below
    sheet = sheet.assign(Semester=pd.to_numeric(sheet['Semester'], errors='coerce'))
    return sheet.dropna(subset=['Semester'])

# Load each department's workbook (CSE, ECE, IT - in that order)
cse_df, ece_df, it_df = (
    load_df(workbook_path, dept_code)
    for workbook_path, dept_code in (
        (cse_file, 'CSE'),
        (ece_file, 'ECE'),
        (it_file, 'IT'),
    )
)

# Stack the three frames into one table with a fresh 0..n-1 index
all_df = pd.concat([cse_df, ece_df, it_df], ignore_index=True)

# Parse data into dictionary
def parse_data(df):
    """Group unique subject names by department and semester.

    The subject is taken from 'Subject (with Paper Code)' when that cell is filled,
    otherwise from 'Subject'. Runs of whitespace inside a subject are collapsed to a
    single space. Rows with no usable subject, or with a missing semester, are skipped.

    Args:
        df: DataFrame with 'Semester' and 'Department' columns and at least one of
            the two subject columns.

    Returns:
        dict: {department: {semester (int): [subject, ...]}}, subjects in first-seen
        order without duplicates.
    """
    # Which subject columns exist does not change per row, so check it once.
    subject_columns = [
        column for column in ('Subject (with Paper Code)', 'Subject')
        if column in df.columns
    ]

    data = {}
    for _, row in df.iterrows():
        # int(NaN) would raise; a row without a semester cannot be filed anywhere.
        if pd.isna(row['Semester']):
            continue
        sem = int(row['Semester'])

        # First non-missing subject cell, in priority order.
        subject = next(
            (row[column] for column in subject_columns if pd.notna(row[column])),
            None,
        )
        if subject is None:
            continue

        subject = re.sub(r'\s+', ' ', str(subject)).strip()
        if not subject:
            continue

        subjects_for_sem = data.setdefault(row['Department'], {}).setdefault(sem, [])
        if subject not in subjects_for_sem:
            subjects_for_sem.append(subject)
    return data

# Build the lookup used by get_subjects: parse_data returns
# {department: {semester: [subject, ...]}} for the combined CSE/ECE/IT frame.
all_data = parse_data(all_df)

# Function to get subjects
def get_subjects(department, semester):
    """Return the subjects recorded for a department and semester.

    Args:
        department: Department code; matched case-insensitively and with
            surrounding whitespace ignored (e.g. ' cse ' -> 'CSE').
        semester: Semester number, as an int or anything int() accepts.

    Returns:
        The list stored in ``all_data`` for that department/semester, or an empty
        list when either is unknown or the semester cannot be converted to an int.
    """
    dept = str(department).strip().upper()
    try:
        semester = int(semester)
    except (TypeError, ValueError):
        # TypeError covers None and other non-numeric objects; ValueError covers text like "four".
        return []
    return all_data.get(dept, {}).get(semester, [])



In [2]:
# Smoke test: resolve one "<department> <semester>" query against the syllabus lookup
query = "IT 1"  # Example query, modify as needed (e.g., "ECE 3", "IT 5")
query_results = []
try:
    # Unpacking raises ValueError when the query does not contain two tokens
    dept, sem = query.split(maxsplit=1)
    query_results = get_subjects(dept, sem)
except ValueError:
    print("Invalid query format. Use format: 'department semester' (e.g., 'CSE 5')")

# Show what was asked and what came back
print(f"Query: {query}")
print(f"Retrieved Subjects: {query_results}")


Query: IT 1
Retrieved Subjects: ['BS- - Mathematics –IA', 'BS-PH101 - Physics-I', 'ES-EE101 - Basic Electrical Engineering', 'BS-PH191 - Physics-I Laboratory', 'ES-EE191 - Basic Electrical Engineering Lab', 'ES-ME192 - Workshop']


In [3]:
# Smoke test with numbered output; the resulting query_results list is reused by later cells
query = "CSE 4"  # Example query, modify as needed (e.g., "ECE 3", "IT 5")
query_results = []
try:
    # Unpacking raises ValueError when the query does not contain two tokens
    dept, sem = query.split(maxsplit=1)
    query_results = get_subjects(dept, sem)
except ValueError:
    print("Invalid query format. Use format: 'department semester' (e.g., 'CSE 5')")
    query_results = []

# Numbered listing first, then the raw list
print()
print("Query:", query)
print("\nRetrieved Subjects (Individual):")
if not query_results:
    print("No subjects found for the given department and semester.")
else:
    for i, subject in enumerate(query_results, start=1):
        print(f"{i}. {subject}")
print("\nRetrieved Subjects (List Format):")
print(query_results)



Query: CSE 4

Retrieved Subjects (Individual):
1. Biology (BSC)
2. Computer Architecture (PCC-CS402)
3. Computer Architecture Lab (PCC-CS492)
4. Design &Analysis Algorithm Lab (PCC-CS494)
5. Design and Analysis ofAlgorithms (PCC-CS404)
6. Discrete Mathematics (PCC-CS401)
7. Environmental Sciences (MC-401)
8. Formal Language & Automata Theory (PCC-CS403)

Retrieved Subjects (List Format):
['Biology (BSC)', 'Computer Architecture (PCC-CS402)', 'Computer Architecture Lab (PCC-CS492)', 'Design &Analysis Algorithm Lab (PCC-CS494)', 'Design and Analysis ofAlgorithms (PCC-CS404)', 'Discrete Mathematics (PCC-CS401)', 'Environmental Sciences (MC-401)', 'Formal Language & Automata Theory (PCC-CS403)']


RAG implementation

In [4]:
# Install the document-loading / RAG packages used below.
# NOTE(review): --no-deps skips each package's own dependencies; the pip output of the
# following install cell reports langchain-community's httpx-sse requirement as not installed.
# Versions are unpinned, so a fresh run may resolve different releases - consider pinning.
!pip install --no-deps PyPDF2 python-docx langchain langchain-community langchain-huggingface groq sentence-transformers pymupdf

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-huggingface
  Downloading langchain_huggingface-0.3.1-py3-none-any.whl.metadata (996 bytes)
Collecting groq
  Downloading groq-0.31.1-py3-none-any.whl.metadata (16 kB)
Collecting pymupdf
  Downloading pymupdf-1.26.4-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_community-0.3.29-py3-none-any.whl (2.5 MB)
[2K   

In [5]:
# Remaining packages: Groq chat wrapper, .docx text extraction, FAISS and the Supabase client.
# NOTE(review): both faiss-gpu and faiss-cpu are requested; presumably only one build is
# needed - confirm which one should remain. Versions are unpinned.
!pip install langchain-groq
!pip install docx2txt
!pip install faiss-gpu
!pip install faiss-cpu
!pip install supabase

Collecting langchain-groq
  Downloading langchain_groq-0.3.8-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain-core<1.0.0,>=0.3.75 (from langchain-groq)
  Downloading langchain_core-0.3.76-py3-none-any.whl.metadata (3.7 kB)
Downloading langchain_groq-0.3.8-py3-none-any.whl (16 kB)
Downloading langchain_core-0.3.76-py3-none-any.whl (447 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m447.5/447.5 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-core, langchain-groq
  Attempting uninstall: langchain-core
    Found existing installation: langchain-core 0.3.66
    Uninstalling langchain-core-0.3.66:
      Successfully uninstalled langchain-core-0.3.66
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-community 0.3.29 requires httpx-sse<1.0.0,>=0.4.0, which is not installed.
langchain-c

In [6]:
import os
import PyPDF2
import docx
import pandas as pd
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import SupabaseVectorStore
from langchain.schema import Document
from groq import Groq
from supabase import create_client, Client
import ipywidgets as widgets
from IPython.display import display, clear_output

In [7]:
import os
import traceback
import fitz
import pandas as pd
from pathlib import Path

In [8]:
# Initialize Supabase client
# Credentials come from the environment (e.g. populated from Kaggle secrets) so that the
# key is never stored in the notebook or its saved versions.
SUPABASE_URL = os.environ.get("SUPABASE_URL", "https://huvilonasynwyqycxyyf.supabase.co")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
if not SUPABASE_KEY:
    raise RuntimeError(
        "SUPABASE_KEY is not set. Provide it through the environment "
        "(for example from Kaggle secrets) before running this cell."
    )
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

In [9]:
# Initialize Groq client
# Groq() is constructed without arguments, so the key must already be present in the
# GROQ_API_KEY environment variable (e.g. populated from Kaggle secrets); it is
# deliberately not written into the notebook.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Provide it through the environment "
        "(for example from Kaggle secrets) before running this cell."
    )
client = Groq()

In [10]:
from langchain_community.document_loaders import (
    PyPDFLoader, TextLoader, CSVLoader, Docx2txtLoader
)
from langchain_core.documents import Document

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

In [11]:
# ================== ROBUST LOADERS ==================
class RobustPDFLoader(PyPDFLoader):
    """Try PyMuPDF first, then fall back to PyPDFLoader.

    PyMuPDF pages that contain only whitespace are skipped. If PyMuPDF fails, or
    yields no text at all, the inherited PyPDFLoader implementation is used instead.
    """
    def load(self):
        """Return one Document per non-empty page.

        Documents produced by the PyMuPDF path carry 'source', a 1-based 'page'
        number and 'loader' = 'pymupdf' in their metadata. Returns an empty list
        when both extraction strategies fail; errors are printed, not raised.
        """
        try:
            doc = fitz.open(self.file_path)
            try:
                pages = []
                for i, page in enumerate(doc):
                    text = page.get_text("text") or ""
                    if text.strip():
                        pages.append(Document(
                            page_content=text,
                            metadata={
                                "source": str(self.file_path),
                                "page": i+1,
                                "loader": "pymupdf"
                            }
                        ))
            finally:
                # Release the file handle even when a page fails to extract.
                doc.close()
            if pages:
                return pages
        except Exception as e:
            print(f"[PDF] PyMuPDF failed for {self.file_path}: {e}\n{traceback.format_exc()}")
        try:
            return super().load()
        except Exception as e:
            print(f"[PDF] PyPDFLoader failed for {self.file_path}: {e}\n{traceback.format_exc()}")
            return []


def load_excel_to_documents(file_path: str) -> list[Document]:
    """Convert every sheet of an Excel workbook into one Document of CSV text.

    Args:
        file_path: Path of the workbook.

    Returns:
        One Document per sheet whose CSV rendering is not blank, with 'source',
        'sheet' and 'loader' metadata. On any error the failure is printed and the
        documents collected so far (possibly none) are returned.
    """
    docs = []
    try:
        # The context manager closes the workbook handle even if a sheet fails to parse.
        with pd.ExcelFile(file_path) as xls:
            for sheet_name in xls.sheet_names:
                df = xls.parse(sheet_name)
                text = df.to_csv(index=False)
                if text.strip():
                    docs.append(Document(
                        page_content=text,
                        metadata={
                            "source": str(file_path),
                            "sheet": sheet_name,
                            "loader": "pandas_excel"
                        }
                    ))
    except Exception as e:
        print(f"[EXCEL] Failed {file_path}: {e}\n{traceback.format_exc()}")
    return docs


def load_file_to_documents(file_path: str) -> list[Document]:
    """Dispatch a single file to the loader that matches its extension.

    Supported: .pdf, .txt/.md/.log, .csv, .xls/.xlsx and .docx. Unsupported
    extensions and loader failures are reported on stdout and yield an empty list.
    """
    suffix = Path(file_path).suffix.lower()
    try:
        # Each entry is a thunk so that only the chosen loader is ever constructed.
        read_text = lambda: TextLoader(file_path, autodetect_encoding=True).load()
        read_excel = lambda: load_excel_to_documents(file_path)
        loaders = {
            ".pdf": lambda: RobustPDFLoader(file_path).load(),
            ".txt": read_text,
            ".md": read_text,
            ".log": read_text,
            ".csv": lambda: CSVLoader(file_path).load(),
            ".xls": read_excel,
            ".xlsx": read_excel,
            ".docx": lambda: Docx2txtLoader(file_path).load(),
        }
        loader = loaders.get(suffix)
        if loader is None:
            print(f"[SKIP] Unsupported file type: {file_path}")
            return []
        return loader()
    except Exception as e:
        print(f"[LOAD ERR] {file_path}: {e}\n{traceback.format_exc()}")
        return []

def gather_documents_recursive(root_dir: str, allowed_ext=None) -> list[Document]:
    """Walk directory recursively, load all supported files into Documents.

    Directories and files are visited in sorted order so the resulting document
    list (and anything built from it) is the same on every run.

    Args:
        root_dir: Directory to scan.
        allowed_ext: Optional iterable of extensions (with leading dot) to include;
            compared case-insensitively. Defaults to pdf/txt/csv/xls/xlsx/docx.

    Returns:
        All loaded Documents, each annotated with 'relpath' (file path relative to
        root_dir) and 'subfolder' (containing directory relative to root_dir);
        'source' is filled in only when the loader did not set it. An empty list
        is returned, with a warning, when root_dir is not a directory.
    """
    if allowed_ext is None:
        allowed_ext = {".pdf", ".txt", ".csv", ".xls", ".xlsx", ".docx"}
    else:
        # File suffixes are lower-cased below, so normalise the filter the same way.
        allowed_ext = {str(ext).lower() for ext in allowed_ext}

    if not os.path.isdir(root_dir):
        print(f"[WARN] Document root does not exist or is not a directory: {root_dir}")
        return []

    all_docs = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Sorting dirnames in place makes os.walk descend in a deterministic order.
        dirnames.sort()
        for name in sorted(filenames):
            ext = Path(name).suffix.lower()
            if ext in allowed_ext:
                fpath = os.path.join(dirpath, name)
                docs = load_file_to_documents(fpath)
                for d in docs:
                    d.metadata.setdefault("source", fpath)
                    d.metadata["relpath"] = os.path.relpath(fpath, root_dir)
                    d.metadata["subfolder"] = os.path.relpath(dirpath, root_dir)
                all_docs.extend(docs)
    return all_docs
# ================== RAG PIPELINE ==================
def build_vectorstore(root_dir: str, embeddings_model="sentence-transformers/all-MiniLM-L6-v2"):
    """Load every supported file under root_dir, chunk it and index it in FAISS.

    Args:
        root_dir: Directory scanned recursively for documents.
        embeddings_model: Model name passed to HuggingFaceEmbeddings.

    Returns:
        The FAISS vector store built from 1000-character chunks with 200 overlap.

    Raises:
        ValueError: If no documents were loaded or splitting produced no chunks,
            so the failure is reported here rather than inside the index builder.
    """
    print(f"[INFO] Loading documents from {root_dir} ...")
    docs = gather_documents_recursive(root_dir)

    print(f"[INFO] Loaded {len(docs)} raw documents. Splitting ...")
    if not docs:
        raise ValueError(f"No supported documents could be loaded from {root_dir!r}")
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(docs)

    print(f"[INFO] Split into {len(chunks)} chunks. Building FAISS index ...")
    if not chunks:
        raise ValueError(f"Documents under {root_dir!r} produced no text chunks to index")
    embeddings = HuggingFaceEmbeddings(model_name=embeddings_model)
    vectorstore = FAISS.from_documents(chunks, embeddings)

    return vectorstore


def build_rag_chain(vectorstore, groq_api_key: str, model="llama-3.3-70b-versatile"):
    """Wire a Groq chat model to the vector store as a RetrievalQA chain.

    The retriever is created with k=5, the chain uses the "stuff" chain type and
    is configured to return its source documents alongside the answer.
    """
    return RetrievalQA.from_chain_type(
        llm=ChatGroq(api_key=groq_api_key, model=model),
        retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
        chain_type="stuff",
        return_source_documents=True,
    )


def rag_query(qa_chain, query: str):
    """Run one question through the chain, print the answer and its sources.

    Args:
        qa_chain: Object whose ``invoke({"query": ...})`` returns a mapping with
            "result" and "source_documents" entries.
        query: The question text.

    Returns:
        The mapping returned by the chain, unchanged.
    """
    response = qa_chain.invoke({"query": query})

    print("\n=== ANSWER ===")
    print(response["result"])

    print("\n=== SOURCES ===")
    for source_doc in response["source_documents"]:
        meta = source_doc.metadata
        # Prefer the path relative to the data root; fall back to the loader's 'source'.
        label = meta.get('relpath', meta.get('source'))
        print(f"- {label}")
    return response


# ================== USAGE EXAMPLE ==================
if __name__ == "__main__":
    DATA_DIR = "/kaggle/input/full-final-rag-nep/MAKAUT_Syllabus"   # <-- adjust path in Kaggle
    # Take the key from the environment (e.g. populated from Kaggle secrets) instead of
    # embedding it in the notebook.
    GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
    if not GROQ_API_KEY:
        raise RuntimeError(
            "GROQ_API_KEY is not set. Provide it through the environment "
            "(for example from Kaggle secrets) before running this cell."
        )

    # Build Vectorstore
    vs = build_vectorstore(DATA_DIR)

    # Create RAG Chain (rag_chain is reused by later cells)
    rag_chain = build_rag_chain(vs, GROQ_API_KEY)

    # Query Example
    rag_query(rag_chain, "Operating Systm in taught in which Semester")

[INFO] Loading documents from /kaggle/input/full-final-rag-nep/MAKAUT_Syllabus ...
[INFO] Loaded 212 raw documents. Splitting ...
[INFO] Split into 697 chunks. Building FAISS index ...


2025-09-17 22:37:49.384672: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758148669.588678      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758148669.649152      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


=== ANSWER ===
The provided context does not explicitly mention the semester in which the Operating System course is taught. It only provides the syllabus and course outcomes for the Operating System course (PCC-CS502) for B. Tech in Information Technology, but does not specify the semester.

=== SOURCES ===
- IT/AllSem23.pdf
- IT/AllSem23.pdf
- CSE/AllSem23.pdf
- CSE/AllSem23.pdf
- CSE/AllSem23.pdf


In [12]:
# Maps each subject name to its extracted contact-hours string (e.g. "3L/week"),
# or to "Unknown" when nothing could be extracted; filled by the loop below.
contact_hours_dict = {}

# Function to extract contact hours from the response string
def extract_contact_hours(response):
    """Pull a contact-hours token such as "2L+1T" or "3L/week" out of an answer.

    Any number of L/T/P components is accepted ("3L+1T+2P"), with optional spaces
    around "+"; the returned token has all whitespace removed. A component must
    not be glued to other letters or digits, so "4PM" or the digits of a course
    code are not mistaken for hours.

    Args:
        response: Answer text. Anything that is not a non-empty string yields None.

    Returns:
        The matched token, or None when the text says "don't know" anywhere
        (even if it also mentions hours) or contains no contact-hours token.
    """
    import re

    if not isinstance(response, str) or not response:
        return None
    if "don't know" in response.lower():
        return None  # Return None for unknown hours

    match = re.search(
        r"(?<![A-Za-z0-9])"               # not the tail of a word or a longer number
        r"\d+[LTP]"                       # first component, e.g. 3L
        r"(?:\s*\+?\s*\d+[LTP])*"         # further components, e.g. +1T +2P
        r"(?![A-Za-z])"                   # the letter must end the token (rejects 4PM)
        r"(?:\s*/\s*week)?",              # optional "/week" suffix
        response,
    )
    if match is None:
        return None
    return re.sub(r"\s+", "", match.group(0))

# Ask the RAG chain for the weekly contact hours of every subject in query_results
for subject in query_results:
    query = f"Contact hours/week of {subject}"
    response = rag_query(rag_chain, query)

    # rag_query returns the chain's result mapping; tolerate a plain string as well
    if isinstance(response, dict):
        answer_text = response.get('result', '')
    else:
        answer_text = response

    contact_hours = extract_contact_hours(answer_text)
    # Subjects without an extractable value are recorded as "Unknown"
    contact_hours_dict[subject] = contact_hours or "Unknown"

# Output the resulting dictionary
print(contact_hours_dict)


=== ANSWER ===
The contact hours/week for the Biology course (BSC 401) is 2L+1T, which translates to 2 lectures and 1 tutorial per week, but I don't know the specific contact hours for a general Biology course, this one is for Chemistry and other courses titled as BSC 401, which is actually Chemistry-I.

=== SOURCES ===
- CSE/AllSem23.pdf
- CSE/AllSem23.pdf
- CSE/AllSem23.pdf
- CSE/AllSem23.pdf
- IT/AllSem23.pdf

=== ANSWER ===
The contact hours/week of Computer Architecture (PCC-CS402) is 3L/week.

=== SOURCES ===
- IT/AllSem23.pdf
- CSE/AllSem23.pdf
- CSE/AllSem23.pdf
- IT/AllSem23.pdf
- IT/AllSem23.pdf

=== ANSWER ===
The contact hours/week for Computer Architecture Lab (PCC-CS492) is 4P/week, where 'P' denotes practical hours.

=== SOURCES ===
- IT/AllSem23.pdf
- CSE/AllSem23.pdf
- IT/AllSem23.pdf
- IT/AllSem23.pdf
- CSE/AllSem23.pdf

=== ANSWER ===
I don't know the contact hours/week of Design & Analysis Algorithm Lab (PCC-CS494). The provided context only mentions the course cod

In [13]:
# Semester number and department code for the routine generator.
sem = int(input("enter sem"))
# Normalise the department so stray spaces or lower-case input still match the
# upper-case codes offered in the prompt.
variable = dept = input("enter department : (CSE,ECE,IT)").strip().upper()

enter sem 4
enter department : (CSE,ECE,IT) CSE


In [14]:
import pandas as pd
import re
from collections import defaultdict
import random

class SmartRoutineGenerator:
    def __init__(self):
        self.time_slots = [
            '9-10', '10-11', '11-12', '12-1', '2-3', '3-4', '4-5'
        ]
        self.days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri']
        
    def load_data(self, faculty_file, room_file, student_file):
        """Load data from CSV/Excel files"""
        try:
            self.faculty_df = pd.read_excel(faculty_file) if faculty_file.endswith('.xlsx') else pd.read_csv(faculty_file)
            self.room_df = pd.read_excel(room_file) if room_file.endswith('.xlsx') else pd.read_csv(room_file)
            self.student_df = pd.read_excel(student_file) if student_file.endswith('.xlsx') else pd.read_csv(student_file)
            
            # Clean column names
            self.faculty_df.columns = self.faculty_df.columns.str.strip()
            self.room_df.columns = self.room_df.columns.str.strip()
            self.student_df.columns = self.student_df.columns.str.strip()
            
            print("Data loaded successfully!")
            
            # Debug: Show data structure
            print("\n[DEBUG] Faculty DataFrame Info:")
            print(f"Columns: {list(self.faculty_df.columns)}")
            print(f"Shape: {self.faculty_df.shape}")
            print("\nFirst few rows:")
            print(self.faculty_df.head())
            
            print("\n[DEBUG] Room DataFrame Info:")
            print(f"Columns: {list(self.room_df.columns)}")
            print(f"Shape: {self.room_df.shape}")
            print("\nFirst few rows:")
            print(self.room_df.head())
            
            print("\n[DEBUG] Student DataFrame Info:")
            print(f"Columns: {list(self.student_df.columns)}")
            print(f"Shape: {self.student_df.shape}")
            print("\nFirst few rows:")
            print(self.student_df.head())
            
            return True
        except Exception as e:
            print(f"Error loading data: {e}")
            return False
    
    def parse_contact_hours(self, contact_hour_string):
        """Parse contact hours string like '3L+1T', '4P/week', etc."""
        if not contact_hour_string or contact_hour_string == "Unknown":
            return {'L': 0, 'T': 0, 'P': 0}
        
        # Remove '/week' if present
        contact_hour_string = contact_hour_string.replace('/week', '')
        
        hours = {'L': 0, 'T': 0, 'P': 0}
        
        # Find all patterns like 3L, 1T, 4P
        matches = re.findall(r'(\d+)([LTP])', contact_hour_string)
        for count, type_char in matches:
            hours[type_char] = int(count)
        
        return hours
    
    def parse_unavailable_slots(self, unavailable_str):
        """Parse unavailable slots string"""
        if pd.isna(unavailable_str) or unavailable_str == "":
            return []
        
        slots = []
        # Split by comma and parse each slot
        for slot in str(unavailable_str).split(','):
            slot = slot.strip()
            if '-' in slot:
                # Format: "Mon 9-10" or "Mon 9-10, Tue 11-12"
                if ' ' in slot:
                    day, time = slot.split(' ', 1)
                    slots.append((day.strip(), time.strip()))
        return slots
    
    def parse_preferred_slots(self, preferred_str):
        """Parse preferred slots string"""
        if pd.isna(preferred_str) or preferred_str == "":
            return []
        
        slots = []
        for slot in str(preferred_str).split(','):
            slot = slot.strip()
            if '-' in slot:
                if ' ' in slot:
                    day, time = slot.split(' ', 1)
                    slots.append((day.strip(), time.strip()))
        return slots
    
    def get_suitable_rooms(self, subject_name, department, student_count):
        """Get suitable rooms for a subject"""
        # Determine room type needed
        if 'Lab' in subject_name or 'lab' in subject_name:
            room_type = 'Lab'
        else:
            room_type = 'Classroom'
        
        print(f"[DEBUG] Looking for {room_type} rooms for {subject_name}, capacity >= {student_count}")
        
        # First try: exact match
        suitable_rooms = self.room_df[
            (self.room_df['room_type'].str.contains(room_type, case=False, na=False)) &
            (self.room_df['capacity'] >= student_count) &
            ((self.room_df['department'].str.upper() == department.upper()) | 
             (self.room_df['department'].str.upper() == 'GENERAL'))
        ].copy()
        
        # If no rooms found, try more flexible matching
        if suitable_rooms.empty:
            print(f"[DEBUG] No exact match, trying flexible matching...")
            # Try any room with sufficient capacity
            suitable_rooms = self.room_df[
                (self.room_df['capacity'] >= student_count)
            ].copy()
            
            # If still no rooms, take any room (ignore capacity)
            if suitable_rooms.empty:
                print(f"[DEBUG] No capacity match, taking any available room...")
                suitable_rooms = self.room_df.copy()
        
        room_list = suitable_rooms['room_name'].tolist()
        print(f"[DEBUG] Found suitable rooms: {room_list}")
        return room_list
    
    def get_qualified_faculty(self, subject_name, department, year):
        """Get faculty who can teach a specific subject"""
        qualified_faculty = []
        
        print(f"[DEBUG] Looking for faculty for {subject_name} in {department} year {year}")
        print(f"[DEBUG] Available faculty: {list(self.faculty_df['name'])}")
        
        for _, faculty in self.faculty_df.iterrows():
            # Check if faculty teaches this subject
            faculty_subjects = str(faculty.get('subjects', '')).lower()
            faculty_name = faculty.get('name', '')
            
            print(f"[DEBUG] Checking {faculty_name}: subjects = '{faculty_subjects}'")
            
            # More flexible subject matching
            subject_keywords = []
            
            # Extract key terms from subject name
            if 'biology' in subject_name.lower():
                subject_keywords.extend(['biology', 'bio'])
            if 'computer architecture' in subject_name.lower():
                subject_keywords.extend(['computer architecture', 'architecture', 'computer'])
            if 'lab' in subject_name.lower():
                subject_keywords.extend(['lab', 'laboratory', 'practical'])
            if 'algorithm' in subject_name.lower():
                subject_keywords.extend(['algorithm', 'algorithms'])
            if 'discrete mathematics' in subject_name.lower():
                subject_keywords.extend(['discrete', 'mathematics', 'math'])
            if 'environmental' in subject_name.lower():
                subject_keywords.extend(['environmental', 'environment'])
            if 'formal language' in subject_name.lower() or 'automata' in subject_name.lower():
                subject_keywords.extend(['formal', 'language', 'automata', 'theory'])
            if 'object oriented' in subject_name.lower():
                subject_keywords.extend(['object', 'oriented', 'oop', 'programming'])
            
            # If no specific keywords, use the subject name words
            if not subject_keywords:
                subject_keywords = [word.lower().strip('()[]') for word in subject_name.split() 
                                  if len(word.strip('()[]')) > 2]
            
            print(f"[DEBUG] Subject keywords: {subject_keywords}")
            
            # Check if any keyword matches
            has_subject_match = any(keyword in faculty_subjects for keyword in subject_keywords)
            
            if has_subject_match or 'general' in faculty_subjects or 'all' in faculty_subjects:
                # Check department and year compatibility
                faculty_dept = str(faculty.get('department', '')).upper()
                faculty_year = str(faculty.get('year_to_teach', ''))
                
                print(f"[DEBUG] {faculty_name} - Dept: {faculty_dept}, Year: {faculty_year}")
                
                dept_match = (faculty_dept == department.upper() or 
                             faculty_dept == 'GENERAL' or 
                             faculty_dept == 'ALL')
                
                year_match = (str(year) in faculty_year or 
                             faculty_year.upper() == 'ALL' or
                             faculty_year == '')
                
                if dept_match and year_match:
                    qualified_faculty.append({
                        'name': faculty_name,
                        'max_load_hours': faculty.get('max_load_hours', 20),
                        'unavailable_slots': self.parse_unavailable_slots(faculty.get('unavailable_slots', '')),
                        'preferred_slots': self.parse_preferred_slots(faculty.get('preferred_slots', ''))
                    })
                    print(f"[DEBUG] ✓ {faculty_name} qualified for {subject_name}")
                else:
                    print(f"[DEBUG] ✗ {faculty_name} not qualified - dept_match: {dept_match}, year_match: {year_match}")
        
        print(f"[DEBUG] Qualified faculty for {subject_name}: {[f['name'] for f in qualified_faculty]}")
        return qualified_faculty
    
    def is_faculty_available(self, faculty_name, day, time_slot, faculty_schedule):
        """Check if faculty is available at given time"""
        return faculty_schedule.get(faculty_name, {}).get(day, {}).get(time_slot) is None
    
    def is_room_available(self, room_name, day, time_slot, room_schedule):
        """Check if room is available at given time"""
        return room_schedule.get(room_name, {}).get(day, {}).get(time_slot) is None
    
    def assign_class(self, subject_name, contact_hours, faculty_list, rooms, 
                    section, day, time_slot, faculty_schedule, room_schedule, 
                    faculty_workload, section_schedule):
        """Try to assign a single class"""
        
        # Try to find available faculty
        available_faculty = []
        for faculty in faculty_list:
            faculty_name = faculty['name']
            
            # Check availability
            if not self.is_faculty_available(faculty_name, day, time_slot, faculty_schedule):
                continue
            
            # Check unavailable slots
            unavailable = faculty['unavailable_slots']
            if (day, time_slot) in unavailable:
                continue
            
            # Check workload
            current_load = faculty_workload.get(faculty_name, 0)
            if current_load >= faculty['max_load_hours']:
                continue
            
            available_faculty.append(faculty)
        
        if not available_faculty:
            return None
        
        # Choose faculty (prefer those with preferred slots)
        chosen_faculty = None
        for faculty in available_faculty:
            preferred = faculty['preferred_slots']
            if (day, time_slot) in preferred:
                chosen_faculty = faculty
                break
        
        if not chosen_faculty:
            chosen_faculty = available_faculty[0]  # Take first available
        
        # Find available room
        available_rooms = []
        for room in rooms:
            if self.is_room_available(room, day, time_slot, room_schedule):
                available_rooms.append(room)
        
        if not available_rooms:
            return None
        
        chosen_room = available_rooms[0]
        faculty_name = chosen_faculty['name']
        
        # Make assignment
        if faculty_name not in faculty_schedule:
            faculty_schedule[faculty_name] = {}
        if day not in faculty_schedule[faculty_name]:
            faculty_schedule[faculty_name][day] = {}
        
        if chosen_room not in room_schedule:
            room_schedule[chosen_room] = {}
        if day not in room_schedule[chosen_room]:
            room_schedule[chosen_room][day] = {}
        
        faculty_schedule[faculty_name][day][time_slot] = f"{subject_name} - {section}"
        room_schedule[chosen_room][day][time_slot] = f"{subject_name} - {section}"
        faculty_workload[faculty_name] = faculty_workload.get(faculty_name, 0) + 1
        
        section_schedule[section][day][time_slot] = {
            'subject': subject_name,
            'faculty': faculty_name,
            'room': chosen_room
        }
        
        return True
    
    def generate_routine(self, dept, sem, contact_hours_dict):
        """Build and format a weekly routine for every section of ``dept``.

        Args:
            dept: Department label compared against ``student_df['department']``.
            sem: Value converted with ``int()`` and compared against
                ``student_df['year']``.
            contact_hours_dict: Mapping of subject name to a contact-hours
                string; entries whose value is ``"Unknown"`` are skipped.

        Returns:
            The value produced by ``self.format_routine_output`` for the filled
            schedules, or a "No sections found ..." message when the student
            table has no row for the department/year.
        """
        year = int(sem)

        # Student rows describing the sections of this department and year.
        dept_matches = self.student_df['department'] == dept
        year_matches = self.student_df['year'] == year
        sections_data = self.student_df[dept_matches & year_matches]
        if sections_data.empty:
            return f"No sections found for {dept} Year {year}"

        sections = sections_data['section'].tolist()

        # Bookkeeping shared by every assign_class call.
        faculty_schedule, room_schedule, faculty_workload = {}, {}, {}
        section_schedules = {
            section: {day: {} for day in self.days} for section in sections
        }

        for subject_name, contact_hours_str in contact_hours_dict.items():
            if contact_hours_str == "Unknown":
                continue

            contact_hours = self.parse_contact_hours(contact_hours_str)
            total_hours_needed = sum(contact_hours[kind] for kind in ('L', 'T', 'P'))

            qualified_faculty = self.get_qualified_faculty(subject_name, dept, year)
            if not qualified_faculty:
                # Nobody matched the subject: fall back to the first three faculty rows.
                print(f"[DEBUG] No qualified faculty found for {subject_name}, trying any available faculty...")
                for _, faculty in self.faculty_df.head(3).iterrows():
                    qualified_faculty.append({
                        'name': faculty['name'],
                        'max_load_hours': faculty.get('max_load_hours', 20),
                        'unavailable_slots': self.parse_unavailable_slots(faculty.get('unavailable_slots', '')),
                        'preferred_slots': self.parse_preferred_slots(faculty.get('preferred_slots', ''))
                    })
            if not qualified_faculty:
                print(f"No qualified faculty found for {subject_name}")
                continue

            # The head count lives under one of two column names; default to 50.
            first_section_row = sections_data.iloc[0]
            for count_column in ('total_number_of_students', 'total_students'):
                try:
                    student_count = first_section_row[count_column]
                    break
                except KeyError:
                    continue
            else:
                student_count = 50  # Default value
                print(f"[DEBUG] Using default student count: {student_count}")

            suitable_rooms = self.get_suitable_rooms(subject_name, dept, student_count)
            if not suitable_rooms:
                print(f"[DEBUG] No suitable rooms found for {subject_name}, using any available room...")
                suitable_rooms = self.room_df['room_name'].tolist()[:3]  # Take first 3 rooms

            for section in sections:
                classes_assigned = 0

                # Walk the week in (day, slot) order until the quota is met.
                week = ((day, time_slot) for day in self.days for time_slot in self.time_slots)
                for day, time_slot in week:
                    if classes_assigned >= total_hours_needed:
                        break

                    # A slot this section already uses cannot be reused.
                    if section_schedules[section][day].get(time_slot):
                        continue

                    placed = self.assign_class(subject_name, contact_hours, qualified_faculty,
                                               suitable_rooms, section, day, time_slot,
                                               faculty_schedule, room_schedule,
                                               faculty_workload, section_schedules)
                    if placed:
                        classes_assigned += 1

                if classes_assigned < total_hours_needed:
                    print(f"Warning: Could only assign {classes_assigned}/{total_hours_needed} classes for {subject_name} - Section {section}")

        return self.format_routine_output(section_schedules)
    
    def format_routine_output(self, section_schedules):
        """Render the per-section schedules as plain text.

        Sections are emitted in sorted order. Each section gets a heading line,
        then one line per day listing every time slot as either
        "<slot>: <subject> by <faculty> in <room>" or "<slot>: Free", and
        finally an empty line separating it from the next section.

        Args:
            section_schedules: ``{section: {day: {time_slot: info}}}`` where
                ``info`` has the keys 'subject', 'faculty' and 'room'.

        Returns:
            All lines joined with newlines.
        """
        lines = []

        for section in sorted(section_schedules):
            lines.append(f"Routine for Section {section}:")

            for day in self.days:
                cells = [f"{day}:"]
                for time_slot in self.time_slots:
                    entry = section_schedules[section][day].get(time_slot)
                    if not entry:
                        cells.append(f"{time_slot}: Free")
                        continue
                    cells.append(f"{time_slot}: {entry['subject']} by {entry['faculty']} in {entry['room']}")
                lines.append(" ".join(cells))

            # Blank separator after each section.
            lines.append("")

        return "\n".join(lines)

# Function to generate routine with existing variables
def generate_smart_routine_from_variables(dept, sem, contact_hours_dict):
    """Load the Kaggle college data and return the routine for ``dept``/``sem``.

    For each of the three input tables the ``.csv`` file is preferred; when it
    is missing but an ``.xlsx`` file with the same stem exists, that one is
    used instead. If neither exists the ``.csv`` path is passed on unchanged.

    Returns:
        Whatever ``SmartRoutineGenerator.generate_routine`` returns, or a
        failure message when ``load_data`` reports an error.
    """
    import os

    generator = SmartRoutineGenerator()

    # Kaggle dataset location and the three tables expected inside it.
    dataset_dir = '/kaggle/input/college-data/'

    def _first_existing_variant(csv_path):
        # Same probing order as before: the .csv path first, then its .xlsx twin.
        for candidate in (csv_path, csv_path.replace('.csv', '.xlsx')):
            if os.path.exists(candidate):
                return candidate
        return csv_path

    faculty_file = _first_existing_variant(dataset_dir + 'faculty_assignments.csv')
    room_file = _first_existing_variant(dataset_dir + 'room_assignments.csv')
    student_file = _first_existing_variant(dataset_dir + 'student_sections.csv')

    loaded = generator.load_data(faculty_file, room_file, student_file)
    if not loaded:
        return "Failed to load data files from Kaggle dataset"

    # Department and semester are forwarded untouched.
    return generator.generate_routine(dept, sem, contact_hours_dict)

# Usage with variables produced by earlier cells.
#
# This cell expects three names to exist already:
#   dept               -- department code from the query parsing, e.g. "CSE"
#   sem                -- semester from the query parsing, e.g. "4"
#   contact_hours_dict -- {subject: contact-hours string} from the RAG output
#
# With those in place, generate the routine and show it:
routine = generate_smart_routine_from_variables(
    dept=dept,
    sem=sem,
    contact_hours_dict=contact_hours_dict,
)
print(routine)

Data loaded successfully!

[DEBUG] Faculty DataFrame Info:
Columns: ['faculty_id', 'name', 'department', 'subjects', 'year_to_teach', 'max_load_hours', 'unavailable_slots', 'preferred_slots']
Shape: (60, 8)

First few rows:
  faculty_id          name department  \
0      F0001  Aarav Sharma        CSE   
1      F0002    Isha Verma        CSE   
2      F0003   Rohan Mehta        CSE   
3      F0004    Neha Gupta        CSE   
4      F0005  Aditya Singh        CSE   

                                            subjects year_to_teach  \
0  Data Structure & Algorithms|Mathematics IA|Phy...           1|2   
1  Operating Systems|Database Management Systems|...           2|3   
2  Computer Networks|Compiler Design|Machine Lear...           3|4   
3  Artificial Intelligence|Formal Language & Auto...         2|3|4   
4  Cloud Computing|Theory of Computation|Mathemat...       1|2|3|4   

   max_load_hours                unavailable_slots  \
0              16  Mon 10:00-12:00|Wed 14:00-16:00   


In [33]:
import pandas as pd
import re
import random
import os

class SmartRoutineGenerator:
    """Greedy weekly class-routine (timetable) builder.

    Three tables are loaded with ``load_data`` (faculty, rooms, student
    sections). ``generate_routine`` then walks the week slot by slot and, for
    every subject and section, books a free faculty member and a free room
    until the subject's weekly contact hours are placed. The result is
    rendered as a text table by ``format_routine_output_table``.

    Faculty and room picks are random; pass ``seed`` to the constructor to make
    a run reproducible.
    """

    DAYS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri']
    TIME_SLOTS = ['9-10', '10-11', '11-12', '12-1', '2-3', '3-4', '4-5']

    # Slot that is rendered as BREAK and never receives a class.
    BREAK_SLOT = '12-1'

    # Column titles used in the table header for each entry of DAYS.
    _DAY_TITLES = {
        'Mon': 'Monday', 'Tue': 'Tuesday', 'Wed': 'Wednesday',
        'Thu': 'Thursday', 'Fri': 'Friday',
    }

    # 24-hour (start, end) of every slot label; used to map an arbitrary
    # "HH:MM-HH:MM" unavailability range onto the slots it overlaps.
    _SLOT_HOURS = {
        '9-10': (9, 10), '10-11': (10, 11), '11-12': (11, 12), '12-1': (12, 13),
        '2-3': (14, 15), '3-4': (15, 16), '4-5': (16, 17),
    }

    # Ranges that were always recognised, with the exact slots they have
    # always produced. Checked first so existing data keeps its behaviour.
    _KNOWN_RANGE_SLOTS = (
        ('10:00-12:00', ('10-11', '11-12')),
        ('14:00-16:00', ('2-3', '3-4')),
        ('09:00-10:00', ('9-10',)),
        ('11:00-13:00', ('11-12', '12-1')),
        ('13:00-15:00', ('12-1', '2-3')),
    )

    # Source of randomness. Defaults to the shared ``random`` module so that a
    # global ``random.seed(...)`` still applies; replaced per instance when a
    # seed is given to the constructor.
    _rng = random

    def __init__(self, seed=None):
        """Create an empty generator.

        Args:
            seed: Optional seed for the faculty/room random choices. ``None``
                (the default) uses the global ``random`` module state.
        """
        self.faculty_df = None
        self.room_df = None
        self.student_df = None
        if seed is not None:
            self._rng = random.Random(seed)

    @staticmethod
    def _read_table(path):
        """Read one input table; Excel files by extension, everything else as CSV."""
        if str(path).lower().endswith(('.xlsx', '.xls')):
            return pd.read_excel(path)
        return pd.read_csv(path, low_memory=True)

    def load_data(self, faculty_file, room_file, student_file):
        """Load the faculty, room and student tables and strip column names.

        Returns:
            True when all three tables were read and none is empty; False
            otherwise (the reason is printed with an ``[ERROR]`` prefix).
        """
        try:
            self.faculty_df = self._read_table(faculty_file)
            self.room_df = self._read_table(room_file)
            self.student_df = self._read_table(student_file)

            # Header cells may carry stray whitespace.
            self.faculty_df.columns = self.faculty_df.columns.str.strip()
            self.room_df.columns = self.room_df.columns.str.strip()
            self.student_df.columns = self.student_df.columns.str.strip()

            if self.faculty_df.empty or self.room_df.empty or self.student_df.empty:
                print("[ERROR] One or more dataframes are empty")
                return False
            return True
        except Exception as e:
            # Deliberately broad: any read problem is reported, not raised.
            print(f"[ERROR] Failed to load data: {e}")
            return False

    def parse_contact_hours(self, contact_hour_string):
        """Parse a string such as ``"3L+1T"`` into ``{'L': 3, 'T': 1, 'P': 0}``.

        A trailing ``/week`` is ignored. Empty input, ``"Unknown"`` and any
        non-string value (for example NaN from a dataframe) yield all zeros.
        """
        hours = {'L': 0, 'T': 0, 'P': 0}
        if (not isinstance(contact_hour_string, str)
                or not contact_hour_string
                or contact_hour_string == "Unknown"):
            return hours
        contact_hour_string = contact_hour_string.replace('/week', '')
        for count, type_char in re.findall(r'(\d+)([LTP])', contact_hour_string):
            hours[type_char] = int(count)
        return hours

    @classmethod
    def _slots_overlapping(cls, time_range):
        """Return the slot labels overlapped by an ``HH:MM-HH:MM`` range."""
        match = re.search(r'(\d{1,2}):(\d{2})\s*-\s*(\d{1,2}):(\d{2})', time_range)
        if not match:
            return []
        start_hour, start_min, end_hour, end_min = map(int, match.groups())
        start = start_hour * 60 + start_min
        end = end_hour * 60 + end_min
        return [
            label for label, (slot_start, slot_end) in cls._SLOT_HOURS.items()
            if slot_start * 60 < end and slot_end * 60 > start
        ]

    def parse_unavailable_slots(self, unavailable_str):
        """Turn ``"Mon 10:00-12:00|Wed 14:00-16:00"`` into ``(day, slot)`` pairs.

        Entries are separated by ``|`` and each is ``"<day> <HH:MM-HH:MM>"``.
        The five ranges in ``_KNOWN_RANGE_SLOTS`` map to their fixed slots; any
        other well-formed range maps to every slot it overlaps. Malformed
        entries are ignored.

        Returns:
            A list of ``(day, slot_label)`` tuples; empty for NaN or ``""``.
        """
        if pd.isna(unavailable_str) or unavailable_str == "":
            return []
        slots = []
        for slot in str(unavailable_str).split('|'):
            slot = slot.strip()
            if ':' not in slot:
                continue
            parts = slot.split()
            if len(parts) < 2:
                continue
            day, time_range = parts[0], parts[1]
            for known_range, labels in self._KNOWN_RANGE_SLOTS:
                if known_range in time_range:
                    slots.extend((day, label) for label in labels)
                    break
            else:
                # Not one of the fixed ranges: derive the slots from the clock times.
                slots.extend((day, label) for label in self._slots_overlapping(time_range))
        return slots

    def get_suitable_rooms(self, subject_name, department, student_count):
        """Pick room names for a subject, relaxing the criteria step by step.

        1. Rooms of the right type ('Lab' when the subject name contains
           "Lab"/"lab", otherwise 'Classroom'), with enough capacity, owned by
           the department or marked GENERAL.
        2. Otherwise any room with enough capacity.
        3. Otherwise the first three rooms of the table.
        4. Otherwise the placeholder ``['DefaultRoom']``.
        """
        room_type = 'Lab' if 'Lab' in subject_name or 'lab' in subject_name else 'Classroom'
        room_department = self.room_df['department'].str.upper()
        suitable_rooms = self.room_df[
            (self.room_df['room_type'].str.contains(room_type, case=False, na=False)) &
            (self.room_df['capacity'] >= student_count) &
            ((room_department == department.upper()) | (room_department == 'GENERAL'))
        ]['room_name'].tolist()
        if not suitable_rooms:
            suitable_rooms = self.room_df[self.room_df['capacity'] >= student_count]['room_name'].tolist()
        if not suitable_rooms:
            suitable_rooms = self.room_df['room_name'].tolist()[:3]
        return suitable_rooms or ['DefaultRoom']

    @staticmethod
    def _subject_keywords(subject_name):
        """Return the lowercase keywords used to match a subject to faculty."""
        lowered = subject_name.lower()
        if 'biology' in lowered:
            return ['biology', 'bio']
        if 'computer architecture' in lowered:
            return ['computer architecture', 'architecture', 'computer']
        if 'lab' in lowered:
            return ['lab', 'laboratory', 'practical']
        if 'algorithm' in lowered:
            return ['algorithm', 'algorithms']
        if 'discrete mathematics' in lowered:
            return ['discrete', 'mathematics', 'math']
        if 'environmental' in lowered:
            return ['environmental', 'environment']
        if 'formal language' in lowered or 'automata' in lowered:
            return ['formal', 'language', 'automata', 'theory']
        # Generic case: every word longer than two characters, brackets stripped.
        return [word.lower().strip('()[]') for word in subject_name.split()
                if len(word.strip('()[]')) > 2]

    def get_qualified_faculty(self, subject_name, department, year):
        """List faculty who can teach ``subject_name`` to ``department``/``year``.

        A faculty row qualifies when its ``subjects`` text contains one of the
        subject keywords (or 'general'/'all'), its department is the requested
        one (or GENERAL/ALL), and its ``year_to_teach`` text contains the year
        (or is 'ALL' or empty).

        Returns:
            A list of dicts with 'name', 'max_load_hours' and
            'unavailable_slots'. When nobody qualifies, the first two faculty
            rows are returned with a load limit of 20 and no unavailability.
        """
        # The keywords depend only on the subject, so compute them once.
        subject_keywords = self._subject_keywords(subject_name)
        qualified_faculty = []
        for _, faculty in self.faculty_df.iterrows():
            faculty_subjects = str(faculty.get('subjects', '')).lower()
            teaches_subject = (
                any(keyword in faculty_subjects for keyword in subject_keywords)
                or 'general' in faculty_subjects
                or 'all' in faculty_subjects
            )
            if not teaches_subject:
                continue
            faculty_dept = str(faculty.get('department', '')).upper()
            faculty_year = str(faculty.get('year_to_teach', ''))
            if (faculty_dept in [department.upper(), 'GENERAL', 'ALL'] and
                    (str(year) in faculty_year or faculty_year.upper() == 'ALL' or faculty_year == '')):
                qualified_faculty.append({
                    'name': faculty.get('name', ''),
                    'max_load_hours': faculty.get('max_load_hours', 20),
                    'unavailable_slots': self.parse_unavailable_slots(faculty.get('unavailable_slots', ''))
                })

        return qualified_faculty or [{
            'name': faculty['name'],
            'max_load_hours': 20,
            'unavailable_slots': []
        } for _, faculty in self.faculty_df.head(2).iterrows()]

    def is_faculty_available(self, faculty_name, day, time_slot, faculty_schedule):
        """True when ``faculty_schedule`` has no booking for that faculty/day/slot."""
        return faculty_schedule.get(faculty_name, {}).get(day, {}).get(time_slot) is None

    def is_room_available(self, room_name, day, time_slot, room_schedule):
        """True when ``room_schedule`` has no booking for that room/day/slot."""
        return room_schedule.get(room_name, {}).get(day, {}).get(time_slot) is None

    def assign_class(self, subject_name, contact_hours, faculty_list, rooms,
                    section, day, time_slot, faculty_schedule, room_schedule,
                    faculty_workload, section_schedule):
        """Try to book one class of ``subject_name`` for ``section`` in a slot.

        A faculty member is eligible when free in the slot, not listed as
        unavailable for it, and below ``max_load_hours``. One eligible faculty
        member and one free room are chosen at random. On success the faculty,
        room and section schedules and the workload counter are updated in
        place.

        ``contact_hours`` is accepted for signature compatibility and not used.

        Returns:
            True when the class was booked, False when no faculty member or no
            room was available (nothing is modified in that case).
        """
        available_faculty = [
            f for f in faculty_list
            if self.is_faculty_available(f['name'], day, time_slot, faculty_schedule) and
            (day, time_slot) not in f['unavailable_slots'] and
            faculty_workload.get(f['name'], 0) < f['max_load_hours']
        ]
        if not available_faculty:
            return False

        faculty_name = self._rng.choice(available_faculty)['name']
        available_rooms = [room for room in rooms
                           if self.is_room_available(room, day, time_slot, room_schedule)]
        if not available_rooms:
            return False

        chosen_room = self._rng.choice(available_rooms)

        booking_label = f"{subject_name} - {section}"
        faculty_schedule.setdefault(faculty_name, {}).setdefault(day, {})[time_slot] = booking_label
        room_schedule.setdefault(chosen_room, {}).setdefault(day, {})[time_slot] = booking_label
        faculty_workload[faculty_name] = faculty_workload.get(faculty_name, 0) + 1
        section_schedule.setdefault(section, {}).setdefault(day, {})[time_slot] = {
            'subject': subject_name,
            'faculty': faculty_name,
            'room': chosen_room
        }
        return True

    def distribute_classes(self, subject_requirements, sections, faculty_schedule,
                         room_schedule, faculty_workload, section_schedules):
        """Place every subject's weekly hours for every section.

        Args:
            subject_requirements: ``{subject: (contact_hours, faculty_list, rooms)}``.
            sections: Section labels to schedule.
            faculty_schedule, room_schedule, faculty_workload, section_schedules:
                Bookkeeping dicts, updated in place by ``assign_class``.

        Days and slots are tried in their declared order, so the earliest free
        slots fill first; the break slot is never used. A ``[WARN]`` line is
        printed when a subject's hours could not all be placed.

        Returns:
            ``section_schedules`` (the same object that was passed in).
        """
        teaching_slots = [slot for slot in self.TIME_SLOTS if slot != self.BREAK_SLOT]

        for subject_name, (contact_hours, faculty_list, rooms) in subject_requirements.items():
            total_hours = sum(contact_hours.values())
            for section in sections:
                hours_assigned = 0
                for day in self.DAYS:
                    if hours_assigned >= total_hours:
                        break
                    for time_slot in teaching_slots:
                        if section_schedules[section][day].get(time_slot):
                            continue
                        if self.assign_class(subject_name, contact_hours, faculty_list, rooms,
                                             section, day, time_slot, faculty_schedule,
                                             room_schedule, faculty_workload, section_schedules):
                            hours_assigned += 1
                            if hours_assigned >= total_hours:
                                break
                if hours_assigned < total_hours:
                    # Surface the shortfall instead of silently dropping hours.
                    print(f"[WARN] Could only assign {hours_assigned}/{total_hours} "
                          f"classes for {subject_name} - Section {section}")
        return section_schedules

    @staticmethod
    def _ordinal(value):
        """Return ``value`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, ...)."""
        try:
            number = int(value)
        except (TypeError, ValueError):
            return f"{value}th"
        if 10 <= number % 100 <= 20:
            suffix = 'th'
        else:
            suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')
        return f"{value}{suffix}"

    @staticmethod
    def _clip(text):
        """Shorten ``text`` to 17 characters plus '...' when longer than 17."""
        return text[:17] + "..." if len(text) > 17 else text

    def format_routine_output_table(self, section_schedules, sem, dept='CSE'):
        """Render the schedules as box-drawing tables plus a summary.

        Args:
            section_schedules: ``{section: {day: {slot: info}}}`` where ``info``
                has 'subject', 'faculty' and 'room'.
            sem: Semester shown in each table title.
            dept: Department shown in each table title (default ``'CSE'``).

        Returns:
            One string containing, per section, a table with three text lines
            per slot (subject, faculty, room), followed by per-section subject
            counts and the overall faculty workload.
        """
        day_count = len(self.DAYS)
        rule = "─" * 8 + ("┼" + "─" * 20) * day_count
        header = f"{'Time':<8}" + "".join(
            f"│{self._DAY_TITLES.get(day, day):<20}" for day in self.DAYS)
        break_row = f"{self.BREAK_SLOT:<8}" + f"│{'BREAK':<20}" * day_count

        output = []
        for section in sorted(section_schedules.keys()):
            output.append(f"\n{'='*100}")
            output.append(f"ROUTINE FOR SECTION {section} - {dept} {self._ordinal(sem)} Semester")
            output.append(f"{'='*100}")
            output.append(header)
            output.append(rule)

            for time_slot in self.TIME_SLOTS:
                if time_slot == self.BREAK_SLOT:
                    output.append(break_row)
                else:
                    # Three stacked text lines per slot: subject / faculty / room.
                    lines = ["", "", ""]
                    for day in self.DAYS:
                        slot_info = section_schedules[section][day].get(time_slot)
                        if slot_info:
                            cells = (self._clip(slot_info['subject']),
                                     self._clip(slot_info['faculty']),
                                     self._clip(slot_info['room']))
                        else:
                            cells = ('FREE', '', '')
                        for index, cell in enumerate(cells):
                            lines[index] += f"│{cell:<20}"

                    output.append(f"{time_slot:<8}{lines[0]}")
                    output.append(f"{'':<8}{lines[1]}")
                    output.append(f"{'':<8}{lines[2]}")
                output.append(rule)

            output.append("")

        output.append(f"\n{'='*100}")
        output.append("TIMETABLE SUMMARY")
        output.append(f"{'='*100}")

        faculty_workload = {}
        for section in sorted(section_schedules.keys()):
            total_classes = 0
            subject_count = {}
            for day in self.DAYS:
                for time_slot in self.TIME_SLOTS:
                    if time_slot == self.BREAK_SLOT:
                        continue
                    slot_info = section_schedules[section][day].get(time_slot)
                    if slot_info:
                        total_classes += 1
                        subject = slot_info['subject']
                        faculty = slot_info['faculty']
                        subject_count[subject] = subject_count.get(subject, 0) + 1
                        faculty_workload[faculty] = faculty_workload.get(faculty, 0) + 1

            output.append(f"\nSection {section}: {total_classes} classes scheduled")
            for subject, count in sorted(subject_count.items()):
                output.append(f"  • {subject}: {count} hours/week")

        output.append(f"\n{'-'*50}")
        output.append("FACULTY WORKLOAD DISTRIBUTION")
        output.append(f"{'-'*50}")
        for faculty, hours in sorted(faculty_workload.items()):
            output.append(f"{faculty:<25}: {hours} hours/week")

        return "\n".join(output)

    def generate_routine(self, dept, sem, contact_hours_dict):
        """Generate and format the routine for all sections of ``dept``/``sem``.

        Args:
            dept: Department compared against ``student_df['department']``.
            sem: Converted with ``int()`` and compared against
                ``student_df['year']``.
                NOTE(review): the semester number is matched against the
                ``year`` column as-is -- confirm the student file stores it so.
            contact_hours_dict: ``{subject: contact-hours string}``; entries
                equal to ``"Unknown"`` are skipped.

        Returns:
            The formatted routine, or a human-readable message when there are
            no sections, no schedulable subjects, or an error occurred (errors
            are caught, printed and reported in the returned string).
        """
        try:
            year = int(sem)
            sections_data = self.student_df[
                (self.student_df['department'] == dept) &
                (self.student_df['year'] == year)
            ]

            if sections_data.empty:
                print(f"[ERROR] No sections found for {dept} Year {year}")
                return f"No sections found for {dept} Year {year}"

            sections = sections_data['section'].tolist()
            faculty_schedule = {}
            room_schedule = {}
            faculty_workload = {}
            section_schedules = {section: {day: {} for day in self.DAYS} for section in sections}

            # Head count of the first section; two column spellings are accepted.
            first_section = sections_data.iloc[0]
            student_count = first_section.get('total_number_of_students',
                                              first_section.get('total_students', 50))

            subject_requirements = {}
            for subject_name, contact_hours_str in contact_hours_dict.items():
                if contact_hours_str == "Unknown":
                    continue
                contact_hours = self.parse_contact_hours(contact_hours_str)
                qualified_faculty = self.get_qualified_faculty(subject_name, dept, year)
                suitable_rooms = self.get_suitable_rooms(subject_name, dept, student_count)
                subject_requirements[subject_name] = (contact_hours, qualified_faculty, suitable_rooms)

            if not subject_requirements:
                print("[ERROR] No valid subjects to schedule")
                return "No valid subjects to schedule"

            section_schedules = self.distribute_classes(subject_requirements, sections,
                                                        faculty_schedule, room_schedule,
                                                        faculty_workload, section_schedules)

            return self.format_routine_output_table(section_schedules, sem, dept)
        except Exception as e:
            # Best-effort by design: report the failure as text to the caller.
            print(f"[ERROR] Error in generate_routine: {e}")
            return f"Failed to generate routine: {e}"

def generate_smart_routine_from_variables(dept, sem, contact_hours_dict=None,
                                          dataset_dir='/kaggle/input/college-data/'):
    """Load the college data set and return the formatted routine.

    Args:
        dept: Department code forwarded to ``generate_routine``.
        sem: Semester forwarded to ``generate_routine``.
        contact_hours_dict: ``{subject: contact-hours string}``. When omitted,
            a built-in sample of seven subjects is used.
        dataset_dir: Directory holding ``faculty_assignments.csv``,
            ``room_assignments.csv`` and ``student_sections.csv``. Defaults to
            the Kaggle input directory.

    Returns:
        The routine text from ``SmartRoutineGenerator.generate_routine``, or a
        short message naming the first missing file / a load failure.
    """
    print(f"[INFO] Generating routine for dept={dept}, sem={sem}")
    if contact_hours_dict is None:
        contact_hours_dict = {
            "Biology (BSC)": "3L",
            "Computer Architecture (PCC-CS402)": "3L+1T",
            "Computer Architecture Lab (PCC-CS492)": "3P",
            "Design and Analysis of Algorithms (PCC-CS404)": "3L+1T",
            "Discrete Mathematics (PCC-CS401)": "3L",
            "Environmental Sciences (MC-401)": "1L",
            "Formal Language & Automata Theory (PCC-CS403)": "3L"
        }

    # (label used in messages, file name) in the order load_data expects them.
    required_files = (
        ('Faculty', 'faculty_assignments.csv'),
        ('Room', 'room_assignments.csv'),
        ('Student', 'student_sections.csv'),
    )
    paths = []
    for label, filename in required_files:
        path = os.path.join(dataset_dir, filename)
        if not os.path.exists(path):
            print(f"[ERROR] {label} file not found: {path}")
            return f"{label} file not found"
        paths.append(path)

    # Only build the generator once every input file is known to exist.
    generator = SmartRoutineGenerator()
    if not generator.load_data(*paths):
        return "Failed to load data files"

    return generator.generate_routine(dept, sem, contact_hours_dict)

# Demo run: CSE, 4th semester, using the built-in sample contact hours.
if __name__ == "__main__":
    dept, sem = "CSE", "4"
    routine = generate_smart_routine_from_variables(dept, sem)
    print(routine)

[INFO] Generating routine for dept=CSE, sem=4

ROUTINE FOR SECTION A - CSE 4th Semester
Time    │Monday              │Tuesday             │Wednesday           │Thursday            │Friday              
────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────
9-10    │Biology (BSC)       │Computer Architec...│Computer Architec...│Formal Language &...│FREE                
        │Aarav Sharma        │Rohan Mehta         │Rohan Mehta         │Aditya Singh        │                    
        │CSE-03              │ECE-04              │ECE-14              │CSE-01              │                    
────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────
10-11   │Biology (BSC)       │Design and Analys...│Computer Architec...│Formal Language &...│FREE                
        │Aarav Sharma        │Aarav Sharma        │Rohan Mehta         │Aditya Singh        │                    
