# Schedule Recommender System

Generates semester course recommendations for students based on program requirements, prerequisites, and preferences.

**Quick Start:** Run all cells from top to bottom (Cell → Run All)

In [None]:
import os
import sys

import numpy as np
import pandas as pd
from sqlalchemy.orm import Session

# Make the project root importable BEFORE any `Database.*` import runs;
# otherwise a fresh kernel whose working directory is not /notebook fails
# on the first project import.
sys.path.append('/notebook')

from Database.database import (
    get_db_session, engine, check_db_connection, verify_tables_exist
)
from Database.models import (
    Student, Course, Section, Takes, Prerequisites,
    TimeSlot, Program, HasCourse, Cluster, CourseCluster, Preferred,
    RecommendationResult
)
from Database.db_helpers import (
    get_by_id, get_all, create_record, update_record,
    delete_record, exists, count_records
)


## 1. Load Data


In [None]:
# Open one database session for the notebook. It lives at module level so
# that later cells can reuse it (a later cell checks for `db` in globals()).
db = get_db_session()

def load_data():
    """
    Read every table the recommender needs into pandas DataFrames.

    Returns:
        dict mapping table name -> DataFrame. Each key is identical to the
        database table it was read from.
    """
    # Order matters only for reproducing the original read sequence.
    table_names = (
        'students',
        'courses',
        'sections',
        'takes',
        'prerequisites',
        'programs',
        'hascourse',
        'time_slots',
        'clusters',
        'course_cluster',
        'preferred',
    )
    return {name: pd.read_sql_table(name, engine) for name in table_names}

# Load all tables once; `data` is the dict of DataFrames handed to the scheduler below.
data = load_data()
# Print row counts for the main tables as a quick sanity check.
print("\n✓ Data loaded successfully!")
print(f"  Students: {len(data['students'])}")
print(f"  Courses: {len(data['courses'])}")
print(f"  Sections: {len(data['sections'])}")
print(f"  Takes records: {len(data['takes'])}")
print(f"  Programs: {len(data['programs'])}")



✓ Data loaded successfully!
  Students: 10
  Courses: 51
  Sections: 51
  Takes records: 151
  Programs: 3


In [3]:
# Verify database connection
print("Checking database connection...")
if check_db_connection():
    print("✓ Database is accessible!")
else:
    print("✗ Failed to connect to database. Please check your DATABASE_URL.")
    raise ConnectionError("Cannot connect to database")

# Verify required tables exist
print("\nVerifying database tables...")
table_status = verify_tables_exist()
# The status dict mixes "<table>" entries with "<table>_count" entries; only the
# former are reported, with the latter looked up for the row count.
# NOTE: the loop variable is deliberately NOT called `exists`, which would
# rebind the `exists` helper imported from Database.db_helpers.
for table, table_exists in table_status.items():
    if table.endswith('_count'):
        continue
    if table_exists:
        count = table_status.get(f"{table}_count", 0)
        print(f"✓ {table}: exists ({count} rows)")
    else:
        print(f"✗ {table}: missing")

# Check if we have data
if all(table_status.get(t, False) for t in ['students', 'courses', 'sections']):
    print("\n✓ All required tables exist and are accessible!")
else:
    print("\n⚠ Some required tables are missing. Please ensure the database is initialized.")


Checking database connection...
✓ Database connection successful!
✓ Database is accessible!

Verifying database tables...
✓ students: exists (10 rows)
✓ courses: exists (51 rows)
✓ sections: exists (51 rows)
✓ takes: exists (151 rows)
✓ prerequisites: exists (7 rows)
✓ programs: exists (3 rows)
✓ hascourse: exists (60 rows)
✓ time_slots: exists (468 rows)

✓ All required tables exist and are accessible!


## 2. Generate Recommendations


In [4]:

class SemesterScheduler:
    """
    Semester recommendation engine for the BSDS program.

    Builds a schedule of up to five courses for a student from:
    - the hardcoded BSDS sample schedule (main + foundation courses)
    - Gen-Ed cluster rules (3 courses from each cluster group)
    - prerequisites
    - completed credits (used to pick the semester template)
    - time-of-day preferences

    All lookups run against the pandas DataFrames passed in ``data_dict``.
    """

    # Completed credits that correspond to one semester of the sample schedule.
    CREDITS_PER_SEMESTER = 15
    # Last semester template defined in the sample schedule.
    LAST_SEMESTER = 8
    # Number of courses required from each Gen-Ed cluster group.
    GENED_COURSES_PER_GROUP = 3

    def __init__(self, data_dict, current_year=2024, current_semester='Fall'):
        """
        Args:
            data_dict: dict of table name -> DataFrame. The methods read
                'takes', 'sections', 'courses' and 'prerequisites'
                unconditionally; 'course_cluster', 'clusters', 'time_slots'
                and 'hascourse' are optional.
            current_year: year used when filtering sections by availability.
            current_semester: semester label, stored for callers.
        """
        self.data = data_dict
        self.current_year = current_year
        self.current_semester = current_semester

        # BSDS Schedule Template (hardcoded based on sample schedule)
        # Format: {semester_number: {slot: [course_ids]}}
        # Semester 1 = Fall 1, Semester 2 = Spring 1, etc.
        self.bsds_schedule = {
            1: {  # Fall 1 (Freshman)
                'main': [1, 6, 11],  # CS100, CS111, CS110
                'gened': [],  # Gen-Ed 1
                'foundation': [28]  # FND101
            },
            2: {  # Spring 1
                'main': [2, 7, 12, 16],  # CS101, BUS101, DS120, CS104
                'gened': [],
                'foundation': [29]  # FND102
            },
            3: {  # Fall 2
                'main': [3, 8, 13],  # CS102, CS107, DS115
                'gened': [],
                'foundation': [30]  # FND103
            },
            4: {  # Spring 2
                'main': [4, 9, 14, 17],  # ENGS211, CS108, DS205, DS150
                'gened': [],
                'foundation': [31]  # FND104
            },
            5: {  # Fall 3
                'main': [10, 15, 18],  # DS110, CS246, DS116
                'gened': [],
                'foundation': [32]  # FND221
            },
            6: {  # Spring 3
                'main': [19],  # CS251
                'gened': [],
                'foundation': [33]  # FND222
            },
            7: {  # Fall 4
                'main': [],  # Track courses + electives
                'gened': [],
                'foundation': []
            },
            8: {  # Spring 4
                'main': [5],  # DS299 Capstone
                'gened': [],
                'foundation': []
            }
        }

        # Gen-Ed cluster groups
        # Group A: GENED cluster_number 1,2,3 (cluster_id 7,8,9)
        # Group B: GENED cluster_number 4,5,6 (cluster_id 10,11,12)
        # Group C: BSDS clusters 7,8,9 (cluster_id 14,15,16), treated as an
        #          additional Gen-Ed style requirement
        self.gened_groups = {
            'A': [7, 8, 9],
            'B': [10, 11, 12],
            'C': [14, 15, 16]
        }

        # Build helper mappings
        self._build_mappings()

    # ------------------------------------------------------------------
    # Lookup construction
    # ------------------------------------------------------------------

    def _table(self, name):
        """Return the named DataFrame, or None when it is missing or empty."""
        if name not in self.data:
            return None
        frame = self.data[name]
        return frame if len(frame) > 0 else None

    def _get_program_from_cluster_id(self, cluster_id):
        """
        Derive the program name from a cluster_id.

        The ranges mirror the data generator: 1-6 FND, 7-12 GENED, 13-16 BSDS.
        Returns None for any other id.
        """
        if 1 <= cluster_id <= 6:
            return 'FND'
        if 7 <= cluster_id <= 12:
            return 'GENED'
        if 13 <= cluster_id <= 16:
            return 'BSDS'
        return None

    def _build_mappings(self):
        """Build the dict lookups used by the filtering and formatting methods."""
        # course_id -> [cluster_id, ...]
        self.course_to_clusters = {}
        course_cluster = self._table('course_cluster')
        if course_cluster is not None:
            for _, row in course_cluster.iterrows():
                self.course_to_clusters.setdefault(
                    int(row['course_id']), []
                ).append(int(row['cluster_id']))

        # cluster_id -> descriptive info
        self.cluster_info = {}
        clusters = self._table('clusters')
        if clusters is not None:
            for _, row in clusters.iterrows():
                cluster_id = int(row['cluster_id'])
                self.cluster_info[cluster_id] = {
                    'cluster_number': int(row['cluster_number']),
                    'prog_name': self._get_program_from_cluster_id(cluster_id),
                    # The table stores the label in 'theme'.
                    'description': row.get('theme', '')
                }

        # section id -> time_slot_id
        self.section_to_timeslot = {}
        sections = self._table('sections')
        if sections is not None:
            for _, row in sections.iterrows():
                slot_id = row.get('time_slot_id', 0)
                # A NULL time slot means "no time info"; int(NaN) would raise.
                if pd.isna(slot_id):
                    continue
                self.section_to_timeslot[int(row['id'])] = int(slot_id)

        # time_slot_id -> time info
        self.timeslot_info = {}
        time_slots = self._table('time_slots')
        if time_slots is not None:
            for _, row in time_slots.iterrows():
                year = row.get('year')
                self.timeslot_info[int(row['time_slot_id'])] = {
                    'day_of_week': row.get('day_of_week', ''),
                    'start_time': row.get('start_time', ''),
                    'end_time': row.get('end_time', ''),
                    'year': int(year) if pd.notna(year) else 0,
                    'semester': row.get('semester', '')
                }

    def _timeslot_for_section(self, section_id):
        """Return the time-slot info dict for a section, or None if unknown.

        A time_slot_id of 0 is treated as "no time slot", matching the
        default used when the column is missing.
        """
        slot_id = self.section_to_timeslot.get(section_id)
        if not slot_id:
            return None
        return self.timeslot_info.get(slot_id)

    # ------------------------------------------------------------------
    # Student progress
    # ------------------------------------------------------------------

    def _takes_with_status(self, student_id, status):
        """Return the student's rows of the takes table with the given status."""
        takes = self.data['takes']
        return takes[(takes['student_id'] == student_id) & (takes['status'] == status)]

    def _courses_with_status(self, student_id, status):
        """Return the set of course ids the student has taken with ``status``."""
        rows = self._takes_with_status(student_id, status)
        if len(rows) == 0:
            return set()
        sections = self.data['sections']
        matched = sections.loc[sections['id'].isin(rows['section_id']), 'course_id']
        return set(matched.unique())

    def get_student_credits(self, student_id):
        """
        Calculate total credits completed by a student from the takes table.

        Credits are summed once per completed section; sections whose course
        is not in the courses table contribute nothing.

        Returns:
            int total of completed credits (0 when nothing is completed).
        """
        completed = self._takes_with_status(student_id, 'completed')
        if len(completed) == 0:
            return 0

        sections = self.data['sections']
        course_ids = sections.loc[
            sections['id'].isin(completed['section_id']), 'course_id'
        ]

        # One dict lookup per course instead of one DataFrame scan per course.
        courses = self.data['courses'].drop_duplicates(subset='id', keep='first')
        credits_by_course = dict(zip(courses['id'], courses['credits']))

        return sum(
            int(credits_by_course[course_id])
            for course_id in course_ids
            if course_id in credits_by_course
        )

    def get_student_standing(self, student_id):
        """Return 'Freshman', 'Sophomore', 'Junior' or 'Senior' from completed credits."""
        credits = self.get_student_credits(student_id)
        if credits < 30:
            return 'Freshman'
        if credits < 60:
            return 'Sophomore'
        if credits < 90:
            return 'Junior'
        return 'Senior'

    def get_student_completed_courses(self, student_id):
        """Get the set of completed course IDs."""
        return self._courses_with_status(student_id, 'completed')

    def _get_enrolled_courses(self, student_id):
        """Get the set of currently enrolled course IDs."""
        return self._courses_with_status(student_id, 'enrolled')

    def _unavailable_courses(self, student_id):
        """Courses that must not be recommended: completed or currently enrolled."""
        return (
            self.get_student_completed_courses(student_id)
            | self._get_enrolled_courses(student_id)
        )

    def get_student_cluster_profile(self, student_id):
        """
        Get the student's cluster completion profile.

        Returns:
            dict of cluster_id -> number of completed courses in that cluster.
        """
        cluster_counts = {}
        for course_id in self.get_student_completed_courses(student_id):
            for cluster_id in self.course_to_clusters.get(int(course_id), []):
                cluster_counts[cluster_id] = cluster_counts.get(cluster_id, 0) + 1
        return cluster_counts

    def get_remaining_gened_requirements(self, student_id):
        """
        Calculate remaining Gen-Ed requirements.

        Returns:
            dict of group ('A', 'B', 'C') -> number of courses still needed.
        """
        cluster_profile = self.get_student_cluster_profile(student_id)
        remaining = {}
        for group, cluster_ids in self.gened_groups.items():
            completed = sum(cluster_profile.get(cid, 0) for cid in cluster_ids)
            remaining[group] = max(0, self.GENED_COURSES_PER_GROUP - completed)
        return remaining

    def get_schedule_template_for_semester(self, student_id):
        """
        Determine which semester template to use based on completed credits.

        Every 15 completed credits advance the student by one semester,
        capped at semester 8.

        Returns:
            int semester number (1-8).
        """
        credits = self.get_student_credits(student_id)
        return min(credits // self.CREDITS_PER_SEMESTER + 1, self.LAST_SEMESTER)

    # ------------------------------------------------------------------
    # Filters
    # ------------------------------------------------------------------

    @staticmethod
    def _start_hour(start_time):
        """
        Extract the hour (0-23) from a start time, or None if it is unknown.

        Accepts 'HH:MM[:SS]' strings, objects exposing ``.hour``
        (datetime.time / datetime) and timedelta-like objects.
        """
        try:
            if hasattr(start_time, 'hour'):
                return int(start_time.hour)
            if hasattr(start_time, 'total_seconds'):
                return int(start_time.total_seconds() // 3600) % 24
            return int(str(start_time).split(':')[0])
        except (TypeError, ValueError):
            # Empty string, None, NaN/NaT or an unexpected format.
            return None

    @staticmethod
    def _matches_time_preference(hour, time_preference):
        """Return True when ``hour`` falls into the named time-of-day bucket."""
        if time_preference == 'morning':
            return 8 <= hour < 12
        if time_preference == 'afternoon':
            return 12 <= hour < 17
        if time_preference == 'evening':
            return hour >= 17 or hour < 8
        return False

    def filter_by_time_preference(self, sections_df, time_preference='any'):
        """
        Filter sections by time-of-day preference.

        Args:
            sections_df: DataFrame of sections (needs an 'id' column).
            time_preference: 'morning' (08-12), 'afternoon' (12-17),
                'evening' (17-08) or 'any' (default, no filtering).

        Returns:
            DataFrame with the matching rows. Sections whose start time is
            unknown or unparseable are kept, so that missing data never hides
            a section.
        """
        if len(sections_df) == 0 or time_preference == 'any':
            return sections_df

        keep = []
        for section_id in sections_df['id']:
            info = self._timeslot_for_section(int(section_id))
            hour = self._start_hour(info.get('start_time')) if info else None
            keep.append(
                hour is None or self._matches_time_preference(hour, time_preference)
            )
        return sections_df[keep]

    def filter_courses_by_prereqs(self, course_ids, student_id):
        """Return the courses from ``course_ids`` whose prerequisites are all completed."""
        completed = self.get_student_completed_courses(student_id)
        prerequisites = self.data['prerequisites']

        eligible = []
        for course_id in course_ids:
            required = prerequisites.loc[
                prerequisites['course_id'] == course_id, 'prerequisite_id'
            ]
            # all() over an empty sequence is True: no prerequisites -> eligible.
            if all(int(prereq) in completed for prereq in required):
                eligible.append(course_id)
        return eligible

    def _eligible(self, course_ids, student_id, unavailable):
        """Prerequisite-satisfied courses that are not in ``unavailable``."""
        return [
            course_id
            for course_id in self.filter_courses_by_prereqs(course_ids, student_id)
            if course_id not in unavailable
        ]

    def filter_by_semester_availability(self, sections_df):
        """
        Keep sections scheduled in the current year or later.

        Sections of any semester in ``current_year`` or a future year are
        accepted, so students still get recommendations when no section
        matches the exact semester. Sections without time-slot info are
        assumed to be available.
        """
        if len(sections_df) == 0:
            return sections_df

        keep = []
        for section_id in sections_df['id']:
            info = self._timeslot_for_section(int(section_id))
            keep.append(
                info is None or bool(info.get('year', 0) >= self.current_year)
            )
        return sections_df[keep]

    def _select_sections(self, sections, time_preference):
        """
        Narrow ``sections`` with progressively weaker filters.

        Tries semester + time preference first, then semester only, and
        finally falls back to the unfiltered sections.
        """
        in_term = self.filter_by_semester_availability(sections)
        preferred = self.filter_by_time_preference(in_term, time_preference)
        for candidate in (preferred, in_term):
            if len(candidate) > 0:
                return candidate
        return sections

    # ------------------------------------------------------------------
    # Formatting helpers
    # ------------------------------------------------------------------

    def _get_time_slot_string(self, section_id):
        """Get a human-readable time slot string, or 'TBA' when unknown."""
        info = self._timeslot_for_section(section_id)
        if info:
            start = info.get('start_time', '')
            end = info.get('end_time', '')
            if start and end:
                return f"{info.get('day_of_week', '')} {start}-{end}"
        return "TBA"

    def _cluster_label(self, course_id, default):
        """Comma-joined cluster descriptions for a course, or ``default`` if none."""
        names = [
            self.cluster_info[cid]['description']
            for cid in self.course_to_clusters.get(course_id, [])
            if cid in self.cluster_info
        ]
        return ', '.join(names) if names else default

    def _make_recommendation(self, section, cluster, reasons):
        """Build the recommendation dict for one chosen section."""
        course_id = int(section['course_id'])
        section_id = int(section['id'])
        courses = self.data['courses']
        course = courses[courses['id'] == course_id].iloc[0]
        return {
            'course_id': course_id,
            'course_name': course['name'],
            'cluster': cluster,
            'credits': int(course['credits']),
            'section_id': section_id,
            'time_slot': self._get_time_slot_string(section_id),
            'why_recommended': reasons
        }

    # ------------------------------------------------------------------
    # Recommendations
    # ------------------------------------------------------------------

    def recommend_gened(self, student_id, time_preference='any', exclude_course_ids=None):
        """
        Recommend a Gen-Ed course based on remaining requirements.

        Args:
            student_id: student to recommend for.
            time_preference: see ``filter_by_time_preference``.
            exclude_course_ids: optional iterable of course ids that must not
                be returned (e.g. courses already placed in other slots).

        Returns:
            Recommendation dict, or None when every group is satisfied or no
            candidate course has a section.
        """
        remaining = self.get_remaining_gened_requirements(student_id)
        # First group that still needs courses.
        target_group = next(
            (group for group, needed in remaining.items() if needed > 0), None
        )
        if target_group is None:
            return None  # All Gen-Ed requirements met

        target_clusters = set(self.gened_groups[target_group])
        unavailable = self._unavailable_courses(student_id) | set(exclude_course_ids or ())

        candidates = self._eligible(
            [
                course_id
                for course_id, clusters in self.course_to_clusters.items()
                if target_clusters.intersection(clusters)
            ],
            student_id,
            unavailable,
        )

        # Fallback: nothing in the target clusters -> any course of the GENED program.
        if not candidates:
            hascourse = self._table('hascourse')
            if hascourse is not None:
                gened_ids = hascourse.loc[
                    hascourse['prog_name'] == 'GENED', 'courseid'
                ].unique().tolist()
                candidates = self._eligible(gened_ids, student_id, unavailable)

        if not candidates:
            return None

        all_sections = self.data['sections']
        sections = all_sections[all_sections['course_id'].isin(candidates)]
        if len(sections) == 0:
            return None

        # Pick first available section
        section = self._select_sections(sections, time_preference).iloc[0]
        return self._make_recommendation(
            section,
            self._cluster_label(int(section['course_id']), 'Gen-Ed'),
            [f'Gen-Ed requirement for cluster group {target_group}'],
        )

    def recommend_main_courses(self, student_id, time_preference='any', n=3):
        """
        Recommend main courses (Core/Track) based on the BSDS schedule.

        Walks the template courses for the student's semester in order and
        returns up to ``n`` of them. A course without any section is skipped
        and does not use up one of the ``n`` slots.

        Returns:
            list of recommendation dicts (possibly empty).
        """
        semester_num = self.get_schedule_template_for_semester(student_id)
        template_courses = self.bsds_schedule.get(semester_num, {}).get('main', [])

        eligible = self._eligible(
            template_courses, student_id, self._unavailable_courses(student_id)
        )

        all_sections = self.data['sections']
        recommendations = []
        for course_id in eligible:
            if len(recommendations) >= n:
                break

            sections = all_sections[all_sections['course_id'] == course_id]
            if len(sections) == 0:
                continue

            section = self._select_sections(sections, time_preference).iloc[0]
            recommendations.append(
                self._make_recommendation(
                    section,
                    self._cluster_label(course_id, 'Core'),
                    [f'Required for semester {semester_num} per BSDS schedule'],
                )
            )
        return recommendations

    def recommend_foundation(self, student_id, time_preference='any'):
        """
        Recommend the Foundation course of the student's semester template.

        Returns:
            Recommendation dict, or None when the template has no foundation
            course, it is already completed/enrolled, its prerequisites are
            not met, or it has no section.
        """
        semester_num = self.get_schedule_template_for_semester(student_id)
        foundation_courses = self.bsds_schedule.get(semester_num, {}).get('foundation', [])
        if not foundation_courses:
            return None

        eligible = self._eligible(
            foundation_courses, student_id, self._unavailable_courses(student_id)
        )
        if not eligible:
            return None

        all_sections = self.data['sections']
        sections = all_sections[all_sections['course_id'].isin(eligible)]
        if len(sections) == 0:
            return None

        section = self._select_sections(sections, time_preference).iloc[0]
        return self._make_recommendation(section, 'Foundation', ['Foundation requirement'])

    def recommend_semester(self, student_id, time_preference='any'):
        """
        Main function: recommend a full semester schedule (up to 5 courses).

        Slots 1-3 are main courses, slot 4 is a Gen-Ed course and slot 5 a
        Foundation course. The same course is never returned twice.

        Returns:
            list of recommendation dicts.
        """
        # Slot 1-3: Main courses (Core/Track)
        recommendations = list(
            self.recommend_main_courses(student_id, time_preference, n=3)
        )
        chosen = {rec['course_id'] for rec in recommendations}

        # Slot 4: Gen-Ed. Group C reuses BSDS clusters, so a main course could
        # otherwise be picked again here.
        gened = self.recommend_gened(
            student_id, time_preference, exclude_course_ids=chosen
        )
        if gened:
            recommendations.append(gened)
            chosen.add(gened['course_id'])

        # Slot 5: Foundation
        foundation = self.recommend_foundation(student_id, time_preference)
        if foundation and foundation['course_id'] not in chosen:
            recommendations.append(foundation)

        return recommendations

# Initialize scheduler
scheduler = SemesterScheduler(data, current_year=2025, current_semester='Fall')
print("✓ Recommender initialized!")


✓ Recommender initialized!


In [None]:
# Ensure a db session is available. It is normally created by the data-loading
# cell above; this guard only matters when that cell was not run in this kernel.
if 'db' not in globals():
    from Database.database import get_db_session
    db = get_db_session()
    print("Note: Created new database session")

def save_recommendations_to_db(db: Session, scheduler, student_id: int, recommendations: list, 
                                 model_version: str = 'semester_scheduler_v1', 
                                 time_preference: str = 'any',
                                 semester: str = 'Fall',
                                 year: int = 2024):
    """Save semester recommendations to database.

    Creates one RecommendationResult row per recommendation through
    create_record, numbering the rows by their position in the list.

    Args:
        db: session handed to create_record.
        scheduler: object whose ``section_to_timeslot`` mapping resolves a
            section id to its time slot id.
        student_id: student the recommendations belong to (numpy ints accepted).
        recommendations: list of dicts with required keys 'course_id',
            'section_id' and 'course_name', and optional keys 'cluster',
            'credits', 'score' and 'why_recommended'.
        model_version, time_preference, semester, year: stored verbatim on
            every row.

    Returns:
        None. Prints a warning and returns early when there is nothing to save.

    Raises:
        Exception: whatever create_record raises is printed and re-raised.
            This function does not roll back rows created earlier in the loop.
    """
    # Convert to Python int in case it's numpy.int64 (from pandas)
    student_id = int(student_id)

    if not recommendations:
        print(f"  ⚠️  No recommendations to save for student {student_id}")
        return

    # Use CRUD helper to create records
    saved_count = 0
    for slot_num, rec in enumerate(recommendations, 1):
        section_id = int(rec['section_id'])

        # Store the time slot id of the section, not the formatted string.
        # 0 is a valid id, so only None/NaN count as "no time slot".
        raw_time_slot_id = scheduler.section_to_timeslot.get(section_id)
        time_slot_id = None if pd.isna(raw_time_slot_id) else int(raw_time_slot_id)

        # Convert why_recommended list to string
        why_recommended_str = ', '.join(rec.get('why_recommended', []))

        result_data = {
            'student_id': student_id,
            'course_id': int(rec['course_id']),  # Convert numpy types
            'recommended_section_id': section_id,
            'course_name': rec['course_name'],
            'cluster': rec.get('cluster', ''),
            'credits': int(rec.get('credits', 0)),
            'time_slot': time_slot_id,
            'recommendation_score': str(rec.get('score', '1.0')),  # Default score
            'why_recommended': why_recommended_str,
            'slot_number': slot_num,  # Position in semester (1-5)
            'model_version': model_version,
            'time_preference': time_preference,
            'semester': semester,
            'year': year
        }
        try:
            create_record(db, RecommendationResult, result_data)
            saved_count += 1
        except Exception as e:
            print(f"  ⚠️  Failed to save recommendation {slot_num}: {e}")
            raise  # Re-raise so the caller can report the failure

    if saved_count > 0:
        print(f"  ✓ Saved {saved_count} recommendations to database for student {student_id}")

import traceback

# Single source of truth for this run: the printed heading, the scheduler call
# and the saved rows all use these values, so the stored metadata always
# matches how the schedule was actually generated.
TIME_PREFERENCE = 'any'
MODEL_VERSION = 'semester_scheduler_v1'
TARGET_SEMESTER = 'Fall'
TARGET_YEAR = 2025

if len(data['students']) > 0:
    all_students = data['students']
    print(f"Generating recommendations for {len(all_students)} students...\n")

    # Process all students
    for idx, student_row in all_students.iterrows():
        student_id = int(student_row['student_id'])
        student_name = student_row['student_name']
        program_name = student_row['program_name']

        print(f"{'='*80}")
        print(f"Student: {student_name} (ID: {student_id})")
        print(f"Program: {program_name}")

        # Get student standing and credits
        credits = scheduler.get_student_credits(student_id)
        standing = scheduler.get_student_standing(student_id)
        print(f"Credits Completed: {credits}")
        print(f"Standing: {standing}")

        # Get cluster profile
        cluster_profile = scheduler.get_student_cluster_profile(student_id)
        print(f"Cluster Profile: {cluster_profile}")

        # Get remaining Gen-Ed requirements
        gened_remaining = scheduler.get_remaining_gened_requirements(student_id)
        print("Remaining Gen-Ed Requirements:")
        for group, needed in gened_remaining.items():
            print(f"  Group {group}: {needed} courses needed")

        # Generate semester recommendations
        print(f"\nSEMESTER RECOMMENDATIONS (Time Preference: {TIME_PREFERENCE})")
        recommendations = scheduler.recommend_semester(
            student_id, time_preference=TIME_PREFERENCE
        )

        if recommendations:
            print(f"Recommended {len(recommendations)} courses:")
            for i, rec in enumerate(recommendations, 1):
                print(f"  {i}. {rec['course_name']} (ID: {rec['course_id']}, Credits: {rec['credits']}, Cluster: {rec['cluster']})")

            # Save recommendations to database; a failure for one student is
            # reported and the loop moves on to the next student.
            try:
                save_recommendations_to_db(db, scheduler, student_id, recommendations,
                                           MODEL_VERSION, TIME_PREFERENCE,
                                           TARGET_SEMESTER, TARGET_YEAR)
            except NameError as e:
                print(f"  ⚠️  Error: Function or imports not available: {e}")
                print("     Make sure you've run all previous cells")
            except Exception as e:
                print(f"  ⚠️  Warning: Could not save recommendations to database: {e}")
                print(f"     Error type: {type(e).__name__}")
                print(f"     Details: {traceback.format_exc()}")
                print("     Make sure the database is initialized (run 'docker compose up')")

            # Output as JSON (optional - uncomment if needed)
            # print(f"\nJSON Output:")
            # print(json.dumps(recommendations, indent=2))
        else:
            print("No recommendations available.")
            print("  (Student may have completed all required courses or no sections available)")

        print()  # Empty line between students

    print(f"{'='*80}")
    print(f"Completed processing {len(all_students)} students.")
else:
    print("No students found in database.")


Generating recommendations for 10 students...

Student: Armen (ID: 1)
Program: BSDS
Credits Completed: 63
Standing: Junior
Cluster Profile: {7: 1, 8: 2, 9: 2, 10: 3, 12: 3, 15: 1, 13: 1, 16: 1}
Remaining Gen-Ed Requirements:
  Group A: 0 courses needed
  Group B: 0 courses needed
  Group C: 1 courses needed

SEMESTER RECOMMENDATIONS (Time Preference: morning)
Recommended 1 courses:
  1. CSE 110 Introduction to Computer Science (ID: 46, Credits: 3, Cluster: Computer Science Foundations, Mathematical Sciences, Technology and Society)
  ✓ Saved 1 recommendations to database for student 1

Student: Alla (ID: 2)
Program: BSDS
Credits Completed: 61
Standing: Junior
Cluster Profile: {7: 5, 8: 3, 9: 3, 10: 4, 12: 4, 14: 1, 15: 1, 16: 2, 13: 1}
Remaining Gen-Ed Requirements:
  Group A: 0 courses needed
  Group B: 0 courses needed
  Group C: 0 courses needed

SEMESTER RECOMMENDATIONS (Time Preference: morning)
Recommended 3 courses:
  1. CS 246 Artificial Intelligence (ID: 15, Credits: 3, Cluste

## 3. View Results

In [6]:
# Show what is currently stored in the recommendation_results table,
# grouped per student and followed by a short summary.

print("="*80)
print("SAVED RECOMMENDATIONS IN DATABASE")
print("="*80)

try:
    # Pull the whole table into a DataFrame
    recommendations_df = pd.read_sql_table('recommendation_results', engine)
    total_recs = len(recommendations_df)

    if total_recs == 0:
        print("\n⚠️  No recommendations found in database.")
        print("   Make sure you've run Cell 10 to generate and save recommendations for all students.")
    else:
        print(f"\n✓ Found {total_recs} saved recommendations")
        student_count = recommendations_df['student_id'].nunique()
        print(f"  Total students with recommendations: {student_count}")

        # One section per student, in ascending student_id order
        students_df = data['students']
        for student_id in sorted(recommendations_df['student_id'].unique()):
            student_recs = recommendations_df[recommendations_df['student_id'] == student_id]
            name_matches = students_df.loc[
                students_df['student_id'] == student_id, 'student_name'
            ].values
            # Fall back to a generic label when the student row is missing
            if len(name_matches) > 0:
                student_name = name_matches[0]
            else:
                student_name = f"Student {student_id}"

            print(f"\n  Student: {student_name} (ID: {student_id})")
            print(f"  Recommendations: {len(student_recs)}")
            for _, rec in student_recs.iterrows():
                print(f"    - {rec['course_name']} (Slot {rec['slot_number']}, Credits: {rec['credits']}, Cluster: {rec['cluster']})")

        # Summary statistics (the table is known to be non-empty in this branch)
        print(f"\n{'='*80}")
        print("SUMMARY STATISTICS")
        print(f"{'='*80}")
        print(f"Total recommendations: {total_recs}")
        print(f"Students with recommendations: {student_count}")
        print(f"Average recommendations per student: {total_recs / student_count:.1f}")
        for label, column in (
            ("Model version", 'model_version'),
            ("Semester", 'semester'),
            ("Year", 'year'),
        ):
            print(f"{label}: {recommendations_df[column].iloc[0]}")

except Exception as e:
    print(f"\n⚠️  Error querying database: {e}")
    print("   Make sure you've run the previous cell to generate recommendations.")


SAVED RECOMMENDATIONS IN DATABASE

✓ Found 29 saved recommendations
  Total students with recommendations: 9

  Student: Armen (ID: 1)
  Recommendations: 1
    - CSE 110 Introduction to Computer Science (Slot 1, Credits: 3, Cluster: Computer Science Foundations, Mathematical Sciences, Technology and Society)

  Student: Alla (ID: 2)
  Recommendations: 3
    - CS 246 Artificial Intelligence (Slot 1, Credits: 3, Cluster: Core)
    - DS 116 Data Visualization (Slot 2, Credits: 3, Cluster: Core)
    - FND 221 Armenian History 1 (Slot 3, Credits: 4, Cluster: Foundation)

  Student: Levon (ID: 3)
  Recommendations: 3
    - DS 110 Statistics 2 (Slot 1, Credits: 4, Cluster: Core)
    - CHSS 170 Religion in America (Slot 2, Credits: 3, Cluster: Arts and Humanities, Social Sciences, Philosophy and Ethics, Social Psychology and Behavior, Critical Thinking and Analysis)
    - FND 221 Armenian History 1 (Slot 3, Credits: 4, Cluster: Foundation)

  Student: Marieta (ID: 4)
  Recommendations: 4
    -