# Simple code with one class

In [10]:
import numpy as np
import random
from collections import defaultdict
from tabulate import tabulate  # For pretty printing the timetable

# [Previous TimetableEnvironment and QLearningAgent classes remain the same...]

def display_timetable(timetable, periods_per_day):
    """Format and display the timetable in a readable way"""
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    table_data = []
    for day_idx, day in enumerate(days):
        row = [day]
        for period in range(periods_per_day):
            subject = timetable[day_idx, period]
            row.append(subject if subject else '-')
        table_data.append(row)
    
    print("\nGenerated Timetable:")
    print(tabulate(table_data, headers=headers, tablefmt='grid'))

def verify_timetable(timetable, teachers, subjects, class_requirements):
    """Verify if the generated timetable meets all constraints"""
    days, periods = timetable.shape
    violations = []
    
    # Count subject hours
    subject_count = defaultdict(int)
    for day in range(days):
        for period in range(periods):
            if timetable[day, period]:
                subject_count[timetable[day, period]] += 1
    
    # Check requirements
    for subject, required in class_requirements.items():
        if subject_count[subject] != required:
            violations.append(f"Subject {subject} has {subject_count[subject]} hours instead of required {required}")
    
    # Check teacher conflicts
    for day in range(days):
        for period in range(periods):
            current_teachers = set()
            current_subject = timetable[day, period]
            if current_subject:
                for teacher, subjects_taught in teachers.items():
                    if current_subject in subjects_taught:
                        if teacher in current_teachers:
                            violations.append(f"Teacher conflict at Day {day+1}, Period {period+1}")
                        current_teachers.add(teacher)
    
    return violations

# Example usage with output
if __name__ == "__main__":
    # Define parameters
    days = 5
    periods_per_day = 4
    teachers = {
        'T1': ['MATH101', 'MATH102'],
        'T2': ['PHY101', 'PHY_LAB'],
        'T3': ['CHEM101', 'CHEM_LAB']
    }
    subjects = {
        'MATH101': 1,
        'MATH102': 1,
        'PHY101': 1,
        'PHY_LAB': 2,
        'CHEM101': 1,
        'CHEM_LAB': 2
    }
    class_requirements = {
        'MATH101': 3,
        'MATH102': 3,
        'PHY101': 3,
        'PHY_LAB': 4,
        'CHEM101': 2,
        'CHEM_LAB': 4
    }

    # Create and train the model
    env = TimetableEnvironment(days, periods_per_day, teachers, subjects, class_requirements)
    agent = QLearningAgent(list(subjects.keys()))
    print("Training the model...")
    best_timetable = train_timetable_generator(env, agent, episodes=1000)

    # Display the results
    display_timetable(best_timetable, periods_per_day)
    
    # Verify the timetable
    violations = verify_timetable(best_timetable, teachers, subjects, class_requirements)
    if violations:
        print("\nConstraint Violations Found:")
        for violation in violations:
            print(f"- {violation}")
    else:
        print("\nAll constraints satisfied!")

    # Display subject-wise statistics
    print("\nSubject-wise Distribution:")
    subject_count = defaultdict(int)
    for day in range(days):
        for period in range(periods_per_day):
            if best_timetable[day, period]:
                subject_count[best_timetable[day, period]] += 1
    
    for subject, count in subject_count.items():
        print(f"{subject}: {count} hours (Required: {class_requirements[subject]})")

Training the model...

Generated Timetable:
+-----------+------------+------------+------------+------------+
| Day       | Period 1   | Period 2   | Period 3   | Period 4   |
| Monday    | MATH101    | MATH101    | MATH101    | MATH102    |
+-----------+------------+------------+------------+------------+
| Tuesday   | CHEM_LAB   | PHY101     | PHY_LAB    | CHEM101    |
+-----------+------------+------------+------------+------------+
| Wednesday | PHY_LAB    | CHEM101    | CHEM_LAB   | PHY101     |
+-----------+------------+------------+------------+------------+
| Thursday  | -          | -          | -          | -          |
+-----------+------------+------------+------------+------------+
| Friday    | -          | -          | -          | -          |
+-----------+------------+------------+------------+------------+

Constraint Violations Found:
- Subject MATH102 has 1 hours instead of required 3
- Subject PHY101 has 2 hours instead of required 3
- Subject PHY_LAB has 2 hours i

# For more classes

In [12]:
import numpy as np
import random
from collections import defaultdict
from tabulate import tabulate

class MultiClassTimetableEnvironment:
    def __init__(self, days, periods_per_day, classes, teachers, subjects, class_requirements):
        self.days = days
        self.periods_per_day = periods_per_day
        self.classes = classes  # List of class names ['1A', '1B']
        self.teachers = teachers  # Dict of teacher_id: [subject_codes]
        self.subjects = subjects  # Dict of subject_code: duration (1 or 2 for labs)
        self.class_requirements = class_requirements  # Dict of class_name: {subject_code: weekly_hours}
        
        # Initialize timetable as 4D array: days x periods x classes
        self.timetable = {
            class_name: np.zeros((days, periods_per_day), dtype=object)
            for class_name in classes
        }
        self.current_position = (0, 0, classes[0])  # (day, period, class)
        
    def reset(self):
        self.timetable = {
            class_name: np.zeros((self.days, self.periods_per_day), dtype=object)
            for class_name in self.classes
        }
        self.current_position = (0, 0, self.classes[0])
        return self._get_state()
    
    def _get_state(self):
        return {
            'position': self.current_position,
            'timetable': {k: v.copy() for k, v in self.timetable.items()},
            'remaining_classes': self._get_remaining_classes()
        }
    
    def _get_remaining_classes(self):
        remaining = {}
        for class_name in self.classes:
            scheduled_classes = defaultdict(int)
            for day in range(self.days):
                for period in range(self.periods_per_day):
                    if self.timetable[class_name][day, period]:
                        subject = self.timetable[class_name][day, period]
                        scheduled_classes[subject] += 1
            
            remaining[class_name] = {}
            for subject, required in self.class_requirements[class_name].items():
                remaining[class_name][subject] = required - scheduled_classes[subject]
        return remaining
    
    def _check_constraints(self, day, period, subject, current_class):
        # Find teacher for this subject
        teacher_id = None
        for t_id, subjects in self.teachers.items():
            if subject in subjects:
                teacher_id = t_id
                break
                
        if teacher_id is None:
            return False
            
        # Check if teacher is already teaching any other class at this time
        for class_name in self.classes:
            if self.timetable[class_name][day, period]:
                current_subject = self.timetable[class_name][day, period]
                if current_subject in self.teachers[teacher_id]:
                    return False
        
        # Check if enough continuous periods for labs
        if self.subjects[subject] == 2:  # Lab class
            if period >= self.periods_per_day - 1:
                return False
            if self.timetable[current_class][day, period + 1]:
                return False
                
        return True
    
    def step(self, action):
        day, period, current_class = self.current_position
        reward = 0
        done = False
        
        # Try to place the subject
        if self._check_constraints(day, period, action, current_class):
            self.timetable[current_class][day, period] = action
            if self.subjects[action] == 2:  # Lab class
                self.timetable[current_class][day, period + 1] = action
            reward = 1
        else:
            reward = -1
            
        # Move to next position
        class_idx = self.classes.index(current_class)
        class_idx += 1
        if class_idx >= len(self.classes):
            class_idx = 0
            period += 1
            if period >= self.periods_per_day:
                period = 0
                day += 1
        
        if day >= self.days:
            done = True
            # Additional reward if all requirements met
            if all(all(v == 0 for v in class_reqs.values()) 
                   for class_reqs in self._get_remaining_classes().values()):
                reward += 100
                
        self.current_position = (day, period, self.classes[class_idx])
        return self._get_state(), reward, done

class QLearningAgent:
    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.1):
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.action_space = action_space
    
    def get_action(self, state, valid_actions):
        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        state_key = str(state)
        q_values = {action: self.q_table[state_key][action] 
                   for action in valid_actions}
        return max(q_values.items(), key=lambda x: x[1])[0]
    
    def update(self, state, action, reward, next_state):
        state_key = str(state)
        next_state_key = str(next_state)
        
        next_max_q = max(self.q_table[next_state_key].values()) if self.q_table[next_state_key] else 0
        current_q = self.q_table[state_key][action]
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_key][action] = new_q

def train_timetable_generator(env, agent, episodes=1000):
    best_reward = float('-inf')
    best_timetable = None
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        
        while not done:
            current_class = state['position'][2]
            valid_actions = [subject for subject, remaining 
                           in state['remaining_classes'][current_class].items() 
                           if remaining > 0]
            
            if not valid_actions:
                break
                
            action = agent.get_action(state, valid_actions)
            next_state, reward, done = env.step(action)
            
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if total_reward > best_reward:
            best_reward = total_reward
            best_timetable = {k: v.copy() for k, v in env.timetable.items()}
            
    return best_timetable

def display_timetable(timetable_dict, periods_per_day):
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    for class_name, timetable in timetable_dict.items():
        print(f"\nTimetable for Class {class_name}:")
        table_data = []
        for day_idx, day in enumerate(days):
            row = [day]
            for period in range(periods_per_day):
                subject = timetable[day_idx, period]
                row.append(subject if subject else '-')
            table_data.append(row)
        print(tabulate(table_data, headers=headers, tablefmt='grid'))

def verify_timetable(timetable_dict, teachers, subjects, class_requirements):
    violations = []
    days, periods = next(iter(timetable_dict.values())).shape
    
    # Check teacher conflicts across classes
    for day in range(days):
        for period in range(periods):
            teacher_subjects = defaultdict(list)
            
            # Collect all subjects being taught in this period
            for class_name, timetable in timetable_dict.items():
                subject = timetable[day, period]
                if subject:
                    # Find teacher for this subject
                    for teacher, subjects_taught in teachers.items():
                        if subject in subjects_taught:
                            teacher_subjects[teacher].append((class_name, subject))
            
            # Check for conflicts
            for teacher, assignments in teacher_subjects.items():
                if len(assignments) > 1:
                    classes = [f"{class_name}({subject})" for class_name, subject in assignments]
                    violations.append(
                        f"Teacher {teacher} has multiple classes at Day {day+1}, "
                        f"Period {period+1}: {', '.join(classes)}"
                    )
    
    # Check class requirements
    for class_name, timetable in timetable_dict.items():
        subject_count = defaultdict(int)
        for day in range(days):
            for period in range(periods):
                if timetable[day, period]:
                    subject_count[timetable[day, period]] += 1
        
        for subject, required in class_requirements[class_name].items():
            if subject_count[subject] != required:
                violations.append(
                    f"Class {class_name}: Subject {subject} has {subject_count[subject]} "
                    f"hours instead of required {required}"
                )
    
    return violations

# Example usage
if __name__ == "__main__":
    days = 5
    periods_per_day = 3
    classes = ['1A', '1B']
    
    teachers = {
        'T1': ['MATH101', 'MATH102'],
        'T2': ['PHY101', 'PHY_LAB'],
        'T3': ['CHEM101', 'CHEM_LAB']
    }
    
    subjects = {
        'MATH101': 1,
        'MATH102': 1,
        'PHY101': 1,
        'PHY_LAB': 2,
        'CHEM101': 1,
        'CHEM_LAB': 2
    }
    
    # Define requirements for each class
    class_requirements = {
        '1A': {
            'MATH101': 3,
            'MATH102': 3,
            'PHY101': 3,
            'PHY_LAB': 2,
            'CHEM101': 3,
            'CHEM_LAB': 2
        },
        '1B': {
            'MATH101': 3,
            'MATH102': 3,
            'PHY101': 3,
            'PHY_LAB': 2,
            'CHEM101': 3,
            'CHEM_LAB': 2
        }
    }

    # Create and train the model
    env = MultiClassTimetableEnvironment(
        days, periods_per_day, classes, teachers, subjects, class_requirements
    )
    agent = QLearningAgent(list(subjects.keys()))
    
    print("Training the model...")
    best_timetable = train_timetable_generator(env, agent, episodes=1000)

    # Display the results
    display_timetable(best_timetable, periods_per_day)
    
    # Verify the timetable
    violations = verify_timetable(best_timetable, teachers, subjects, class_requirements)
    if violations:
        print("\nConstraint Violations Found:")
        for violation in violations:
            print(f"- {violation}")
    else:
        print("\nAll constraints satisfied!")

    # Display subject-wise statistics for each class
    for class_name in classes:
        print(f"\nClass {class_name} Subject Distribution:")
        subject_count = defaultdict(int)
        for day in range(days):
            for period in range(periods_per_day):
                if best_timetable[class_name][day, period]:
                    subject_count[best_timetable[class_name][day, period]] += 1
        
        for subject, count in subject_count.items():
            print(f"{subject}: {count} hours (Required: {class_requirements[class_name][subject]})")

Training the model...

Timetable for Class 1A:
+-----------+------------+------------+------------+
| Day       | Period 1   | Period 2   | Period 3   |
| Monday    | MATH101    | MATH101    | MATH101    |
+-----------+------------+------------+------------+
| Tuesday   | CHEM101    | CHEM101    | MATH102    |
+-----------+------------+------------+------------+
| Wednesday | MATH102    | CHEM_LAB   | MATH102    |
+-----------+------------+------------+------------+
| Thursday  | PHY101     | PHY101     | PHY101     |
+-----------+------------+------------+------------+
| Friday    | CHEM_LAB   | PHY_LAB    | CHEM101    |
+-----------+------------+------------+------------+

Timetable for Class 1B:
+-----------+------------+------------+------------+
| Day       | Period 1   | Period 2   | Period 3   |
| Monday    | PHY101     | PHY101     | PHY101     |
+-----------+------------+------------+------------+
| Tuesday   | MATH101    | PHY_LAB    | CHEM101    |
+-----------+------------+-

# New code

In [None]:
import numpy as np
import random
from collections import defaultdict
from tabulate import tabulate

# [Previous imports and MultiClassTimetableEnvironment class remain the same]

class QLearningAgent:
    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.1):
        """
        Initialize Q-Learning Agent
        
        Parameters:
        action_space: List of all possible actions (subjects)
        learning_rate: Alpha value for learning rate (default 0.1)
        discount_factor: Gamma value for future reward discount (default 0.95)
        epsilon: For epsilon-greedy action selection (default 0.1)
        """
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.action_space = action_space
    
    def get_action(self, state, valid_actions):
        """
        Select action using epsilon-greedy policy
        
        Parameters:
        state: Current state of the environment
        valid_actions: List of currently valid actions
        """
        # Exploration
        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        # Exploitation
        state_key = str(state)
        q_values = {action: self.q_table[state_key][action] 
                   for action in valid_actions}
        
        # If all Q-values are 0, choose randomly
        if all(value == 0 for value in q_values.values()):
            return random.choice(valid_actions)
        
        # Choose action with highest Q-value
        return max(q_values.items(), key=lambda x: x[1])[0]
    
    def update(self, state, action, reward, next_state):
        """
        Update Q-value for state-action pair
        
        Parameters:
        state: Current state
        action: Taken action
        reward: Received reward
        next_state: Resulting state
        """
        state_key = str(state)
        next_state_key = str(next_state)
        
        # Get maximum Q-value for next state
        next_max_q = max(self.q_table[next_state_key].values()) if self.q_table[next_state_key] else 0
        
        # Update Q-value using Q-learning formula
        current_q = self.q_table[state_key][action]
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_key][action] = new_q

def train_timetable_generator(env, agent, episodes=1000):
    """
    Train the timetable generator using Q-learning
    
    Parameters:
    env: MultiClassTimetableEnvironment instance
    agent: QLearningAgent instance
    episodes: Number of training episodes
    
    Returns:
    best_timetable: The best timetable found during training
    """
    best_reward = float('-inf')
    best_timetable = None
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        
        while not done:
            current_class = state['position'][2]
            valid_actions = [subject for subject, remaining 
                           in state['remaining_classes'][current_class].items() 
                           if remaining > 0]
            
            if not valid_actions:
                break
                
            action = agent.get_action(state, valid_actions)
            next_state, reward, done = env.step(action)
            
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if total_reward > best_reward:
            best_reward = total_reward
            best_timetable = {k: v.copy() for k, v in env.timetable.items()}
            
        if episode % 100 == 0:
            print(f"Episode {episode}, Best Reward: {best_reward}")
    
    return best_timetable

def display_timetable(timetable_dict, periods_per_day):
    """
    Display the generated timetable in a readable format
    """
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    for class_name, timetable in timetable_dict.items():
        print(f"\nTimetable for Class {class_name}:")
        table_data = []
        for day_idx, day in enumerate(days):
            row = [day]
            for period in range(periods_per_day):
                subject = timetable[day_idx, period]
                row.append(subject if subject else '-')
            table_data.append(row)
        print(tabulate(table_data, headers=headers, tablefmt='grid'))

class MultiClassTimetableEnvironment:
    def __init__(self, days, periods_per_day, classes, teachers, subjects, class_requirements):
        self.days = days
        self.periods_per_day = periods_per_day
        self.classes = classes
        self.teachers = teachers
        self.subjects = subjects  # Dict of subject_code: is_lab (True/False)
        self.class_requirements = class_requirements
        
        # Validate lab hours are even numbers
        self._validate_lab_requirements()
        
        # Initialize timetable
        self.timetable = self._create_empty_timetable()
        self.current_position = (0, 0, classes[0])
        
    def _create_empty_timetable(self):
        """Create an empty timetable for all classes"""
        return {
            class_name: np.zeros((self.days, self.periods_per_day), dtype=object)
            for class_name in self.classes
        }
    
    def reset(self):
        """Reset the environment to initial state"""
        # Reset timetable to empty
        self.timetable = self._create_empty_timetable()
        # Reset position to start
        self.current_position = (0, 0, self.classes[0])
        # Return initial state
        return self._get_state()
    
    def _get_state(self):
        """Get current state of the environment"""
        return {
            'position': self.current_position,
            'timetable': {k: v.copy() for k, v in self.timetable.items()},
            'remaining_classes': self._get_remaining_classes()
        }
    
    def _check_constraints(self, day, period, subject, current_class):
        """Check if regular class can be scheduled"""
        if self.subjects[subject]['is_lab']:
            return self._check_lab_constraints(day, period, subject, current_class)
            
        # Find teacher for this subject
        teacher_id = None
        for t_id, subjects in self.teachers.items():
            if subject in subjects:
                teacher_id = t_id
                break
                
        if teacher_id is None:
            return False
            
        # Check if teacher is already teaching any other class at this time
        for class_name in self.classes:
            if self.timetable[class_name][day, period]:
                current_subject = self.timetable[class_name][day, period]
                if current_subject in self.teachers[teacher_id]:
                    return False
        
        return True

    def _validate_lab_requirements(self):
        """Ensure all lab subjects have even number of hours"""
        for class_name, requirements in self.class_requirements.items():
            for subject, hours in requirements.items():
                if self.subjects[subject]['is_lab'] and hours % 2 != 0:
                    raise ValueError(f"Lab subject {subject} for class {class_name} must have even hours. Current: {hours}")
    
    def _check_lab_constraints(self, day, period, subject, current_class):
        """Check if a lab can be scheduled at this position"""
        if not self.subjects[subject]['is_lab']:
            return True
            
        # Check if we have enough continuous periods
        if period >= self.periods_per_day - 1:
            return False
            
        # Check if next period is free
        if self.timetable[current_class][day, period + 1]:
            return False
            
        # Check teacher availability for both periods
        teacher_id = None
        for t_id, subjects in self.teachers.items():
            if subject in subjects:
                teacher_id = t_id
                break
                
        if teacher_id is None:
            return False
            
        # Check if teacher is available in both periods
        for class_name in self.classes:
            if (self.timetable[class_name][day, period] and 
                self.timetable[class_name][day, period] in self.teachers[teacher_id]):
                return False
            if (self.timetable[class_name][day, period + 1] and 
                self.timetable[class_name][day, period + 1] in self.teachers[teacher_id]):
                return False
                
        return True
    
    def _get_remaining_classes(self):
        remaining = {}
        for class_name in self.classes:
            scheduled_classes = defaultdict(int)
            for day in range(self.days):
                for period in range(self.periods_per_day):
                    if self.timetable[class_name][day, period]:
                        subject = self.timetable[class_name][day, period]
                        if self.subjects[subject]['is_lab']:
                            # Count lab hours as 2 for each slot
                            if period == 0 or self.timetable[class_name][day, period-1] != subject:
                                scheduled_classes[subject] += 2
                        else:
                            scheduled_classes[subject] += 1
            
            remaining[class_name] = {}
            for subject, required in self.class_requirements[class_name].items():
                remaining[class_name][subject] = required - scheduled_classes[subject]
        return remaining
    
    def step(self, action):
        day, period, current_class = self.current_position
        reward = 0
        done = False
        
        if self.subjects[action]['is_lab']:
            # Handle lab scheduling
            if self._check_lab_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                self.timetable[current_class][day, period + 1] = action
                reward = 1
                period += 1  # Skip next period as it's used by the lab
            else:
                reward = -1
        else:
            # Handle regular class scheduling
            if self._check_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                reward = 1
            else:
                reward = -1
        
        # Move to next position
        class_idx = self.classes.index(current_class)
        class_idx += 1
        if class_idx >= len(self.classes):
            class_idx = 0
            period += 1
            if period >= self.periods_per_day:
                period = 0
                day += 1
        
        if day >= self.days:
            done = True
            if all(all(v == 0 for v in class_reqs.values()) 
                   for class_reqs in self._get_remaining_classes().values()):
                reward += 100
                
        self.current_position = (day, period, self.classes[class_idx])
        return self._get_state(), reward, done
    # [Rest of the methods remain the same as in previous code]


# Example usage
if __name__ == "__main__":
    days = 5
    periods_per_day = 3
    classes = ['1A', '1B']
    
    # Define subjects with lab indicator
    subjects = {
        'MATH': {'is_lab': False},
        'PHYSICS': {'is_lab': False},
        'CHEMISTRY': {'is_lab': False},
        'PHYSICS_LAB': {'is_lab': True},
        'CHEMISTRY_LAB': {'is_lab': True}
    }
    
    teachers = {
        'T1': ['MATH'],
        'T2': ['PHYSICS', 'PHYSICS_LAB'],
        'T3': ['CHEMISTRY', 'CHEMISTRY_LAB']
    }
    
    # Define requirements (lab hours must be even)
    class_requirements = {
        '1A': {
            'MATH': 3,
            'PHYSICS': 4,
            'CHEMISTRY': 4,
            'PHYSICS_LAB': 2,
            'CHEMISTRY_LAB': 2   # Will be scheduled as 2 sessions of 2 hours each
        },
        '1B': {
            'MATH': 4,
            'PHYSICS': 3,
            'CHEMISTRY': 4,
            'PHYSICS_LAB': 2,
            'CHEMISTRY_LAB': 2
        }
    }

    # Create and train the model
    env = MultiClassTimetableEnvironment(
        days, periods_per_day, classes, teachers, subjects, class_requirements
    )
    agent = QLearningAgent(list(subjects.keys()))
    
    print("Training the model...")
    best_timetable = train_timetable_generator(env, agent, episodes=1000)

    # Display results
    display_timetable(best_timetable, periods_per_day)

Training the model...
Episode 0, Best Reward: 16
Episode 100, Best Reward: 24
Episode 200, Best Reward: 24
Episode 300, Best Reward: 24
Episode 400, Best Reward: 24
Episode 500, Best Reward: 24
Episode 600, Best Reward: 24
Episode 700, Best Reward: 24
Episode 800, Best Reward: 24
Episode 900, Best Reward: 24

Timetable for Class 1A:
+-----------+---------------+---------------+-------------+
| Day       | Period 1      | Period 2      | Period 3    |
| Monday    | PHYSICS       | PHYSICS_LAB   | PHYSICS_LAB |
+-----------+---------------+---------------+-------------+
| Tuesday   | CHEMISTRY     | CHEMISTRY     | MATH        |
+-----------+---------------+---------------+-------------+
| Wednesday | CHEMISTRY_LAB | CHEMISTRY_LAB | CHEMISTRY   |
+-----------+---------------+---------------+-------------+
| Thursday  | PHYSICS       | PHYSICS       | MATH        |
+-----------+---------------+---------------+-------------+
| Friday    | PHYSICS       | CHEMISTRY     | -           |
+----

# Structured input

In [2]:
import numpy as np
import random
from collections import defaultdict
from tabulate import tabulate

class QLearningAgent:
    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.1):
        """
        Initialize Q-Learning Agent
        
        Parameters:
        action_space: List of all possible actions (subjects)
        learning_rate: Alpha value for learning rate (default 0.1)
        discount_factor: Gamma value for future reward discount (default 0.95)
        epsilon: For epsilon-greedy action selection (default 0.1)
        """
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.action_space = action_space
    
    def get_action(self, state, valid_actions):
        """
        Select action using epsilon-greedy policy
        
        Parameters:
        state: Current state of the environment
        valid_actions: List of currently valid actions
        """
        # Exploration
        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        # Exploitation
        state_key = str(state)
        q_values = {action: self.q_table[state_key][action] 
                   for action in valid_actions}
        
        # If all Q-values are 0, choose randomly
        if all(value == 0 for value in q_values.values()):
            return random.choice(valid_actions)
        
        # Choose action with highest Q-value
        return max(q_values.items(), key=lambda x: x[1])[0]
    
    def update(self, state, action, reward, next_state):
        """
        Update Q-value for state-action pair
        
        Parameters:
        state: Current state
        action: Taken action
        reward: Received reward
        next_state: Resulting state
        """
        state_key = str(state)
        next_state_key = str(next_state)
        
        # Get maximum Q-value for next state
        next_max_q = max(self.q_table[next_state_key].values()) if self.q_table[next_state_key] else 0
        
        # Update Q-value using Q-learning formula
        current_q = self.q_table[state_key][action]
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_key][action] = new_q

def train_timetable_generator(env, agent, episodes=1000):
    """
    Train the timetable generator using Q-learning
    
    Parameters:
    env: MultiClassTimetableEnvironment instance
    agent: QLearningAgent instance
    episodes: Number of training episodes
    
    Returns:
    best_timetable: The best timetable found during training
    """
    best_reward = float('-inf')
    best_timetable = None
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        
        while not done:
            current_class = state['position'][2]
            valid_actions = [subject for subject, remaining 
                           in state['remaining_classes'][current_class].items() 
                           if remaining > 0]
            
            if not valid_actions:
                break
                
            action = agent.get_action(state, valid_actions)
            next_state, reward, done = env.step(action)
            
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if total_reward > best_reward:
            best_reward = total_reward
            best_timetable = {k: v.copy() for k, v in env.timetable.items()}
            
        if episode % 100 == 0:
            print(f"Episode {episode}, Best Reward: {best_reward}")
    
    return best_timetable

def display_timetable(timetable_dict, periods_per_day):
    """
    Display the generated timetable in a readable format
    """
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    for class_name, timetable in timetable_dict.items():
        print(f"\nTimetable for Class {class_name}:")
        table_data = []
        for day_idx, day in enumerate(days):
            row = [day]
            for period in range(periods_per_day):
                subject = timetable[day_idx, period]
                row.append(subject if subject else '-')
            table_data.append(row)
        print(tabulate(table_data, headers=headers, tablefmt='grid'))

class MultiClassTimetableEnvironment:
    def __init__(self, days, periods_per_day, lunch_after_period, classes, teachers, subjects, class_requirements):
        self.days = days
        self.periods_per_day = periods_per_day
        self.lunch_after_period = lunch_after_period
        self.classes = classes
        self.teachers = teachers  # Now contains teacher codes and their subjects
        self.subjects = subjects
        self.class_requirements = class_requirements  # Modified to include teacher assignments
        
        # Validate lab hours are even numbers
        self._validate_lab_requirements()
        
        # Initialize timetable
        self.timetable = self._create_empty_timetable()
        self.current_position = (0, 0, classes[0])

    def _validate_lab_requirements(self):
        """Ensure all lab subjects have even number of hours"""
        for class_name, requirements in self.class_requirements.items():
            for subject, req_dict in requirements.items():
                if subject in self.subjects and self.subjects[subject]['is_lab']:
                    lectures = req_dict['lectures']
                    if lectures % 2 != 0:
                        raise ValueError(f"Lab subject {subject} for class {class_name} must have even number of lectures. Current: {lectures}")

    def _create_empty_timetable(self):
        """Create an empty timetable for all classes with lunch break"""
        timetables = {}
        for class_name in self.classes:
            timetable = np.zeros((self.days, self.periods_per_day + 1), dtype=object)  # +1 for lunch break
            # Mark lunch break slots
            for day in range(self.days):
                timetable[day, self.lunch_after_period] = 'LUNCH'
            timetables[class_name] = timetable
        return timetables
    
    def _check_constraints(self, day, period, subject, current_class):
        """Check if regular class can be scheduled"""
        # Check if it's lunch period
        if period == self.lunch_after_period:
            return False
            
        if self.subjects[subject]['is_lab']:
            return self._check_lab_constraints(day, period, subject, current_class)
            
        # Get assigned teacher for this subject and class
        teacher_id = self.class_requirements[current_class][subject]['teacher_code']
        
        if teacher_id not in self.teachers:
            return False
            
        # Check if teacher is already teaching any other class at this time
        for class_name in self.classes:
            current_slot = self.timetable[class_name][day, period]
            if current_slot and current_slot != 'LUNCH':
                slot_teacher = self.class_requirements[class_name][current_slot]['teacher_code']
                if slot_teacher == teacher_id:
                    return False
        
        return True

    def _check_lab_constraints(self, day, period, subject, current_class):
        """Check if a lab can be scheduled at this position"""
        if not self.subjects[subject]['is_lab']:
            return True
            
        # Check if we have enough continuous periods
        if period >= self.periods_per_day or period == self.lunch_after_period - 1:
            return False
            
        # Check if next period is free and not lunch
        next_period = period + 1
        if next_period == self.lunch_after_period or self.timetable[current_class][day, next_period]:
            return False
            
        # Get assigned teacher for this lab
        teacher_id = self.class_requirements[current_class][subject]['teacher_code']
        
        # Check if teacher is available in both periods
        for class_name in self.classes:
            for p in [period, next_period]:
                current_slot = self.timetable[class_name][day, p]
                if current_slot and current_slot != 'LUNCH':
                    slot_teacher = self.class_requirements[class_name][current_slot]['teacher_code']
                    if slot_teacher == teacher_id:
                        return False
                
        return True

    def _get_state(self):
        """Get current state of the environment"""
        return {
            'position': self.current_position,
            'timetable': {k: v.copy() for k, v in self.timetable.items()},
            'remaining_classes': self._get_remaining_classes()
        }

    def reset(self):
        """Reset the environment to initial state"""
        self.timetable = self._create_empty_timetable()
        self.current_position = (0, 0, self.classes[0])
        return self._get_state()

    def step(self, action):
        day, period, current_class = self.current_position
        reward = 0
        done = False
        
        # Skip lunch period
        if period == self.lunch_after_period:
            period += 1
            
        if self.subjects[action]['is_lab']:
            if self._check_lab_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                self.timetable[current_class][day, period + 1] = action
                reward = 1
                period += 1
            else:
                reward = -1
        else:
            if self._check_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                reward = 1
            else:
                reward = -1
        
        # Move to next position
        class_idx = self.classes.index(current_class)
        class_idx += 1
        if class_idx >= len(self.classes):
            class_idx = 0
            period += 1
            if period >= self.periods_per_day + 1:  # +1 for lunch
                period = 0
                day += 1
        
        if day >= self.days:
            done = True
            remaining = self._get_remaining_classes()
            if all(all(v == 0 for v in class_reqs.values()) 
                   for class_reqs in remaining.values()):
                reward += 100
                
        self.current_position = (day, period, self.classes[class_idx])
        return self._get_state(), reward, done

    def _get_remaining_classes(self):
        remaining = {}
        for class_name in self.classes:
            scheduled_classes = defaultdict(int)
            for day in range(self.days):
                for period in range(self.periods_per_day + 1):  # +1 for lunch
                    if self.timetable[class_name][day, period] and self.timetable[class_name][day, period] != 'LUNCH':
                        subject = self.timetable[class_name][day, period]
                        if self.subjects[subject]['is_lab']:
                            if period == 0 or self.timetable[class_name][day, period-1] != subject:
                                scheduled_classes[subject] += 2
                        else:
                            scheduled_classes[subject] += 1
            
            remaining[class_name] = {}
            for subject in self.class_requirements[class_name]:
                if subject != 'teacher_code':
                    required = self.class_requirements[class_name][subject]['lectures']
                    remaining[class_name][subject] = required - scheduled_classes[subject]
        return remaining


# Example usage
if __name__ == "__main__":
    days = 5
    periods_per_day = 3  # Total periods including lunch
    lunch_after_period = 2  # Lunch after 2nd period
    classes = ['1A', '1B']
    
    # Define subjects with lab indicator
    subjects = {
        'MATH': {'is_lab': False},
        'PHYSICS': {'is_lab': False},
        'CHEMISTRY': {'is_lab': False},
        'PHYSICS_LAB': {'is_lab': True},
        'CHEMISTRY_LAB': {'is_lab': True}
    }
    
    # Define teachers with unique codes
    teachers = {
        'AP001': ['MATH'],
        'AP002': ['PHYSICS', 'PHYSICS_LAB'],
        'AP003': ['CHEMISTRY', 'CHEMISTRY_LAB']
    }
    
    # Define requirements with teacher assignments
    class_requirements = {
        '1A': {
            'MATH': {'teacher_code': 'AP001', 'lectures': 3},
            'PHYSICS': {'teacher_code': 'AP002', 'lectures': 4},
            'CHEMISTRY': {'teacher_code': 'AP003', 'lectures': 4},
            'PHYSICS_LAB': {'teacher_code': 'AP002', 'lectures': 2},
            'CHEMISTRY_LAB': {'teacher_code': 'AP003', 'lectures': 2}
        },
        '1B': {
            'MATH': {'teacher_code': 'AP001', 'lectures': 4},
            'PHYSICS': {'teacher_code': 'AP002', 'lectures': 3},
            'CHEMISTRY': {'teacher_code': 'AP003', 'lectures': 4},
            'PHYSICS_LAB': {'teacher_code': 'AP002', 'lectures': 2},
            'CHEMISTRY_LAB': {'teacher_code': 'AP003', 'lectures': 2}
        }
    }

    # Create and train the model
    env = MultiClassTimetableEnvironment(
        days, periods_per_day, lunch_after_period, classes, teachers, subjects, class_requirements
    )
    agent = QLearningAgent(list(subjects.keys()))
    
    print("Training the model...")
    best_timetable = train_timetable_generator(env, agent, episodes=1000)
    
    # Display results
    display_timetable(best_timetable, periods_per_day + 1)  # +1 for lunch period

Training the model...
Episode 0, Best Reward: 10
Episode 100, Best Reward: 22
Episode 200, Best Reward: 22
Episode 300, Best Reward: 22
Episode 400, Best Reward: 22
Episode 500, Best Reward: 22
Episode 600, Best Reward: 24
Episode 700, Best Reward: 24
Episode 800, Best Reward: 24
Episode 900, Best Reward: 24

Timetable for Class 1A:
+-----------+-------------+-------------+------------+------------+
| Day       | Period 1    | Period 2    | Period 3   | Period 4   |
| Monday    | PHYSICS     | -           | LUNCH      | CHEMISTRY  |
+-----------+-------------+-------------+------------+------------+
| Tuesday   | MATH        | CHEMISTRY   | LUNCH      | CHEMISTRY  |
+-----------+-------------+-------------+------------+------------+
| Wednesday | PHYSICS     | CHEMISTRY   | LUNCH      | PHYSICS    |
+-----------+-------------+-------------+------------+------------+
| Thursday  | PHYSICS_LAB | PHYSICS_LAB | LUNCH      | MATH       |
+-----------+-------------+-------------+------------

# Enhance tt

In [5]:
import numpy as np
import random
from collections import defaultdict
from tabulate import tabulate
import copy


def train_timetable_generator(env, agent, episodes=1000):
    """
    Train the timetable generator using Q-learning
    
    Parameters:
    env: MultiClassTimetableEnvironment instance
    agent: QLearningAgent instance
    episodes: Number of training episodes
    
    Returns:
    best_timetable: The best timetable found during training
    """
    best_reward = float('-inf')
    best_timetable = None
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        
        while not done:
            current_class = state['position'][2]
            valid_actions = [subject for subject, remaining 
                           in state['remaining_classes'][current_class].items() 
                           if remaining > 0]
            
            if not valid_actions:
                break
                
            action = agent.get_action(state, valid_actions)
            next_state, reward, done = env.step(action)
            
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if total_reward > best_reward:
            best_reward = total_reward
            best_timetable = {k: v.copy() for k, v in env.timetable.items()}
            
        if episode % 100 == 0:
            print(f"Episode {episode}, Best Reward: {best_reward}")
    
    return best_timetable

def display_timetable(timetable_dict, periods_per_day):
    """
    Display the generated timetable in a readable format
    """
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    for class_name, timetable in timetable_dict.items():
        print(f"\nTimetable for Class {class_name}:")
        table_data = []
        for day_idx, day in enumerate(days):
            row = [day]
            for period in range(periods_per_day):
                subject = timetable[day_idx, period]
                row.append(subject if subject else '-')
            table_data.append(row)
        print(tabulate(table_data, headers=headers, tablefmt='grid'))
        
class MultiClassTimetableEnvironment:
    def __init__(self, days, periods_per_day, lunch_after_period, classes, teachers, subjects, class_requirements):
        self.days = days
        self.periods_per_day = periods_per_day
        self.lunch_after_period = lunch_after_period
        self.classes = classes
        self.teachers = teachers
        self.subjects = subjects
        self.class_requirements = class_requirements
        
        # Additional scheduling preferences
        self.max_consecutive_same_subject = 2  # Maximum consecutive periods for same subject
        self.preferred_lab_periods = [0, 2]  # Preferred periods for lab sessions (before and after lunch)
        
        self._validate_lab_requirements()
        self._validate_teacher_assignments()
        
        self.timetable = self._create_empty_timetable()
        self.current_position = (0, 0, classes[0])
        self.best_timetable = None
        self.best_score = float('-inf')
        
    def _validate_lab_requirements(self):
        """Ensure all lab subjects have even number of hours"""
        for class_name, requirements in self.class_requirements.items():
            for subject, req_dict in requirements.items():
                if subject in self.subjects and self.subjects[subject]['is_lab']:
                    lectures = req_dict['lectures']
                    if lectures % 2 != 0:
                        raise ValueError(f"Lab subject {subject} for class {class_name} must have even number of lectures. Current: {lectures}")

    def _create_empty_timetable(self):
        """Create an empty timetable for all classes with lunch break"""
        timetables = {}
        for class_name in self.classes:
            timetable = np.zeros((self.days, self.periods_per_day + 1), dtype=object)  # +1 for lunch break
            # Mark lunch break slots
            for day in range(self.days):
                timetable[day, self.lunch_after_period] = 'LUNCH'
            timetables[class_name] = timetable
        return timetables
    
    def _validate_teacher_assignments(self):
        """Validate that all subjects have valid teacher assignments"""
        for class_name, requirements in self.class_requirements.items():
            for subject, req_dict in requirements.items():
                if subject != 'teacher_code':
                    teacher_code = req_dict['teacher_code']
                    if teacher_code not in self.teachers:
                        raise ValueError(f"Invalid teacher code {teacher_code} for {subject} in class {class_name}")
                    if subject not in self.teachers[teacher_code]:
                        raise ValueError(f"Teacher {teacher_code} is not qualified to teach {subject}")

    def _check_daily_load(self, day, teacher_id):
        """Check if teacher has exceeded maximum daily load"""
        max_daily_load = 6  # Maximum periods per day for a teacher
        count = 0
        for class_name in self.classes:
            for period in range(self.periods_per_day + 1):
                current_slot = self.timetable[class_name][day, period]
                if current_slot and current_slot != 'LUNCH':
                    slot_teacher = self.class_requirements[class_name][current_slot]['teacher_code']
                    if slot_teacher == teacher_id:
                        count += 1
        return count < max_daily_load

    def _check_consecutive_subjects(self, day, period, subject, current_class):
        """Check if subject would create too many consecutive periods"""
        if period > 0 and period < self.periods_per_day:
            consecutive_count = 1
            # Check backwards
            p = period - 1
            while p >= 0 and self.timetable[current_class][day, p] == subject:
                consecutive_count += 1
                p -= 1
            # Check forwards
            p = period + 1
            while p < self.periods_per_day and self.timetable[current_class][day, p] == subject:
                consecutive_count += 1
                p += 1
            return consecutive_count <= self.max_consecutive_same_subject
        return True

    def _check_constraints(self, day, period, subject, current_class):
        """Enhanced constraint checking"""
        if period == self.lunch_after_period:
            return False
            
        if self.subjects[subject]['is_lab']:
            return self._check_lab_constraints(day, period, subject, current_class)
            
        teacher_id = self.class_requirements[current_class][subject]['teacher_code']
        
        # Basic teacher availability check
        if not self._check_teacher_availability(day, period, teacher_id):
            return False
            
        # Check daily teacher load
        if not self._check_daily_load(day, teacher_id):
            return False
            
        # Check consecutive subjects
        if not self._check_consecutive_subjects(day, period, subject, current_class):
            return False
            
        # Check subject distribution
        if not self._check_subject_distribution(day, subject, current_class):
            return False
        
        return True

    def _check_teacher_availability(self, day, period, teacher_id):
        """Check if teacher is available"""
        for class_name in self.classes:
            current_slot = self.timetable[class_name][day, period]
            if current_slot and current_slot != 'LUNCH':
                slot_teacher = self.class_requirements[class_name][current_slot]['teacher_code']
                if slot_teacher == teacher_id:
                    return False
        return True

    def _check_subject_distribution(self, day, subject, current_class):
        """Check if subject is well-distributed across the week"""
        max_per_day = 2  # Maximum occurrences of a subject per day
        count = 0
        for period in range(self.periods_per_day + 1):
            if self.timetable[current_class][day, period] == subject:
                count += 1
        return count < max_per_day
    
    def _check_lab_constraints(self, day, period, subject, current_class):
        """Check if a lab can be scheduled at this position"""
        if not self.subjects[subject]['is_lab']:
            return True
            
        # Check if we have enough continuous periods
        if period >= self.periods_per_day or period == self.lunch_after_period - 1:
            return False
            
        # Check if next period is free and not lunch
        next_period = period + 1
        if next_period == self.lunch_after_period or self.timetable[current_class][day, next_period]:
            return False
            
        # Get assigned teacher for this lab
        teacher_id = self.class_requirements[current_class][subject]['teacher_code']
        
        # Check if teacher is available in both periods
        for class_name in self.classes:
            for p in [period, next_period]:
                current_slot = self.timetable[class_name][day, p]
                if current_slot and current_slot != 'LUNCH':
                    slot_teacher = self.class_requirements[class_name][current_slot]['teacher_code']
                    if slot_teacher == teacher_id:
                        return False
                
        return True
    
    def _get_remaining_classes(self):
        remaining = {}
        for class_name in self.classes:
            scheduled_classes = defaultdict(int)
            for day in range(self.days):
                for period in range(self.periods_per_day + 1):  # +1 for lunch
                    if self.timetable[class_name][day, period] and self.timetable[class_name][day, period] != 'LUNCH':
                        subject = self.timetable[class_name][day, period]
                        if self.subjects[subject]['is_lab']:
                            if period == 0 or self.timetable[class_name][day, period-1] != subject:
                                scheduled_classes[subject] += 2
                        else:
                            scheduled_classes[subject] += 1
            
            remaining[class_name] = {}
            for subject in self.class_requirements[class_name]:
                if subject != 'teacher_code':
                    required = self.class_requirements[class_name][subject]['lectures']
                    remaining[class_name][subject] = required - scheduled_classes[subject]
        return remaining
    
    def calculate_timetable_score(self):
        """Calculate a score for the current timetable based on various criteria"""
        score = 0
        
        # Check even distribution of subjects across the week
        for class_name in self.classes:
            subject_distribution = defaultdict(list)
            for day in range(self.days):
                for period in range(self.periods_per_day + 1):
                    subject = self.timetable[class_name][day, period]
                    if subject and subject != 'LUNCH':
                        subject_distribution[subject].append(day)
            
            # Score based on distribution
            for subject, days in subject_distribution.items():
                unique_days = len(set(days))
                score += unique_days * 2  # Reward for spreading across different days
        
        # Reward for lab sessions in preferred periods
        for class_name in self.classes:
            for day in range(self.days):
                for period in self.preferred_lab_periods:
                    subject = self.timetable[class_name][day, period]
                    if subject and subject != 'LUNCH' and self.subjects[subject]['is_lab']:
                        score += 5  # Bonus for lab in preferred slot
        
        return score
    
    def reset(self):
        """Reset the environment to initial state"""
        self.timetable = self._create_empty_timetable()
        self.current_position = (0, 0, self.classes[0])
        return self._get_state()
    
    def _get_state(self):
        """Get current state of the environment"""
        return {
            'position': self.current_position,
            'timetable': {k: v.copy() for k, v in self.timetable.items()},
            'remaining_classes': self._get_remaining_classes()
        }
    
    def step(self, action):
        day, period, current_class = self.current_position
        reward = 0
        done = False
        
        # Skip lunch period
        if period == self.lunch_after_period:
            period += 1
            
        if self.subjects[action]['is_lab']:
            if self._check_lab_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                self.timetable[current_class][day, period + 1] = action
                reward = 1
                period += 1
            else:
                reward = -1
        else:
            if self._check_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                reward = 1
            else:
                reward = -1
        
        # Move to next position
        class_idx = self.classes.index(current_class)
        class_idx += 1
        if class_idx >= len(self.classes):
            class_idx = 0
            period += 1
            if period >= self.periods_per_day + 1:  # +1 for lunch
                period = 0
                day += 1
        
        if day >= self.days:
            done = True
            remaining = self._get_remaining_classes()
            if all(all(v == 0 for v in class_reqs.values()) 
                   for class_reqs in remaining.values()):
                reward += 100
                
        self.current_position = (day, period, self.classes[class_idx])
        return self._get_state(), reward, done
      
    def display_detailed_timetable(self):
        """Display a detailed version of the timetable with teacher information"""
        days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
        periods = [f'Period {i+1}' for i in range(self.periods_per_day + 1)]
        
        for class_name in self.classes:
            print(f"\n{'='*80}")
            print(f"Timetable for Class {class_name}")
            print(f"{'='*80}")
            
            table_data = []
            for day_idx, day in enumerate(days):
                row = [day]
                for period in range(self.periods_per_day + 1):
                    subject = self.timetable[class_name][day_idx, period]
                    if subject == 'LUNCH':
                        cell = 'LUNCH BREAK'
                    elif subject:
                        teacher_code = self.class_requirements[class_name][subject]['teacher_code']
                        cell = f"{subject}\n({teacher_code})"
                    else:
                        cell = '-'
                    row.append(cell)
                table_data.append(row)
            
            print(tabulate(table_data, headers=['Day'] + periods, tablefmt='grid'))
            print(f"\nTeacher Allocation for Class {class_name}:")
            for subject in self.class_requirements[class_name]:
                if subject != 'teacher_code':
                    teacher_code = self.class_requirements[class_name][subject]['teacher_code']
                    print(f"- {subject}: {teacher_code}")

def train_timetable_generator(env, agent, episodes=1000):
    """Enhanced training function with better exploration and exploitation"""
    best_reward = float('-inf')
    best_timetable = None
    best_score = float('-inf')
    
    # Dynamic epsilon decay
    initial_epsilon = 0.3
    final_epsilon = 0.01
    epsilon_decay = (initial_epsilon - final_epsilon) / episodes
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        
        # Adjust exploration rate
        agent.epsilon = max(initial_epsilon - episode * epsilon_decay, final_epsilon)
        
        while not done:
            current_class = state['position'][2]
            valid_actions = [subject for subject, remaining 
                           in state['remaining_classes'][current_class].items() 
                           if remaining > 0]
            
            if not valid_actions:
                break
                
            action = agent.get_action(state, valid_actions)
            next_state, reward, done = env.step(action)
            
            # Update Q-values with enhanced reward shaping
            if done:
                timetable_score = env.calculate_timetable_score()
                reward += timetable_score
                
                if timetable_score > best_score:
                    best_score = timetable_score
                    best_timetable = copy.deepcopy(env.timetable)
            
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if total_reward > best_reward:
            best_reward = total_reward
            
        if episode % 100 == 0:
            print(f"Episode {episode}, Best Reward: {best_reward:.2f}, Best Score: {best_score:.2f}")
    
    return best_timetable

'''class QLearningAgent:
    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.3):
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.action_space = action_space
        
    def get_action(self, state, valid_actions):
        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        state_key = str(state['position'])
        q_values = {action: self.q_table[state_key][action] 
                   for action in valid_actions}
        
        if all(value == 0 for value in q_values.values()):
            return random.choice(valid_actions)
        
        return max(q_values.items(), key=lambda x: x[1])[0]
    
    def update(self, state, action, reward, next_state):
        state_key = str(state['position'])
        next_state_key = str(next_state['position'])
        
        next_max_q = max([self.q_table[next_state_key][a] 
                         for a in self.action_space]) if self.q_table[next_state_key] else 0
        
        current_q = self.q_table[state_key][action]
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_key][action] = new_q'''

# [Previous imports and MultiClassTimetableEnvironment class remain the same until QLearningAgent]

class QLearningAgent:
    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.3):
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.action_space = action_space
        
    def get_state_key(self, state):
        """Convert state to a string key for Q-table"""
        if state and 'position' in state:
            return str(state['position'])
        return "default"
        
    def get_action(self, state, valid_actions):
        if not valid_actions:
            return None
            
        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        state_key = self.get_state_key(state)
        q_values = {action: self.q_table[state_key][action] 
                   for action in valid_actions}
        
        if all(value == 0 for value in q_values.values()):
            return random.choice(valid_actions)
        
        return max(q_values.items(), key=lambda x: x[1])[0]
    
    def update(self, state, action, reward, next_state):
        if not state or not next_state or not action:
            return
            
        state_key = self.get_state_key(state)
        next_state_key = self.get_state_key(next_state)
        
        # Get next maximum Q-value safely
        next_q_values = [self.q_table[next_state_key][a] for a in self.action_space]
        next_max_q = max(next_q_values) if next_q_values else 0
        
        current_q = self.q_table[state_key][action]
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_key][action] = new_q

def train_timetable_generator(env, agent, episodes=1000):
    """Enhanced training function with better error handling"""
    best_reward = float('-inf')
    best_timetable = None
    best_score = float('-inf')
    
    initial_epsilon = 0.3
    final_epsilon = 0.01
    epsilon_decay = (initial_epsilon - final_epsilon) / episodes
    
    for episode in range(episodes):
        try:
            state = env.reset()
            total_reward = 0
            done = False
            
            agent.epsilon = max(initial_epsilon - episode * epsilon_decay, final_epsilon)
            
            while not done:
                if not state:
                    break
                    
                current_class = state['position'][2]
                valid_actions = [subject for subject, remaining 
                               in state['remaining_classes'][current_class].items() 
                               if remaining > 0]
                
                if not valid_actions:
                    break
                    
                action = agent.get_action(state, valid_actions)
                if not action:
                    break
                    
                next_state, reward, done = env.step(action)
                
                if done:
                    timetable_score = env.calculate_timetable_score()
                    reward += timetable_score
                    
                    if timetable_score > best_score:
                        best_score = timetable_score
                        best_timetable = copy.deepcopy(env.timetable)
                
                agent.update(state, action, reward, next_state)
                state = next_state
                total_reward += reward
            
            if total_reward > best_reward:
                best_reward = total_reward
                
            if episode % 100 == 0:
                print(f"Episode {episode}, Best Reward: {best_reward:.2f}, Best Score: {best_score:.2f}")
                
        except Exception as e:
            print(f"Error in episode {episode}: {str(e)}")
            continue
    
    return best_timetable if best_timetable is not None else env.timetable

# [Rest of the code remains the same]

if __name__ == "__main__":
    try:
   
        days = 5
        periods_per_day = 6
        lunch_after_period = 2
        classes = ['1A', '1B']
        
        subjects = {
            'MATH': {'is_lab': False},
            'PHYSICS': {'is_lab': False},
            'CHEMISTRY': {'is_lab': False},
            'BIOLOGY': {'is_lab': False},
            'PHYSICS_LAB': {'is_lab': True},
            'CHEMISTRY_LAB': {'is_lab': True},
            'BIOLOGY_LAB': {'is_lab': True}
        }
        
        teachers = {
            'AP001': ['MATH'],
            'AP002': ['PHYSICS', 'PHYSICS_LAB'],
            'AP003': ['CHEMISTRY', 'CHEMISTRY_LAB'],
            'AP004': ['BIOLOGY', 'BIOLOGY_LAB']
        }
        
        class_requirements = {
            '1A': {
                'MATH': {'teacher_code': 'AP001', 'lectures': 4},
                'PHYSICS': {'teacher_code': 'AP002', 'lectures': 3},
                'CHEMISTRY': {'teacher_code': 'AP003', 'lectures': 3},
                'BIOLOGY': {'teacher_code': 'AP004', 'lectures': 3},
                'PHYSICS_LAB': {'teacher_code': 'AP002', 'lectures': 2},
                'CHEMISTRY_LAB': {'teacher_code': 'AP003', 'lectures': 2},
                'BIOLOGY_LAB': {'teacher_code': 'AP004', 'lectures': 2}
            },
            '1B': {
                'MATH': {'teacher_code': 'AP001', 'lectures': 4},
                'PHYSICS': {'teacher_code': 'AP002', 'lectures': 3},
                'CHEMISTRY': {'teacher_code': 'AP003', 'lectures': 3},
                'BIOLOGY': {'teacher_code': 'AP004', 'lectures': 3},
                'PHYSICS_LAB': {'teacher_code': 'AP002', 'lectures': 2},
                'CHEMISTRY_LAB': {'teacher_code': 'AP003', 'lectures': 2},
                'BIOLOGY_LAB': {'teacher_code': 'AP004', 'lectures': 2}
            }
        }

        # Create and train the model with error handling
        env = MultiClassTimetableEnvironment(
                days, periods_per_day, lunch_after_period, classes, teachers, subjects, class_requirements
            )
        agent = QLearningAgent(list(subjects.keys()))
            
        print("Training the model...")
        best_timetable = train_timetable_generator(env, agent, episodes=1000)
            
        if best_timetable is not None:
                env.timetable = best_timetable
                env.display_detailed_timetable()
        else:
                print("Failed to generate a valid timetable. Please try again.")
                
    except Exception as e:
        print(f"An error occurred: {str(e)}")

Training the model...
Episode 0, Best Reward: 18.00, Best Score: -inf
Episode 100, Best Reward: 32.00, Best Score: -inf
Episode 200, Best Reward: 32.00, Best Score: -inf
Episode 300, Best Reward: 32.00, Best Score: -inf
Episode 400, Best Reward: 32.00, Best Score: -inf
Episode 500, Best Reward: 32.00, Best Score: -inf
Episode 600, Best Reward: 32.00, Best Score: -inf
Episode 700, Best Reward: 32.00, Best Score: -inf
Episode 800, Best Reward: 32.00, Best Score: -inf
Episode 900, Best Reward: 32.00, Best Score: -inf

Timetable for Class 1A
+-----------+------------+------------+-------------+---------------+---------------+-------------+------------+
| Day       | Period 1   | Period 2   | Period 3    | Period 4      | Period 5      | Period 6    | Period 7   |
| Monday    | PHYSICS    | MATH       | LUNCH BREAK | MATH          | BIOLOGY_LAB   | BIOLOGY_LAB | CHEMISTRY  |
|           | (AP002)    | (AP001)    |             | (AP001)       | (AP004)       | (AP004)     | (AP003)    |
+---

# Modified for teacher's name

In [9]:
# [Previous imports remain the same]

def get_teacher_details():
    """Get teacher details from user input"""
    teachers = {}
    teacher_names = {}
    
    num_teachers = int(input("Enter the number of teachers: "))
    
    for i in range(num_teachers):
        print(f"\nEnter details for teacher {i+1}:")
        code = input("Enter teacher code (e.g., AP001): ")
        name = input("Enter teacher name: ")
        print("Enter subjects taught (comma-separated, e.g., MATH,PHYSICS): ")
        subjects = [s.strip() for s in input().split(',')]
        
        teachers[code] = subjects
        teacher_names[code] = name
    
    return teachers, teacher_names

def train_timetable_generator(env, agent, episodes=1000):
    """
    Train the timetable generator using Q-learning
    
    Parameters:
    env: MultiClassTimetableEnvironment instance
    agent: QLearningAgent instance
    episodes: Number of training episodes
    
    Returns:
    best_timetable: The best timetable found during training
    """
    best_reward = float('-inf')
    best_timetable = None
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        
        while not done:
            current_class = state['position'][2]
            valid_actions = [subject for subject, remaining 
                           in state['remaining_classes'][current_class].items() 
                           if remaining > 0]
            
            if not valid_actions:
                break
                
            action = agent.get_action(state, valid_actions)
            next_state, reward, done = env.step(action)
            
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if total_reward > best_reward:
            best_reward = total_reward
            best_timetable = {k: v.copy() for k, v in env.timetable.items()}
            
        if episode % 100 == 0:
            print(f"Episode {episode}, Best Reward: {best_reward}")
    
    return best_timetable

def display_timetable(timetable_dict, periods_per_day):
    """
    Display the generated timetable in a readable format
    """
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    for class_name, timetable in timetable_dict.items():
        print(f"\nTimetable for Class {class_name}:")
        table_data = []
        for day_idx, day in enumerate(days):
            row = [day]
            for period in range(periods_per_day):
                subject = timetable[day_idx, period]
                row.append(subject if subject else '-')
            table_data.append(row)
        print(tabulate(table_data, headers=headers, tablefmt='grid'))

class MultiClassTimetableEnvironment:
    def __init__(self, days, periods_per_day, lunch_after_period, classes, teachers, teacher_names, subjects, class_requirements):
        self.days = days
        self.periods_per_day = periods_per_day
        self.lunch_after_period = lunch_after_period
        self.classes = classes
        self.teachers = teachers
        self.teacher_names = teacher_names  # Added teacher names dictionary
        self.subjects = subjects
        self.class_requirements = class_requirements
        
        # [Rest of the __init__ remains the same]
        self.max_consecutive_same_subject = 2  # Maximum consecutive periods for same subject
        self.preferred_lab_periods = [0, 2]  # Preferred periods for lab sessions (before and after lunch)
        
        self._validate_lab_requirements()
        self._validate_teacher_assignments()
        
        self.timetable = self._create_empty_timetable()
        self.current_position = (0, 0, classes[0])
        self.best_timetable = None
        self.best_score = float('-inf')

    def _validate_lab_requirements(self):
        """Ensure all lab subjects have even number of hours"""
        for class_name, requirements in self.class_requirements.items():
            for subject, req_dict in requirements.items():
                if subject in self.subjects and self.subjects[subject]['is_lab']:
                    lectures = req_dict['lectures']
                    if lectures % 2 != 0:
                        raise ValueError(f"Lab subject {subject} for class {class_name} must have even number of lectures. Current: {lectures}")

    def _create_empty_timetable(self):
        """Create an empty timetable for all classes with lunch break"""
        timetables = {}
        for class_name in self.classes:
            timetable = np.zeros((self.days, self.periods_per_day + 1), dtype=object)  # +1 for lunch break
            # Mark lunch break slots
            for day in range(self.days):
                timetable[day, self.lunch_after_period] = 'LUNCH'
            timetables[class_name] = timetable
        return timetables
    
    def _validate_teacher_assignments(self):
        """Validate that all subjects have valid teacher assignments"""
        for class_name, requirements in self.class_requirements.items():
            for subject, req_dict in requirements.items():
                if subject != 'teacher_code':
                    teacher_code = req_dict['teacher_code']
                    if teacher_code not in self.teachers:
                        raise ValueError(f"Invalid teacher code {teacher_code} for {subject} in class {class_name}")
                    if subject not in self.teachers[teacher_code]:
                        raise ValueError(f"Teacher {teacher_code} is not qualified to teach {subject}")

    def _check_daily_load(self, day, teacher_id):
        """Check if teacher has exceeded maximum daily load"""
        max_daily_load = 6  # Maximum periods per day for a teacher
        count = 0
        for class_name in self.classes:
            for period in range(self.periods_per_day + 1):
                current_slot = self.timetable[class_name][day, period]
                if current_slot and current_slot != 'LUNCH':
                    slot_teacher = self.class_requirements[class_name][current_slot]['teacher_code']
                    if slot_teacher == teacher_id:
                        count += 1
        return count < max_daily_load

    def _check_consecutive_subjects(self, day, period, subject, current_class):
        """Check if subject would create too many consecutive periods"""
        if period > 0 and period < self.periods_per_day:
            consecutive_count = 1
            # Check backwards
            p = period - 1
            while p >= 0 and self.timetable[current_class][day, p] == subject:
                consecutive_count += 1
                p -= 1
            # Check forwards
            p = period + 1
            while p < self.periods_per_day and self.timetable[current_class][day, p] == subject:
                consecutive_count += 1
                p += 1
            return consecutive_count <= self.max_consecutive_same_subject
        return True

    def _check_constraints(self, day, period, subject, current_class):
        """Enhanced constraint checking"""
        if period == self.lunch_after_period:
            return False
            
        if self.subjects[subject]['is_lab']:
            return self._check_lab_constraints(day, period, subject, current_class)
            
        teacher_id = self.class_requirements[current_class][subject]['teacher_code']
        
        # Basic teacher availability check
        if not self._check_teacher_availability(day, period, teacher_id):
            return False
            
        # Check daily teacher load
        if not self._check_daily_load(day, teacher_id):
            return False
            
        # Check consecutive subjects
        if not self._check_consecutive_subjects(day, period, subject, current_class):
            return False
            
        # Check subject distribution
        if not self._check_subject_distribution(day, subject, current_class):
            return False
        
        return True

    def _check_teacher_availability(self, day, period, teacher_id):
        """Check if teacher is available"""
        for class_name in self.classes:
            current_slot = self.timetable[class_name][day, period]
            if current_slot and current_slot != 'LUNCH':
                slot_teacher = self.class_requirements[class_name][current_slot]['teacher_code']
                if slot_teacher == teacher_id:
                    return False
        return True

    def _check_subject_distribution(self, day, subject, current_class):
        """Check if subject is well-distributed across the week"""
        max_per_day = 2  # Maximum occurrences of a subject per day
        count = 0
        for period in range(self.periods_per_day + 1):
            if self.timetable[current_class][day, period] == subject:
                count += 1
        return count < max_per_day
    
    def _check_lab_constraints(self, day, period, subject, current_class):
        """Check if a lab can be scheduled at this position"""
        if not self.subjects[subject]['is_lab']:
            return True
            
        # Check if we have enough continuous periods
        if period >= self.periods_per_day or period == self.lunch_after_period - 1:
            return False
            
        # Check if next period is free and not lunch
        next_period = period + 1
        if next_period == self.lunch_after_period or self.timetable[current_class][day, next_period]:
            return False
            
        # Get assigned teacher for this lab
        teacher_id = self.class_requirements[current_class][subject]['teacher_code']
        
        # Check if teacher is available in both periods
        for class_name in self.classes:
            for p in [period, next_period]:
                current_slot = self.timetable[class_name][day, p]
                if current_slot and current_slot != 'LUNCH':
                    slot_teacher = self.class_requirements[class_name][current_slot]['teacher_code']
                    if slot_teacher == teacher_id:
                        return False
                
        return True
    
    def _get_remaining_classes(self):
        remaining = {}
        for class_name in self.classes:
            scheduled_classes = defaultdict(int)
            for day in range(self.days):
                for period in range(self.periods_per_day + 1):  # +1 for lunch
                    if self.timetable[class_name][day, period] and self.timetable[class_name][day, period] != 'LUNCH':
                        subject = self.timetable[class_name][day, period]
                        if self.subjects[subject]['is_lab']:
                            if period == 0 or self.timetable[class_name][day, period-1] != subject:
                                scheduled_classes[subject] += 2
                        else:
                            scheduled_classes[subject] += 1
            
            remaining[class_name] = {}
            for subject in self.class_requirements[class_name]:
                if subject != 'teacher_code':
                    required = self.class_requirements[class_name][subject]['lectures']
                    remaining[class_name][subject] = required - scheduled_classes[subject]
        return remaining
    
    def calculate_timetable_score(self):
        """Calculate a score for the current timetable based on various criteria"""
        score = 0
        
        # Check even distribution of subjects across the week
        for class_name in self.classes:
            subject_distribution = defaultdict(list)
            for day in range(self.days):
                for period in range(self.periods_per_day + 1):
                    subject = self.timetable[class_name][day, period]
                    if subject and subject != 'LUNCH':
                        subject_distribution[subject].append(day)
            
            # Score based on distribution
            for subject, days in subject_distribution.items():
                unique_days = len(set(days))
                score += unique_days * 2  # Reward for spreading across different days
        
        # Reward for lab sessions in preferred periods
        for class_name in self.classes:
            for day in range(self.days):
                for period in self.preferred_lab_periods:
                    subject = self.timetable[class_name][day, period]
                    if subject and subject != 'LUNCH' and self.subjects[subject]['is_lab']:
                        score += 5  # Bonus for lab in preferred slot
        
        return score
    
    def reset(self):
        """Reset the environment to initial state"""
        self.timetable = self._create_empty_timetable()
        self.current_position = (0, 0, self.classes[0])
        return self._get_state()
    
    def _get_state(self):
        """Get current state of the environment"""
        return {
            'position': self.current_position,
            'timetable': {k: v.copy() for k, v in self.timetable.items()},
            'remaining_classes': self._get_remaining_classes()
        }
    
    def step(self, action):
        day, period, current_class = self.current_position
        reward = 0
        done = False
        
        # Skip lunch period
        if period == self.lunch_after_period:
            period += 1
            
        if self.subjects[action]['is_lab']:
            if self._check_lab_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                self.timetable[current_class][day, period + 1] = action
                reward = 1
                period += 1
            else:
                reward = -1
        else:
            if self._check_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                reward = 1
            else:
                reward = -1
        
        # Move to next position
        class_idx = self.classes.index(current_class)
        class_idx += 1
        if class_idx >= len(self.classes):
            class_idx = 0
            period += 1
            if period >= self.periods_per_day + 1:  # +1 for lunch
                period = 0
                day += 1
        
        if day >= self.days:
            done = True
            remaining = self._get_remaining_classes()
            if all(all(v == 0 for v in class_reqs.values()) 
                   for class_reqs in remaining.values()):
                reward += 100
                
        self.current_position = (day, period, self.classes[class_idx])
        return self._get_state(), reward, done
    
    
    def display_detailed_timetable(self):
        """Display a detailed version of the timetable with teacher information"""
        days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
        periods = [f'Period {i+1}' for i in range(self.periods_per_day + 1)]
        
        for class_name in self.classes:
            print(f"\n{'='*80}")
            print(f"Timetable for Class {class_name}")
            print(f"{'='*80}")
            
            table_data = []
            for day_idx, day in enumerate(days):
                row = [day]
                for period in range(self.periods_per_day + 1):
                    subject = self.timetable[class_name][day_idx, period]
                    if subject == 'LUNCH':
                        cell = 'LUNCH BREAK'
                    elif subject:
                        teacher_code = self.class_requirements[class_name][subject]['teacher_code']
                        teacher_name = self.teacher_names[teacher_code]
                        cell = f"{subject}\n({teacher_code})"
                    else:
                        cell = '-'
                    row.append(cell)
                table_data.append(row)
            
            print(tabulate(table_data, headers=['Day'] + periods, tablefmt='grid'))
            print(f"\nTeacher Allocation for Class {class_name}:")
            for subject in self.class_requirements[class_name]:
                if subject != 'teacher_code':
                    teacher_code = self.class_requirements[class_name][subject]['teacher_code']
                    teacher_name = self.teacher_names[teacher_code]
                    print(f"- {subject}: {teacher_code} - {teacher_name}")
                    
class QLearningAgent:
    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.3):
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.action_space = action_space
        
    def get_state_key(self, state):
        """Convert state to a string key for Q-table"""
        if state and 'position' in state:
            return str(state['position'])
        return "default"
        
    def get_action(self, state, valid_actions):
        if not valid_actions:
            return None
            
        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        state_key = self.get_state_key(state)
        q_values = {action: self.q_table[state_key][action] 
                   for action in valid_actions}
        
        if all(value == 0 for value in q_values.values()):
            return random.choice(valid_actions)
        
        return max(q_values.items(), key=lambda x: x[1])[0]
    
    def update(self, state, action, reward, next_state):
        if not state or not next_state or not action:
            return
            
        state_key = self.get_state_key(state)
        next_state_key = self.get_state_key(next_state)
        
        # Get next maximum Q-value safely
        next_q_values = [self.q_table[next_state_key][a] for a in self.action_space]
        next_max_q = max(next_q_values) if next_q_values else 0
        
        current_q = self.q_table[state_key][action]
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_key][action] = new_q
        
def train_timetable_generator(env, agent, episodes=1000):
    """Enhanced training function with better error handling"""
    best_reward = float('-inf')
    best_timetable = None
    best_score = float('-inf')
    
    initial_epsilon = 0.3
    final_epsilon = 0.01
    epsilon_decay = (initial_epsilon - final_epsilon) / episodes
    
    for episode in range(episodes):
        try:
            state = env.reset()
            total_reward = 0
            done = False
            
            agent.epsilon = max(initial_epsilon - episode * epsilon_decay, final_epsilon)
            
            while not done:
                if not state:
                    break
                    
                current_class = state['position'][2]
                valid_actions = [subject for subject, remaining 
                               in state['remaining_classes'][current_class].items() 
                               if remaining > 0]
                
                if not valid_actions:
                    break
                    
                action = agent.get_action(state, valid_actions)
                if not action:
                    break
                    
                next_state, reward, done = env.step(action)
                
                if done:
                    timetable_score = env.calculate_timetable_score()
                    reward += timetable_score
                    
                    if timetable_score > best_score:
                        best_score = timetable_score
                        best_timetable = copy.deepcopy(env.timetable)
                
                agent.update(state, action, reward, next_state)
                state = next_state
                total_reward += reward
            
            if total_reward > best_reward:
                best_reward = total_reward
                
            if episode % 100 == 0:
                print(f"Episode {episode}, Best Reward: {best_reward:.2f}, Best Score: {best_score:.2f}")
                
        except Exception as e:
            print(f"Error in episode {episode}: {str(e)}")
            continue
    
    return best_timetable if best_timetable is not None else env.timetable

if __name__ == "__main__":
    try:
        days = 5
        periods_per_day = int(input("Enter number of periods per day: "))
        lunch_after_period = int(input("Enter after which period lunch break should be scheduled (e.g., 2 for after 2nd period): "))
        
        # Get class information
        num_classes = int(input("Enter number of classes: "))
        classes = []
        for i in range(num_classes):
            class_name = input(f"Enter name for class {i+1}: ")
            classes.append(class_name)
        
        # Define subjects
        print("\nEnter subject details:")
        subjects = {}
        num_subjects = int(input("Enter number of subjects: "))
        for i in range(num_subjects):
            subject_name = input(f"Enter name for subject {i+1}: ")
            is_lab = input(f"Is {subject_name} a lab subject? (yes/no): ").lower() == 'yes'
            subjects[subject_name] = {'is_lab': is_lab}
        
        # Get teacher details
        print("\nEnter teacher details:")
        teachers, teacher_names = get_teacher_details()
        
        # Get class requirements
        class_requirements = {}
        print("\nEnter class requirements:")
        for class_name in classes:
            print(f"\nFor class {class_name}:")
            class_requirements[class_name] = {}
            
            for subject in subjects:
                print(f"\nFor subject {subject}:")
                teacher_code = input(f"Enter teacher code who will teach {subject}: ")
                while teacher_code not in teachers or subject not in teachers[teacher_code]:
                    print(f"Error: Teacher {teacher_code} cannot teach {subject}. Available teachers:")
                    for t_code, t_subjects in teachers.items():
                        if subject in t_subjects:
                            print(f"- {t_code} ({teacher_names[t_code]})")
                    teacher_code = input(f"Enter valid teacher code for {subject}: ")
                
                lectures = int(input(f"Enter number of lectures for {subject}: "))
                if subjects[subject]['is_lab'] and lectures % 2 != 0:
                    print("Warning: Lab subjects must have even number of lectures.")
                    lectures = int(input(f"Enter even number of lectures for {subject}: "))
                
                class_requirements[class_name][subject] = {
                    'teacher_code': teacher_code,
                    'lectures': lectures
                }
        
        # Create and train the model
        env = MultiClassTimetableEnvironment(
            days, periods_per_day, lunch_after_period, classes, 
            teachers, teacher_names, subjects, class_requirements
        )
        agent = QLearningAgent(list(subjects.keys()))
        
        print("\nTraining the model...")
        best_timetable = train_timetable_generator(env, agent, episodes=1000)
        
        if best_timetable is not None:
            env.timetable = best_timetable
            env.display_detailed_timetable()
        else:
            print("Failed to generate a valid timetable. Please try again.")
            
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        import traceback
        traceback.print_exc()


Enter subject details:

Enter teacher details:

Enter details for teacher 1:
Enter subjects taught (comma-separated, e.g., MATH,PHYSICS): 

Enter details for teacher 2:
Enter subjects taught (comma-separated, e.g., MATH,PHYSICS): 

Enter details for teacher 3:
Enter subjects taught (comma-separated, e.g., MATH,PHYSICS): 

Enter class requirements:

For class 1a:

For subject math:

For subject chem:

For subject phy:

For subject chem_lab:

For subject phy_lab:

For class 1b:

For subject math:

For subject chem:

For subject phy:

For subject chem_lab:

For subject phy_lab:

Training the model...
Episode 0, Best Reward: 170.00, Best Score: 56.00
Episode 100, Best Reward: 170.00, Best Score: 56.00
Episode 200, Best Reward: 170.00, Best Score: 56.00
Episode 300, Best Reward: 170.00, Best Score: 56.00
Episode 400, Best Reward: 170.00, Best Score: 56.00
Episode 500, Best Reward: 170.00, Best Score: 56.00
Episode 600, Best Reward: 170.00, Best Score: 56.00
Episode 700, Best Reward: 170.00