# Simple code with one class

In [10]:
import numpy as np
import random
from collections import defaultdict
from tabulate import tabulate  # For pretty printing the timetable

# [Previous TimetableEnvironment and QLearningAgent classes remain the same...]

def display_timetable(timetable, periods_per_day):
    """Format and display the timetable in a readable way"""
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    table_data = []
    for day_idx, day in enumerate(days):
        row = [day]
        for period in range(periods_per_day):
            subject = timetable[day_idx, period]
            row.append(subject if subject else '-')
        table_data.append(row)
    
    print("\nGenerated Timetable:")
    print(tabulate(table_data, headers=headers, tablefmt='grid'))

def verify_timetable(timetable, teachers, subjects, class_requirements):
    """Verify if the generated timetable meets all constraints"""
    days, periods = timetable.shape
    violations = []
    
    # Count subject hours
    subject_count = defaultdict(int)
    for day in range(days):
        for period in range(periods):
            if timetable[day, period]:
                subject_count[timetable[day, period]] += 1
    
    # Check requirements
    for subject, required in class_requirements.items():
        if subject_count[subject] != required:
            violations.append(f"Subject {subject} has {subject_count[subject]} hours instead of required {required}")
    
    # Check teacher conflicts
    for day in range(days):
        for period in range(periods):
            current_teachers = set()
            current_subject = timetable[day, period]
            if current_subject:
                for teacher, subjects_taught in teachers.items():
                    if current_subject in subjects_taught:
                        if teacher in current_teachers:
                            violations.append(f"Teacher conflict at Day {day+1}, Period {period+1}")
                        current_teachers.add(teacher)
    
    return violations

# Example usage with output
if __name__ == "__main__":
    # Define parameters
    days = 5
    periods_per_day = 4
    teachers = {
        'T1': ['MATH101', 'MATH102'],
        'T2': ['PHY101', 'PHY_LAB'],
        'T3': ['CHEM101', 'CHEM_LAB']
    }
    subjects = {
        'MATH101': 1,
        'MATH102': 1,
        'PHY101': 1,
        'PHY_LAB': 2,
        'CHEM101': 1,
        'CHEM_LAB': 2
    }
    class_requirements = {
        'MATH101': 3,
        'MATH102': 3,
        'PHY101': 3,
        'PHY_LAB': 4,
        'CHEM101': 2,
        'CHEM_LAB': 4
    }

    # Create and train the model
    env = TimetableEnvironment(days, periods_per_day, teachers, subjects, class_requirements)
    agent = QLearningAgent(list(subjects.keys()))
    print("Training the model...")
    best_timetable = train_timetable_generator(env, agent, episodes=1000)

    # Display the results
    display_timetable(best_timetable, periods_per_day)
    
    # Verify the timetable
    violations = verify_timetable(best_timetable, teachers, subjects, class_requirements)
    if violations:
        print("\nConstraint Violations Found:")
        for violation in violations:
            print(f"- {violation}")
    else:
        print("\nAll constraints satisfied!")

    # Display subject-wise statistics
    print("\nSubject-wise Distribution:")
    subject_count = defaultdict(int)
    for day in range(days):
        for period in range(periods_per_day):
            if best_timetable[day, period]:
                subject_count[best_timetable[day, period]] += 1
    
    for subject, count in subject_count.items():
        print(f"{subject}: {count} hours (Required: {class_requirements[subject]})")

Training the model...

Generated Timetable:
+-----------+------------+------------+------------+------------+
| Day       | Period 1   | Period 2   | Period 3   | Period 4   |
| Monday    | MATH101    | MATH101    | MATH101    | MATH102    |
+-----------+------------+------------+------------+------------+
| Tuesday   | CHEM_LAB   | PHY101     | PHY_LAB    | CHEM101    |
+-----------+------------+------------+------------+------------+
| Wednesday | PHY_LAB    | CHEM101    | CHEM_LAB   | PHY101     |
+-----------+------------+------------+------------+------------+
| Thursday  | -          | -          | -          | -          |
+-----------+------------+------------+------------+------------+
| Friday    | -          | -          | -          | -          |
+-----------+------------+------------+------------+------------+

Constraint Violations Found:
- Subject MATH102 has 1 hours instead of required 3
- Subject PHY101 has 2 hours instead of required 3
- Subject PHY_LAB has 2 hours i

# For more classes

In [12]:
import numpy as np
import random
from collections import defaultdict
from tabulate import tabulate

class MultiClassTimetableEnvironment:
    def __init__(self, days, periods_per_day, classes, teachers, subjects, class_requirements):
        self.days = days
        self.periods_per_day = periods_per_day
        self.classes = classes  # List of class names ['1A', '1B']
        self.teachers = teachers  # Dict of teacher_id: [subject_codes]
        self.subjects = subjects  # Dict of subject_code: duration (1 or 2 for labs)
        self.class_requirements = class_requirements  # Dict of class_name: {subject_code: weekly_hours}
        
        # Initialize timetable as 4D array: days x periods x classes
        self.timetable = {
            class_name: np.zeros((days, periods_per_day), dtype=object)
            for class_name in classes
        }
        self.current_position = (0, 0, classes[0])  # (day, period, class)
        
    def reset(self):
        self.timetable = {
            class_name: np.zeros((self.days, self.periods_per_day), dtype=object)
            for class_name in self.classes
        }
        self.current_position = (0, 0, self.classes[0])
        return self._get_state()
    
    def _get_state(self):
        return {
            'position': self.current_position,
            'timetable': {k: v.copy() for k, v in self.timetable.items()},
            'remaining_classes': self._get_remaining_classes()
        }
    
    def _get_remaining_classes(self):
        remaining = {}
        for class_name in self.classes:
            scheduled_classes = defaultdict(int)
            for day in range(self.days):
                for period in range(self.periods_per_day):
                    if self.timetable[class_name][day, period]:
                        subject = self.timetable[class_name][day, period]
                        scheduled_classes[subject] += 1
            
            remaining[class_name] = {}
            for subject, required in self.class_requirements[class_name].items():
                remaining[class_name][subject] = required - scheduled_classes[subject]
        return remaining
    
    def _check_constraints(self, day, period, subject, current_class):
        # Find teacher for this subject
        teacher_id = None
        for t_id, subjects in self.teachers.items():
            if subject in subjects:
                teacher_id = t_id
                break
                
        if teacher_id is None:
            return False
            
        # Check if teacher is already teaching any other class at this time
        for class_name in self.classes:
            if self.timetable[class_name][day, period]:
                current_subject = self.timetable[class_name][day, period]
                if current_subject in self.teachers[teacher_id]:
                    return False
        
        # Check if enough continuous periods for labs
        if self.subjects[subject] == 2:  # Lab class
            if period >= self.periods_per_day - 1:
                return False
            if self.timetable[current_class][day, period + 1]:
                return False
                
        return True
    
    def step(self, action):
        day, period, current_class = self.current_position
        reward = 0
        done = False
        
        # Try to place the subject
        if self._check_constraints(day, period, action, current_class):
            self.timetable[current_class][day, period] = action
            if self.subjects[action] == 2:  # Lab class
                self.timetable[current_class][day, period + 1] = action
            reward = 1
        else:
            reward = -1
            
        # Move to next position
        class_idx = self.classes.index(current_class)
        class_idx += 1
        if class_idx >= len(self.classes):
            class_idx = 0
            period += 1
            if period >= self.periods_per_day:
                period = 0
                day += 1
        
        if day >= self.days:
            done = True
            # Additional reward if all requirements met
            if all(all(v == 0 for v in class_reqs.values()) 
                   for class_reqs in self._get_remaining_classes().values()):
                reward += 100
                
        self.current_position = (day, period, self.classes[class_idx])
        return self._get_state(), reward, done

class QLearningAgent:
    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.1):
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.action_space = action_space
    
    def get_action(self, state, valid_actions):
        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        state_key = str(state)
        q_values = {action: self.q_table[state_key][action] 
                   for action in valid_actions}
        return max(q_values.items(), key=lambda x: x[1])[0]
    
    def update(self, state, action, reward, next_state):
        state_key = str(state)
        next_state_key = str(next_state)
        
        next_max_q = max(self.q_table[next_state_key].values()) if self.q_table[next_state_key] else 0
        current_q = self.q_table[state_key][action]
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_key][action] = new_q

def train_timetable_generator(env, agent, episodes=1000):
    best_reward = float('-inf')
    best_timetable = None
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        
        while not done:
            current_class = state['position'][2]
            valid_actions = [subject for subject, remaining 
                           in state['remaining_classes'][current_class].items() 
                           if remaining > 0]
            
            if not valid_actions:
                break
                
            action = agent.get_action(state, valid_actions)
            next_state, reward, done = env.step(action)
            
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if total_reward > best_reward:
            best_reward = total_reward
            best_timetable = {k: v.copy() for k, v in env.timetable.items()}
            
    return best_timetable

def display_timetable(timetable_dict, periods_per_day):
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    for class_name, timetable in timetable_dict.items():
        print(f"\nTimetable for Class {class_name}:")
        table_data = []
        for day_idx, day in enumerate(days):
            row = [day]
            for period in range(periods_per_day):
                subject = timetable[day_idx, period]
                row.append(subject if subject else '-')
            table_data.append(row)
        print(tabulate(table_data, headers=headers, tablefmt='grid'))

def verify_timetable(timetable_dict, teachers, subjects, class_requirements):
    violations = []
    days, periods = next(iter(timetable_dict.values())).shape
    
    # Check teacher conflicts across classes
    for day in range(days):
        for period in range(periods):
            teacher_subjects = defaultdict(list)
            
            # Collect all subjects being taught in this period
            for class_name, timetable in timetable_dict.items():
                subject = timetable[day, period]
                if subject:
                    # Find teacher for this subject
                    for teacher, subjects_taught in teachers.items():
                        if subject in subjects_taught:
                            teacher_subjects[teacher].append((class_name, subject))
            
            # Check for conflicts
            for teacher, assignments in teacher_subjects.items():
                if len(assignments) > 1:
                    classes = [f"{class_name}({subject})" for class_name, subject in assignments]
                    violations.append(
                        f"Teacher {teacher} has multiple classes at Day {day+1}, "
                        f"Period {period+1}: {', '.join(classes)}"
                    )
    
    # Check class requirements
    for class_name, timetable in timetable_dict.items():
        subject_count = defaultdict(int)
        for day in range(days):
            for period in range(periods):
                if timetable[day, period]:
                    subject_count[timetable[day, period]] += 1
        
        for subject, required in class_requirements[class_name].items():
            if subject_count[subject] != required:
                violations.append(
                    f"Class {class_name}: Subject {subject} has {subject_count[subject]} "
                    f"hours instead of required {required}"
                )
    
    return violations

# Example usage
if __name__ == "__main__":
    days = 5
    periods_per_day = 3
    classes = ['1A', '1B']
    
    teachers = {
        'T1': ['MATH101', 'MATH102'],
        'T2': ['PHY101', 'PHY_LAB'],
        'T3': ['CHEM101', 'CHEM_LAB']
    }
    
    subjects = {
        'MATH101': 1,
        'MATH102': 1,
        'PHY101': 1,
        'PHY_LAB': 2,
        'CHEM101': 1,
        'CHEM_LAB': 2
    }
    
    # Define requirements for each class
    class_requirements = {
        '1A': {
            'MATH101': 3,
            'MATH102': 3,
            'PHY101': 3,
            'PHY_LAB': 2,
            'CHEM101': 3,
            'CHEM_LAB': 2
        },
        '1B': {
            'MATH101': 3,
            'MATH102': 3,
            'PHY101': 3,
            'PHY_LAB': 2,
            'CHEM101': 3,
            'CHEM_LAB': 2
        }
    }

    # Create and train the model
    env = MultiClassTimetableEnvironment(
        days, periods_per_day, classes, teachers, subjects, class_requirements
    )
    agent = QLearningAgent(list(subjects.keys()))
    
    print("Training the model...")
    best_timetable = train_timetable_generator(env, agent, episodes=1000)

    # Display the results
    display_timetable(best_timetable, periods_per_day)
    
    # Verify the timetable
    violations = verify_timetable(best_timetable, teachers, subjects, class_requirements)
    if violations:
        print("\nConstraint Violations Found:")
        for violation in violations:
            print(f"- {violation}")
    else:
        print("\nAll constraints satisfied!")

    # Display subject-wise statistics for each class
    for class_name in classes:
        print(f"\nClass {class_name} Subject Distribution:")
        subject_count = defaultdict(int)
        for day in range(days):
            for period in range(periods_per_day):
                if best_timetable[class_name][day, period]:
                    subject_count[best_timetable[class_name][day, period]] += 1
        
        for subject, count in subject_count.items():
            print(f"{subject}: {count} hours (Required: {class_requirements[class_name][subject]})")

Training the model...

Timetable for Class 1A:
+-----------+------------+------------+------------+
| Day       | Period 1   | Period 2   | Period 3   |
| Monday    | MATH101    | MATH101    | MATH101    |
+-----------+------------+------------+------------+
| Tuesday   | CHEM101    | CHEM101    | MATH102    |
+-----------+------------+------------+------------+
| Wednesday | MATH102    | CHEM_LAB   | MATH102    |
+-----------+------------+------------+------------+
| Thursday  | PHY101     | PHY101     | PHY101     |
+-----------+------------+------------+------------+
| Friday    | CHEM_LAB   | PHY_LAB    | CHEM101    |
+-----------+------------+------------+------------+

Timetable for Class 1B:
+-----------+------------+------------+------------+
| Day       | Period 1   | Period 2   | Period 3   |
| Monday    | PHY101     | PHY101     | PHY101     |
+-----------+------------+------------+------------+
| Tuesday   | MATH101    | PHY_LAB    | CHEM101    |
+-----------+------------+-

# New code

In [5]:
import numpy as np
import random
from collections import defaultdict
from tabulate import tabulate

# [Previous imports and MultiClassTimetableEnvironment class remain the same]

class QLearningAgent:
    def __init__(self, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.1):
        """
        Initialize Q-Learning Agent
        
        Parameters:
        action_space: List of all possible actions (subjects)
        learning_rate: Alpha value for learning rate (default 0.1)
        discount_factor: Gamma value for future reward discount (default 0.95)
        epsilon: For epsilon-greedy action selection (default 0.1)
        """
        self.q_table = defaultdict(lambda: defaultdict(float))
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon
        self.action_space = action_space
    
    def get_action(self, state, valid_actions):
        """
        Select action using epsilon-greedy policy
        
        Parameters:
        state: Current state of the environment
        valid_actions: List of currently valid actions
        """
        # Exploration
        if random.random() < self.epsilon:
            return random.choice(valid_actions)
        
        # Exploitation
        state_key = str(state)
        q_values = {action: self.q_table[state_key][action] 
                   for action in valid_actions}
        
        # If all Q-values are 0, choose randomly
        if all(value == 0 for value in q_values.values()):
            return random.choice(valid_actions)
        
        # Choose action with highest Q-value
        return max(q_values.items(), key=lambda x: x[1])[0]
    
    def update(self, state, action, reward, next_state):
        """
        Update Q-value for state-action pair
        
        Parameters:
        state: Current state
        action: Taken action
        reward: Received reward
        next_state: Resulting state
        """
        state_key = str(state)
        next_state_key = str(next_state)
        
        # Get maximum Q-value for next state
        next_max_q = max(self.q_table[next_state_key].values()) if self.q_table[next_state_key] else 0
        
        # Update Q-value using Q-learning formula
        current_q = self.q_table[state_key][action]
        new_q = current_q + self.lr * (reward + self.gamma * next_max_q - current_q)
        self.q_table[state_key][action] = new_q

def train_timetable_generator(env, agent, episodes=1000):
    """
    Train the timetable generator using Q-learning
    
    Parameters:
    env: MultiClassTimetableEnvironment instance
    agent: QLearningAgent instance
    episodes: Number of training episodes
    
    Returns:
    best_timetable: The best timetable found during training
    """
    best_reward = float('-inf')
    best_timetable = None
    
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        done = False
        
        while not done:
            current_class = state['position'][2]
            valid_actions = [subject for subject, remaining 
                           in state['remaining_classes'][current_class].items() 
                           if remaining > 0]
            
            if not valid_actions:
                break
                
            action = agent.get_action(state, valid_actions)
            next_state, reward, done = env.step(action)
            
            agent.update(state, action, reward, next_state)
            state = next_state
            total_reward += reward
        
        if total_reward > best_reward:
            best_reward = total_reward
            best_timetable = {k: v.copy() for k, v in env.timetable.items()}
            
        if episode % 100 == 0:
            print(f"Episode {episode}, Best Reward: {best_reward}")
    
    return best_timetable

def display_timetable(timetable_dict, periods_per_day):
    """
    Display the generated timetable in a readable format
    """
    days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
    headers = ['Day'] + [f'Period {i+1}' for i in range(periods_per_day)]
    
    for class_name, timetable in timetable_dict.items():
        print(f"\nTimetable for Class {class_name}:")
        table_data = []
        for day_idx, day in enumerate(days):
            row = [day]
            for period in range(periods_per_day):
                subject = timetable[day_idx, period]
                row.append(subject if subject else '-')
            table_data.append(row)
        print(tabulate(table_data, headers=headers, tablefmt='grid'))

class MultiClassTimetableEnvironment:
    def __init__(self, days, periods_per_day, classes, teachers, subjects, class_requirements):
        self.days = days
        self.periods_per_day = periods_per_day
        self.classes = classes
        self.teachers = teachers
        self.subjects = subjects  # Dict of subject_code: is_lab (True/False)
        self.class_requirements = class_requirements
        
        # Validate lab hours are even numbers
        self._validate_lab_requirements()
        
        # Initialize timetable
        self.timetable = self._create_empty_timetable()
        self.current_position = (0, 0, classes[0])
        
    def _create_empty_timetable(self):
        """Create an empty timetable for all classes"""
        return {
            class_name: np.zeros((self.days, self.periods_per_day), dtype=object)
            for class_name in self.classes
        }
    
    def reset(self):
        """Reset the environment to initial state"""
        # Reset timetable to empty
        self.timetable = self._create_empty_timetable()
        # Reset position to start
        self.current_position = (0, 0, self.classes[0])
        # Return initial state
        return self._get_state()
    
    def _get_state(self):
        """Get current state of the environment"""
        return {
            'position': self.current_position,
            'timetable': {k: v.copy() for k, v in self.timetable.items()},
            'remaining_classes': self._get_remaining_classes()
        }
    
    def _check_constraints(self, day, period, subject, current_class):
        """Check if regular class can be scheduled"""
        if self.subjects[subject]['is_lab']:
            return self._check_lab_constraints(day, period, subject, current_class)
            
        # Find teacher for this subject
        teacher_id = None
        for t_id, subjects in self.teachers.items():
            if subject in subjects:
                teacher_id = t_id
                break
                
        if teacher_id is None:
            return False
            
        # Check if teacher is already teaching any other class at this time
        for class_name in self.classes:
            if self.timetable[class_name][day, period]:
                current_subject = self.timetable[class_name][day, period]
                if current_subject in self.teachers[teacher_id]:
                    return False
        
        return True

    def _validate_lab_requirements(self):
        """Ensure all lab subjects have even number of hours"""
        for class_name, requirements in self.class_requirements.items():
            for subject, hours in requirements.items():
                if self.subjects[subject]['is_lab'] and hours % 2 != 0:
                    raise ValueError(f"Lab subject {subject} for class {class_name} must have even hours. Current: {hours}")
    
    def _check_lab_constraints(self, day, period, subject, current_class):
        """Check if a lab can be scheduled at this position"""
        if not self.subjects[subject]['is_lab']:
            return True
            
        # Check if we have enough continuous periods
        if period >= self.periods_per_day - 1:
            return False
            
        # Check if next period is free
        if self.timetable[current_class][day, period + 1]:
            return False
            
        # Check teacher availability for both periods
        teacher_id = None
        for t_id, subjects in self.teachers.items():
            if subject in subjects:
                teacher_id = t_id
                break
                
        if teacher_id is None:
            return False
            
        # Check if teacher is available in both periods
        for class_name in self.classes:
            if (self.timetable[class_name][day, period] and 
                self.timetable[class_name][day, period] in self.teachers[teacher_id]):
                return False
            if (self.timetable[class_name][day, period + 1] and 
                self.timetable[class_name][day, period + 1] in self.teachers[teacher_id]):
                return False
                
        return True
    
    def _get_remaining_classes(self):
        remaining = {}
        for class_name in self.classes:
            scheduled_classes = defaultdict(int)
            for day in range(self.days):
                for period in range(self.periods_per_day):
                    if self.timetable[class_name][day, period]:
                        subject = self.timetable[class_name][day, period]
                        if self.subjects[subject]['is_lab']:
                            # Count lab hours as 2 for each slot
                            if period == 0 or self.timetable[class_name][day, period-1] != subject:
                                scheduled_classes[subject] += 2
                        else:
                            scheduled_classes[subject] += 1
            
            remaining[class_name] = {}
            for subject, required in self.class_requirements[class_name].items():
                remaining[class_name][subject] = required - scheduled_classes[subject]
        return remaining
    
    def step(self, action):
        day, period, current_class = self.current_position
        reward = 0
        done = False
        
        if self.subjects[action]['is_lab']:
            # Handle lab scheduling
            if self._check_lab_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                self.timetable[current_class][day, period + 1] = action
                reward = 1
                period += 1  # Skip next period as it's used by the lab
            else:
                reward = -1
        else:
            # Handle regular class scheduling
            if self._check_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                reward = 1
            else:
                reward = -1
        
        # Move to next position
        class_idx = self.classes.index(current_class)
        class_idx += 1
        if class_idx >= len(self.classes):
            class_idx = 0
            period += 1
            if period >= self.periods_per_day:
                period = 0
                day += 1
        
        if day >= self.days:
            done = True
            if all(all(v == 0 for v in class_reqs.values()) 
                   for class_reqs in self._get_remaining_classes().values()):
                reward += 100
                
        self.current_position = (day, period, self.classes[class_idx])
        return self._get_state(), reward, done
    # [Rest of the methods remain the same as in previous code]

'''class MultiClassTimetableEnvironment:
    def __init__(self, days, periods_per_day, classes, teachers, subjects, class_requirements):
        self.days = days
        self.periods_per_day = periods_per_day
        self.classes = classes
        self.teachers = teachers
        self.subjects = subjects  # Dict of subject_code: is_lab (True/False)
        self.class_requirements = class_requirements
        
        # Validate lab hours are even numbers
        self._validate_lab_requirements()
        
        self.timetable = {
            class_name: np.zeros((days, periods_per_day), dtype=object)
            for class_name in classes
        }
        self.current_position = (0, 0, classes[0])
        
    def _validate_lab_requirements(self):
        """Ensure all lab subjects have even number of hours"""
        for class_name, requirements in self.class_requirements.items():
            for subject, hours in requirements.items():
                if self.subjects[subject]['is_lab'] and hours % 2 != 0:
                    raise ValueError(f"Lab subject {subject} for class {class_name} must have even hours. Current: {hours}")
    
    def _check_lab_constraints(self, day, period, subject, current_class):
        """Check if a lab can be scheduled at this position"""
        if not self.subjects[subject]['is_lab']:
            return True
            
        # Check if we have enough continuous periods
        if period >= self.periods_per_day - 1:
            return False
            
        # Check if next period is free
        if self.timetable[current_class][day, period + 1]:
            return False
            
        # Check teacher availability for both periods
        teacher_id = None
        for t_id, subjects in self.teachers.items():
            if subject in subjects:
                teacher_id = t_id
                break
                
        if teacher_id is None:
            return False
            
        # Check if teacher is available in both periods
        for class_name in self.classes:
            if (self.timetable[class_name][day, period] and 
                self.timetable[class_name][day, period] in self.teachers[teacher_id]):
                return False
            if (self.timetable[class_name][day, period + 1] and 
                self.timetable[class_name][day, period + 1] in self.teachers[teacher_id]):
                return False
                
        return True
    
    def _get_remaining_classes(self):
        remaining = {}
        for class_name in self.classes:
            scheduled_classes = defaultdict(int)
            for day in range(self.days):
                for period in range(self.periods_per_day):
                    if self.timetable[class_name][day, period]:
                        subject = self.timetable[class_name][day, period]
                        if self.subjects[subject]['is_lab']:
                            # Count lab hours as 2 for each slot
                            if period == 0 or self.timetable[class_name][day, period-1] != subject:
                                scheduled_classes[subject] += 2
                        else:
                            scheduled_classes[subject] += 1
            
            remaining[class_name] = {}
            for subject, required in self.class_requirements[class_name].items():
                remaining[class_name][subject] = required - scheduled_classes[subject]
        return remaining
    
    def step(self, action):
        day, period, current_class = self.current_position
        reward = 0
        done = False
        
        if self.subjects[action]['is_lab']:
            # Handle lab scheduling
            if self._check_lab_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                self.timetable[current_class][day, period + 1] = action
                reward = 1
                period += 1  # Skip next period as it's used by the lab
            else:
                reward = -1
        else:
            # Handle regular class scheduling
            if self._check_constraints(day, period, action, current_class):
                self.timetable[current_class][day, period] = action
                reward = 1
            else:
                reward = -1
        
        # Move to next position
        class_idx = self.classes.index(current_class)
        class_idx += 1
        if class_idx >= len(self.classes):
            class_idx = 0
            period += 1
            if period >= self.periods_per_day:
                period = 0
                day += 1
        
        if day >= self.days:
            done = True
            if all(all(v == 0 for v in class_reqs.values()) 
                   for class_reqs in self._get_remaining_classes().values()):
                reward += 100
                
        self.current_position = (day, period, self.classes[class_idx])
        return self._get_state(), reward, done'''

# Example usage
if __name__ == "__main__":
    days = 5
    periods_per_day = 3
    classes = ['1A', '1B']
    
    # Define subjects with lab indicator
    subjects = {
        'MATH': {'is_lab': False},
        'PHYSICS': {'is_lab': False},
        'CHEMISTRY': {'is_lab': False},
        'PHYSICS_LAB': {'is_lab': True},
        'CHEMISTRY_LAB': {'is_lab': True}
    }
    
    teachers = {
        'T1': ['MATH'],
        'T2': ['PHYSICS', 'PHYSICS_LAB'],
        'T3': ['CHEMISTRY', 'CHEMISTRY_LAB']
    }
    
    # Define requirements (lab hours must be even)
    class_requirements = {
        '1A': {
            'MATH': 3,
            'PHYSICS': 4,
            'CHEMISTRY': 4,
            'PHYSICS_LAB': 2,
            'CHEMISTRY_LAB': 2   # Will be scheduled as 2 sessions of 2 hours each
        },
        '1B': {
            'MATH': 4,
            'PHYSICS': 3,
            'CHEMISTRY': 4,
            'PHYSICS_LAB': 2,
            'CHEMISTRY_LAB': 2
        }
    }

    # Create and train the model
    env = MultiClassTimetableEnvironment(
        days, periods_per_day, classes, teachers, subjects, class_requirements
    )
    agent = QLearningAgent(list(subjects.keys()))
    
    print("Training the model...")
    best_timetable = train_timetable_generator(env, agent, episodes=1000)

    # Display results
    display_timetable(best_timetable, periods_per_day)

Training the model...
Episode 0, Best Reward: 8
Episode 100, Best Reward: 24
Episode 200, Best Reward: 24
Episode 300, Best Reward: 24
Episode 400, Best Reward: 24
Episode 500, Best Reward: 24
Episode 600, Best Reward: 24
Episode 700, Best Reward: 24
Episode 800, Best Reward: 24
Episode 900, Best Reward: 24

Timetable for Class 1A:
+-----------+---------------+---------------+-------------+
| Day       | Period 1      | Period 2      | Period 3    |
| Monday    | PHYSICS       | CHEMISTRY     | MATH        |
+-----------+---------------+---------------+-------------+
| Tuesday   | MATH          | PHYSICS_LAB   | PHYSICS_LAB |
+-----------+---------------+---------------+-------------+
| Wednesday | PHYSICS       | CHEMISTRY     | PHYSICS     |
+-----------+---------------+---------------+-------------+
| Thursday  | CHEMISTRY_LAB | CHEMISTRY_LAB | PHYSICS     |
+-----------+---------------+---------------+-------------+
| Friday    | MATH          | CHEMISTRY     | -           |
+-----