In [6]:
print("Hello")

Hello


In [13]:
import numpy as np
import pandas as pd
import random
from gym import Env
from gym.spaces import Discrete, Box

class TimetableEnv(Env):
    def __init__(self, num_days, periods_per_day, lunch_after_period, teachers, classes):
        # Inputs
        self.num_days = num_days
        self.periods_per_day = periods_per_day
        self.lunch_after_period = lunch_after_period
        self.teachers = teachers
        self.classes = classes
        
        # Total periods (excluding lunch break)
        self.total_periods = self.num_days * self.periods_per_day
        
        # Timetable: A 3D array [days][periods][classes]
        self.timetable = np.full((num_days, periods_per_day, len(classes)), None)
        
        # RL state and action space
        self.action_space = Discrete(len(teachers) * len(classes) * self.num_days * self.periods_per_day)
        self.observation_space = Box(low=0, high=1, shape=(num_days, periods_per_day, len(classes)), dtype=int)
        
        # Remaining lectures for each class and teacher
        self.remaining_lectures = {}
        for class_name in classes:
            print(f"\nFor {class_name}:")
            self.remaining_lectures[class_name] = {}
            for teacher in teachers:
                print(f"  Teacher {teacher['name']} teaches: {', '.join(teacher['subjects'].keys())}")
                for subject, total_lectures in teacher['subjects'].items():
                    num_lectures = int(input(f"    Enter number of {subject} lectures for {class_name} (max {total_lectures}): "))
                    if teacher['name'] not in self.remaining_lectures[class_name]:
                       self.remaining_lectures[class_name][teacher['name']] = {}
                    self.remaining_lectures[class_name][teacher['name']][subject] = num_lectures

    
    def reset(self):
        # Reset timetable and remaining lectures
        self.timetable = np.full((self.num_days, self.periods_per_day, len(self.classes)), None)
        self.remaining_lectures = {class_name: {teacher['name']: teacher['subjects'] for teacher in self.teachers} for class_name in self.classes}
        return self.timetable
    
    def step(self, action):
        # Decode the action into teacher, class, day, and period
        teacher_idx = action // (len(self.classes) * self.num_days * self.periods_per_day)
        class_idx = (action % (len(self.classes) * self.num_days * self.periods_per_day)) // (self.num_days * self.periods_per_day)
        day = (action % (self.num_days * self.periods_per_day)) // self.periods_per_day
        period = action % self.periods_per_day
        
        teacher = self.teachers[teacher_idx]
        class_name = self.classes[class_idx]
        
        # Check if valid action
        if self.timetable[day, period, class_idx] is not None:
            return self.timetable, -1, False, {}
        
        # Update timetable and remaining lectures
        subject = None
        for subj, count in self.remaining_lectures[class_name][teacher['name']].items():
            if count > 0:
                subject = subj
                break
        
        if subject is None:
            return self.timetable, -1, False, {}
        
        self.timetable[day, period, class_idx] = (teacher['code'], subject)
        self.remaining_lectures[class_name][teacher['name']][subject] -= 1
        
        # Reward calculation
        reward = 1
        if period == self.lunch_after_period:
            reward -= 0.5  # Discourage labs being split by lunch break
        # Count occurrences of (teacher['code'], subject) in the timetable for the given class
        occurrences = np.sum([
            (self.timetable[day, period, class_idx] == (teacher['code'], subject)) 
            for day in range(self.num_days) 
            for period in range(self.periods_per_day)
        ])
        if occurrences > 1:
            reward -= 0.5  # Discourage consecutive same subject

            reward -= 0.5  # Discourage consecutive same subject
        
        # Check if all lectures are allocated
        done = all(all(all(v == 0 for v in subj.values()) for subj in class_data.values()) for class_data in self.remaining_lectures.values())
        
        return self.timetable, reward, done, {}
    
    def render(self):
        for class_idx, class_name in enumerate(self.classes):
            print(f"\nTimetable for {class_name}:")
            for day in range(self.num_days):
                print(f"Day {day+1}: ", end="")
                for period in range(self.periods_per_day):
                    if self.timetable[day, period, class_idx]:
                        print(self.timetable[day, period, class_idx], end=" | ")
                    else:
                        print("Free", end=" | ")
                print()


In [8]:
import random

class RLAgent:
    def __init__(self, env):
        self.env = env
        self.q_table = np.zeros((env.observation_space.shape[0], env.action_space.n))
        self.alpha = 0.1  # Learning rate
        self.gamma = 0.9  # Discount factor
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_decay = 0.995
        self.min_epsilon = 0.01
    
    def train(self, episodes):
        for episode in range(episodes):
            state = self.env.reset()
            done = False
            while not done:
                if random.uniform(0, 1) < self.epsilon:
                    action = self.env.action_space.sample()  # Explore
                else:
                    action = np.argmax(self.q_table[state])  # Exploit
                
                next_state, reward, done, _ = self.env.step(action)
                self.q_table[state, action] = (1 - self.alpha) * self.q_table[state, action] + \
                                              self.alpha * (reward + self.gamma * np.max(self.q_table[next_state]))
                state = next_state
            
            self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)


In [14]:
# Accept timetable structure
num_days = int(input("Enter the number of days: "))
periods_per_day = int(input("Enter the number of periods in one day: "))
lunch_after_period = int(input("Enter the period number after which lunch break occurs: "))

# Accept teacher information
num_teachers = int(input("Enter the number of teachers: "))
teachers = []
for i in range(num_teachers):
    teacher_name = input(f"Enter name for Teacher {i+1}: ")
    teacher_code = input(f"Enter code for Teacher {i+1}: ")
    num_subjects = int(input(f"Enter the number of subjects Teacher {teacher_name} teaches: "))
    subjects = {}
    for j in range(num_subjects):
        subject_name = input(f"  Enter subject {j+1} name: ")
        num_lectures = int(input(f"  Enter number of lectures for {subject_name} in a week: "))
        subjects[subject_name] = num_lectures
    teachers.append({"name": teacher_name, "code": teacher_code, "subjects": subjects})

# Accept class information
num_classes = int(input("Enter the number of classes: "))
classes = []
for i in range(num_classes):
    class_name = input(f"Enter name for Class {i+1}: ")
    classes.append(class_name)

# Initialize the environment
env = TimetableEnv(
    num_days=num_days, 
    periods_per_day=periods_per_day, 
    lunch_after_period=lunch_after_period, 
    teachers=teachers, 
    classes=classes
)

agent = RLAgent(env)



For 10a:
  Teacher jalpa teaches: os
  Teacher nishant teaches: ml

For 10b:
  Teacher jalpa teaches: os
  Teacher nishant teaches: ml


In [15]:
agent.train(episodes=1000)


IndexError: arrays used as indices must be of integer (or boolean) type

In [None]:
env.reset()
done = False
while not done:
    action = np.argmax(agent.q_table)
    state, reward, done, _ = env.step(action)
env.render()
