<a href="https://colab.research.google.com/github/2003UJAN/AI-Powered-Seat-Allocation-Optimization/blob/main/Welcome_To_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install gym torch numpy stable-baselines3




In [11]:
import gym
import numpy as np
import random
import torch
from gym import spaces

In [12]:
class SeatAllocationEnv(gym.Env):
    def __init__(self, rows=10, cols=6, max_passengers=60):
        super(SeatAllocationEnv, self).__init__()

        self.rows = rows
        self.cols = cols
        self.max_passengers = max_passengers

        self.seats = np.zeros((self.rows, self.cols))

        self.passenger_prefs = ["window", "aisle", "extra_legroom"]

        self.action_space = spaces.Discrete(self.rows * self.cols)
        self.observation_space = spaces.Box(low=0, high=1, shape=(self.rows, self.cols), dtype=np.float32)

    def reset(self):
        """Reset the environment at the start of an episode."""
        self.seats = np.zeros((self.rows, self.cols))
        return self.seats.flatten()

    def step(self, action):
        """Perform an action in the environment."""
        row, col = divmod(action, self.cols)

        if self.seats[row, col] == 1:
            reward = -5
            done = False
        else:
            self.seats[row, col] = 1
            reward = self._calculate_reward(row, col)
            done = np.sum(self.seats) >= self.max_passengers

        return self.seats.flatten(), reward, done, {}

    def _calculate_reward(self, row, col):
        reward = 1

        if col == 0 or col == self.cols - 1:
            reward += 2
        elif col in [1, self.cols - 2]:
            reward += 1

        return reward

    def render(self, mode="human"):
        print(self.seats)

In [13]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque

In [14]:
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

class DQNAgent:
    def __init__(self, state_dim, action_dim, lr=0.001, gamma=0.99):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995

        self.model = DQN(state_dim, action_dim)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()
        self.memory = deque(maxlen=1000)

    def select_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.randint(self.action_dim)
        state = torch.FloatTensor(state).unsqueeze(0)
        return torch.argmax(self.model(state)).item()

    def store_experience(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train(self, batch_size=32):
        if len(self.memory) < batch_size:
            return

        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.FloatTensor(states)
        actions = torch.LongTensor(actions)
        rewards = torch.FloatTensor(rewards)
        next_states = torch.FloatTensor(next_states)
        dones = torch.FloatTensor(dones)

        q_values = self.model(states).gather(1, actions.unsqueeze(1)).squeeze()
        next_q_values = self.model(next_states).max(1)[0].detach()
        expected_q_values = rewards + self.gamma * next_q_values * (1 - dones)

        loss = self.loss_fn(q_values, expected_q_values)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [None]:
env = SeatAllocationEnv()
agent = DQNAgent(env.observation_space.shape[0] * env.observation_space.shape[1], env.action_space.n)

num_episodes = 1000
for episode in range(num_episodes):
    state = env.reset()
    done = False
    total_reward = 0

    while not done:
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.store_experience(state, action, reward, next_state, done)
        agent.train()
        state = next_state
        total_reward += reward

    print(f"Episode {episode+1}, Total Reward: {total_reward}")

env.render()

  dones = torch.FloatTensor(dones)


Episode 1, Total Reward: -41180
Episode 2, Total Reward: -18540
Episode 3, Total Reward: -13725
Episode 4, Total Reward: -12445
Episode 5, Total Reward: -12100
Episode 6, Total Reward: -11695
Episode 7, Total Reward: -48050
Episode 8, Total Reward: -5655
Episode 9, Total Reward: -22545
Episode 10, Total Reward: -28190
Episode 11, Total Reward: -5020
Episode 12, Total Reward: -59935
Episode 13, Total Reward: -19495
Episode 14, Total Reward: -54960
Episode 15, Total Reward: -144320
Episode 16, Total Reward: -71000
Episode 17, Total Reward: -58855
Episode 18, Total Reward: -95150
Episode 19, Total Reward: -29500
Episode 20, Total Reward: -35485
Episode 21, Total Reward: -123510
Episode 22, Total Reward: -128310
Episode 23, Total Reward: -87035
Episode 24, Total Reward: -93810
Episode 25, Total Reward: -91765
Episode 26, Total Reward: -69515
Episode 27, Total Reward: -90930
Episode 28, Total Reward: -75855
Episode 29, Total Reward: -77645
Episode 30, Total Reward: -100375
Episode 31, Total

In [None]:
from stable_baselines3 import PPO

In [None]:
ppo_model = PPO("MlpPolicy", env, verbose=1)
ppo_model.learn(total_timesteps=10000)

obs = env.reset()
done = False

while not done:
    action, _ = ppo_model.predict(obs)
    obs, reward, done, _ = env.step(action)
    env.render()