<a href="https://colab.research.google.com/github/PravalikaMuthoju/INTERNSHIP-PROJECTS/blob/main/JSSP_Via_DRL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque, namedtuple

# Load FT10 Dataset
df = pd.read_csv("/content/FT06_Dataset.csv")
df.columns = [c.strip() for c in df.columns]

# Convert machine names like 'M1' to integers
df["Machine"] = df["Machine"].apply(lambda x: int(x[1:]) if isinstance(x, str) and x.startswith("M") else int(x))
df.rename(columns={"Job ID": "Job", "Operation": "Op", "Machine": "Machine", "Processing Time": "Time"}, inplace=True)

num_jobs = df["Job"].nunique()
num_machines = df["Machine"].nunique()
job_ops = df.groupby("Job")["Op"].max().to_dict()
job_ops = {j: v + 1 for j, v in job_ops.items()}
job_data = {(int(r["Job"]), int(r["Op"])): (int(r["Machine"]), int(r["Time"])) for _, r in df.iterrows()}

# Deep Q-Network
class QNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(QNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, output_size)
        )

    def forward(self, x):
        return self.model(x)

# Settings
gamma = 0.95
epsilon = 1.0
epsilon_min = 0.05
epsilon_decay = 0.995
alpha = 0.001
episodes = 20000 # Increased episodes
batch_size = 128
memory = deque(maxlen=50000)
Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state'))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = num_jobs + num_machines
output_size = num_jobs

q_net = QNetwork(input_size, output_size).to(device)
optimizer = optim.Adam(q_net.parameters(), lr=alpha)

# Helper functions
def get_state_vector(job_progress, machine_status):
    return np.array(job_progress + machine_status, dtype=np.float32)

def get_feasible_actions(job_progress, machine_status, time):
    actions = []
    for j in range(num_jobs):
        op = job_progress[j]
        if op < job_ops[j]:
            m, pt = job_data[(j, op)]
            if machine_status[m] <= time:
                actions.append(j)
    return actions

def simulate_episode(train=True):
    job_progress = [0] * num_jobs
    machine_status = [0] * num_machines
    schedule = []
    time = 0
    total_reward = 0

    while any(job_progress[j] < job_ops[j] for j in range(num_jobs)):
        state = get_state_vector(job_progress, machine_status)
        feasible = get_feasible_actions(job_progress, machine_status, time)

        if not feasible:
            next_event_time = float('inf')
            for ms in machine_status:
                if ms > time:
                    next_event_time = min(next_event_time, ms)
            if next_event_time == float('inf'):
                if all(job_progress[j] >= job_ops[j] for j in range(num_jobs)):
                    break
                else:
                    print("Error: No feasible actions but jobs are not completed.")
                    break
            time = next_event_time
            continue

        state_tensor = torch.tensor(state, device=device)

        if train and random.random() < epsilon:
            action_job = random.choice(feasible)
        else:
            with torch.no_grad():
                q_vals = q_net(state_tensor).cpu().numpy()
            masked_q = np.full_like(q_vals, -np.inf)
            for j in feasible:
                masked_q[j] = q_vals[j]
            action_job = int(np.argmax(masked_q))

        op = job_progress[action_job]
        m, pt = job_data[(action_job, op)]
        start = max(machine_status[m], time)
        end = start + pt

        # Modified Reward Function: Penalize makespan and idle time
        current_makespan = max(end, max(machine_status)) # Consider potential makespan increase
        reward = -pt - (current_makespan - time) * 0.1 # Penalize processing time and time until next job can start


        next_job_progress = job_progress.copy()
        next_machine_status = machine_status.copy()
        next_job_progress[action_job] += 1
        next_machine_status[m] = end
        next_state = get_state_vector(next_job_progress, next_machine_status)

        if train:
            memory.append(Transition(state, action_job, reward, next_state))

        job_progress = next_job_progress
        machine_status = next_machine_status
        time = min(machine_status)
        schedule.append((action_job, op, m, start, end))
        total_reward += reward

    makespan = max(e for *_, e in schedule) if schedule else 0
    return total_reward, makespan, schedule

# Training loop
makespans = []
schedules = []
# Run one episode with pure random actions to get a baseline initial makespan
_, initial_makespan, _ = simulate_episode(train=False)
makespans.append(initial_makespan)

for ep in range(episodes):
    _, mkspan, sched = simulate_episode(train=True)
    makespans.append(mkspan)
    schedules.append(sched)

    if len(memory) >= batch_size:
        batch = random.sample(memory, batch_size)
        batch = Transition(*zip(*batch))

        state_batch = torch.tensor(batch.state, device=device)
        action_batch = torch.tensor(batch.action, device=device).unsqueeze(1)
        reward_batch = torch.tensor(batch.reward, dtype=torch.float32, device=device).unsqueeze(1)
        next_state_batch = torch.tensor(batch.next_state, device=device)

        q_values = q_net(state_batch).gather(1, action_batch)
        with torch.no_grad():
            q_next = q_net(next_state_batch).max(1)[0].unsqueeze(1)
        target = reward_batch + gamma * q_next

        loss = nn.MSELoss()(q_values, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    if ep % 1000 == 0: # Print less often for more episodes
        print(f"Episode {ep}, Makespan: {mkspan}, Epsilon: {epsilon:.3f}")

# Best result
best_idx = np.argmin(makespans)
best_makespan = makespans[best_idx]
best_schedule = schedules[best_idx]

print("\n🔧 Job-Machine-Time Assignments:")
schedule_df = pd.DataFrame(best_schedule, columns=["Job ID", "Operation", "Machine", "Start", "End"])
schedule_df["Current Makespan"] = schedule_df["End"].cummax()
schedule_df["Contributes to Makespan"] = schedule_df["End"] == schedule_df["Current Makespan"]
print(schedule_df.to_string(index=False))

initial_makespan = makespans[0]
makespan_reduced_by = initial_makespan - best_makespan
total_processing_time = sum(df["Time"])
total_machine_time = best_makespan * num_machines
idle_time = total_machine_time - total_processing_time
machine_utilization = (total_processing_time / total_machine_time) * 100 if total_machine_time > 0 else 0

print(f"\n🚀 Initial Random Makespan: {initial_makespan}")
print(f"📊 Total Optimized Makespan: {best_makespan}")
print(f"📉 Makespan Reduced By: {makespan_reduced_by} time units")
print(f"🛠 Idle Machine Time Units: {idle_time}")
print(f"📈 Machine Utilization: {machine_utilization:.2f}%")

Episode 0, Makespan: 45, Epsilon: 0.995
Episode 1000, Makespan: 45, Epsilon: 0.050
Episode 2000, Makespan: 45, Epsilon: 0.050
Episode 3000, Makespan: 45, Epsilon: 0.050
Episode 4000, Makespan: 45, Epsilon: 0.050
Episode 5000, Makespan: 45, Epsilon: 0.050
Episode 6000, Makespan: 45, Epsilon: 0.050
Episode 7000, Makespan: 45, Epsilon: 0.050
Episode 8000, Makespan: 45, Epsilon: 0.050
Episode 9000, Makespan: 45, Epsilon: 0.050
Episode 10000, Makespan: 45, Epsilon: 0.050
Episode 11000, Makespan: 45, Epsilon: 0.050
Episode 12000, Makespan: 45, Epsilon: 0.050
Episode 13000, Makespan: 45, Epsilon: 0.050
Episode 14000, Makespan: 45, Epsilon: 0.050
Episode 15000, Makespan: 45, Epsilon: 0.050
Episode 16000, Makespan: 45, Epsilon: 0.050
Episode 17000, Makespan: 45, Epsilon: 0.050
Episode 18000, Makespan: 45, Epsilon: 0.050
Episode 19000, Makespan: 45, Epsilon: 0.050

🔧 Job-Machine-Time Assignments:
 Job ID  Operation  Machine  Start  End  Current Makespan  Contributes to Makespan
      0          

In [None]:
# ✅ Improvements based on your request:
# - Accurately compare initial vs optimized makespan
# - Compute real machine usage time (not just sum of all processing times)
# - Show only final makespan-contributing operations
# - Make output easier to analyze

import pandas as pd
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque, namedtuple

# Load dataset
df = pd.read_csv("/content/FT06_Dataset.csv")
df.columns = [c.strip() for c in df.columns]
df["Machine"] = df["Machine"].apply(lambda x: int(x[1:]) if isinstance(x, str) and x.startswith("M") else int(x))
df.rename(columns={"Job ID": "Job", "Operation": "Op", "Machine": "Machine", "Processing Time": "Time"}, inplace=True)

num_jobs = df["Job"].nunique()
num_machines = df["Machine"].nunique()
job_ops = df.groupby("Job")["Op"].max().to_dict()
job_ops = {j: v + 1 for j, v in job_ops.items()}
job_data = {(int(r["Job"]), int(r["Op"])): (int(r["Machine"]), int(r["Time"])) for _, r in df.iterrows()}

class QNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(QNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, output_size)
        )

    def forward(self, x):
        return self.model(x)

# Config
gamma = 0.95
epsilon = 1.0
epsilon_min = 0.05
epsilon_decay = 0.995
alpha = 0.001
episodes = 20000
batch_size = 128
memory = deque(maxlen=50000)
Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state'))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = num_jobs + num_machines
output_size = num_jobs

q_net = QNetwork(input_size, output_size).to(device)
optimizer = optim.Adam(q_net.parameters(), lr=alpha)

def get_state_vector(job_progress, machine_status):
    return np.array(job_progress + machine_status, dtype=np.float32)

def get_feasible_actions(job_progress, machine_status, time):
    return [j for j in range(num_jobs)
            if job_progress[j] < job_ops[j]
            and machine_status[job_data[(j, job_progress[j])][0]] <= time]

def simulate_episode(train=True):
    job_progress = [0] * num_jobs
    machine_status = [0] * num_machines
    schedule = []
    time = 0
    total_reward = 0

    while any(job_progress[j] < job_ops[j] for j in range(num_jobs)):
        state = get_state_vector(job_progress, machine_status)
        feasible = get_feasible_actions(job_progress, machine_status, time)

        if not feasible:
            next_time = min([ms for ms in machine_status if ms > time], default=time + 1)
            time = next_time
            continue

        state_tensor = torch.tensor(state, device=device)
        if train and random.random() < epsilon:
            action_job = random.choice(feasible)
        else:
            with torch.no_grad():
                q_vals = q_net(state_tensor).cpu().numpy()
            masked_q = np.full_like(q_vals, -np.inf)
            for j in feasible:
                masked_q[j] = q_vals[j]
            action_job = int(np.argmax(masked_q))

        op = job_progress[action_job]
        m, pt = job_data[(action_job, op)]
        start = max(machine_status[m], time)
        end = start + pt

        reward = -pt - (end - time) * 0.1
        next_job_progress = job_progress.copy()
        next_machine_status = machine_status.copy()
        next_job_progress[action_job] += 1
        next_machine_status[m] = end
        next_state = get_state_vector(next_job_progress, next_machine_status)

        if train:
            memory.append(Transition(state, action_job, reward, next_state))

        job_progress = next_job_progress
        machine_status = next_machine_status
        time = min(machine_status)
        schedule.append((action_job, op, m, start, end))
        total_reward += reward

    makespan = max(e for *_, e in schedule) if schedule else 0
    return total_reward, makespan, schedule

# Training loop
makespans = []
schedules = []
_, initial_makespan, _ = simulate_episode(train=False)
makespans.append(initial_makespan)

for ep in range(episodes):
    _, mkspan, sched = simulate_episode(train=True)
    makespans.append(mkspan)
    schedules.append(sched)

    if len(memory) >= batch_size:
        batch = random.sample(memory, batch_size)
        batch = Transition(*zip(*batch))

        state_batch = torch.tensor(batch.state, device=device)
        action_batch = torch.tensor(batch.action, device=device).unsqueeze(1)
        reward_batch = torch.tensor(batch.reward, dtype=torch.float32, device=device).unsqueeze(1)
        next_state_batch = torch.tensor(batch.next_state, device=device)

        q_values = q_net(state_batch).gather(1, action_batch)
        with torch.no_grad():
            q_next = q_net(next_state_batch).max(1)[0].unsqueeze(1)
        target = reward_batch + gamma * q_next

        loss = nn.MSELoss()(q_values, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    if ep % 1000 == 0:
        print(f"Episode {ep}, Makespan: {mkspan}, Epsilon: {epsilon:.3f}")

# Final Results
best_idx = np.argmin(makespans)
best_makespan = makespans[best_idx]
best_schedule = schedules[best_idx]

schedule_df = pd.DataFrame(best_schedule, columns=["Job ID", "Operation", "Machine", "Start", "End"])
schedule_df.sort_values(by="Start", inplace=True)
schedule_df["Current Makespan"] = schedule_df["End"].cummax()
schedule_df["Contributes to Makespan"] = schedule_df["End"] == best_makespan
used_time = schedule_df["End"].sub(schedule_df["Start"]).sum()
idle_machines = num_machines * best_makespan - used_time
utilization = used_time / (num_machines * best_makespan)

print("\n🔧 Job-Machine-Time Assignments:")
print(schedule_df.to_string(index=False))
print(f"\n🚀 Initial Random Makespan: {initial_makespan}")
print(f"📊 Total Optimized Makespan: {best_makespan}")
print(f"📉 Makespan Reduced By: {initial_makespan - best_makespan} time units")
print(f"🛠 Idle Machine Time Units: {int(idle_machines)}")
print(f"📈 Machine Utilization: {utilization:.2%}")


Episode 0, Makespan: 45, Epsilon: 0.995
Episode 1000, Makespan: 45, Epsilon: 0.050
Episode 2000, Makespan: 45, Epsilon: 0.050
Episode 3000, Makespan: 45, Epsilon: 0.050
Episode 4000, Makespan: 45, Epsilon: 0.050
Episode 5000, Makespan: 45, Epsilon: 0.050
Episode 6000, Makespan: 45, Epsilon: 0.050
Episode 7000, Makespan: 45, Epsilon: 0.050
Episode 8000, Makespan: 45, Epsilon: 0.050
Episode 9000, Makespan: 45, Epsilon: 0.050
Episode 10000, Makespan: 45, Epsilon: 0.050
Episode 11000, Makespan: 45, Epsilon: 0.050
Episode 12000, Makespan: 45, Epsilon: 0.050
Episode 13000, Makespan: 45, Epsilon: 0.050
Episode 14000, Makespan: 45, Epsilon: 0.050
Episode 15000, Makespan: 45, Epsilon: 0.050
Episode 16000, Makespan: 45, Epsilon: 0.050
Episode 17000, Makespan: 45, Epsilon: 0.050
Episode 18000, Makespan: 45, Epsilon: 0.050
Episode 19000, Makespan: 45, Epsilon: 0.050

🔧 Job-Machine-Time Assignments:
 Job ID  Operation  Machine  Start  End  Current Makespan  Contributes to Makespan
      0          

In [None]:
# ✅ Final Optimized Version for FT10 or T10 Dataset
# Uses smart reward + longer training + better epsilon

import pandas as pd
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque, namedtuple

# Load FT10 or T10 Dataset
df = pd.read_csv("/content/FT10_Dataset.csv")  # change to T10_Dataset.csv if needed
df.columns = [c.strip() for c in df.columns]
df["Machine"] = df["Machine"].apply(lambda x: int(x[1:]) if isinstance(x, str) and x.startswith("M") else int(x))
df.rename(columns={"Job ID": "Job", "Operation": "Op", "Machine": "Machine", "Processing Time": "Time"}, inplace=True)

num_jobs = df["Job"].nunique()
num_machines = df["Machine"].nunique()
job_ops = df.groupby("Job")["Op"].max().to_dict()
job_ops = {j: v + 1 for j, v in job_ops.items()}
job_data = {(int(r["Job"]), int(r["Op"])): (int(r["Machine"]), int(r["Time"])) for _, r in df.iterrows()}

class QNetwork(nn.Module):
    def __init__(self, input_size, output_size):
        super(QNetwork, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, output_size)
        )

    def forward(self, x):
        return self.model(x)

# Config
gamma = 0.95
epsilon = 1.0
epsilon_min = 0.05
epsilon_decay = 0.999
alpha = 0.0005
episodes = 30000
batch_size = 256
memory = deque(maxlen=100000)
Transition = namedtuple('Transition', ('state', 'action', 'reward', 'next_state'))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = num_jobs + num_machines
output_size = num_jobs

q_net = QNetwork(input_size, output_size).to(device)
optimizer = optim.Adam(q_net.parameters(), lr=alpha)

def get_state_vector(job_progress, machine_status):
    return np.array(job_progress + machine_status, dtype=np.float32)

def get_feasible_actions(job_progress, machine_status, time):
    return [j for j in range(num_jobs)
            if job_progress[j] < job_ops[j]
            and machine_status[job_data[(j, job_progress[j])][0]] <= time]

def simulate_episode(train=True):
    job_progress = [0] * num_jobs
    machine_status = [0] * num_machines
    schedule = []
    time = 0
    total_reward = 0

    while any(job_progress[j] < job_ops[j] for j in range(num_jobs)):
        state = get_state_vector(job_progress, machine_status)
        feasible = get_feasible_actions(job_progress, machine_status, time)

        if not feasible:
            time = min([ms for ms in machine_status if ms > time], default=time + 1)
            continue

        state_tensor = torch.tensor(state, device=device)
        if train and random.random() < epsilon:
            action_job = random.choice(feasible)
        else:
            with torch.no_grad():
                q_vals = q_net(state_tensor).cpu().numpy()
            masked_q = np.full_like(q_vals, -np.inf)
            for j in feasible:
                masked_q[j] = q_vals[j]
            action_job = int(np.argmax(masked_q))

        op = job_progress[action_job]
        m, pt = job_data[(action_job, op)]
        start = max(machine_status[m], time)
        end = start + pt

        next_job_progress = job_progress.copy()
        next_machine_status = machine_status.copy()
        next_job_progress[action_job] += 1
        next_machine_status[m] = end
        next_state = get_state_vector(next_job_progress, next_machine_status)

        idle_penalty = sum([max(0, time - ms) for ms in machine_status])
        reward = -pt - 0.3 * (start - time) - 0.05 * end - 0.1 * idle_penalty

        if train:
            memory.append(Transition(state, action_job, reward, next_state))

        job_progress = next_job_progress
        machine_status = next_machine_status
        time = min(machine_status)
        schedule.append((action_job, op, m, start, end))
        total_reward += reward

    makespan = max(e for *_, e in schedule) if schedule else 0
    return total_reward, makespan, schedule

# Training loop
makespans = []
schedules = []
_, initial_makespan, _ = simulate_episode(train=False)
makespans.append(initial_makespan)

for ep in range(episodes):
    _, mkspan, sched = simulate_episode(train=True)
    makespans.append(mkspan)
    schedules.append(sched)

    if len(memory) >= batch_size:
        batch = random.sample(memory, batch_size)
        batch = Transition(*zip(*batch))

        state_batch = torch.tensor(batch.state, device=device)
        action_batch = torch.tensor(batch.action, device=device).unsqueeze(1)
        reward_batch = torch.tensor(batch.reward, dtype=torch.float32, device=device).unsqueeze(1)
        next_state_batch = torch.tensor(batch.next_state, device=device)

        q_values = q_net(state_batch).gather(1, action_batch)
        with torch.no_grad():
            q_next = q_net(next_state_batch).max(1)[0].unsqueeze(1)
        target = reward_batch + gamma * q_next

        loss = nn.MSELoss()(q_values, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    if ep % 1000 == 0:
        print(f"Episode {ep}, Makespan: {mkspan}, Epsilon: {epsilon:.3f}")

# Final Results
best_idx = np.argmin(makespans)
best_makespan = makespans[best_idx]
best_schedule = schedules[best_idx]

schedule_df = pd.DataFrame(best_schedule, columns=["Job ID", "Operation", "Machine", "Start", "End"])
schedule_df.sort_values(by="Start", inplace=True)
schedule_df["Current Makespan"] = schedule_df["End"].cummax()
schedule_df["Contributes to Makespan"] = schedule_df["End"] == best_makespan
used_time = schedule_df["End"].sub(schedule_df["Start"]).sum()
idle_machines = num_machines * best_makespan - used_time
utilization = used_time / (num_machines * best_makespan)

print("\n🔧 Job-Machine-Time Assignments:")
print(schedule_df.to_string(index=False))
print(f"\n🚀 Initial Random Makespan: {initial_makespan}")
print(f"📊 Total Optimized Makespan: {best_makespan}")
print(f"📉 Makespan Reduced By: {initial_makespan - best_makespan} time units")
print(f"🛠 Idle Machine Time Units: {int(idle_machines)}")
print(f"📈 Machine Utilization: {utilization:.2%}")


Episode 0, Makespan: 704, Epsilon: 0.999
Episode 1000, Makespan: 704, Epsilon: 0.367
Episode 2000, Makespan: 704, Epsilon: 0.135
Episode 3000, Makespan: 704, Epsilon: 0.050
Episode 4000, Makespan: 704, Epsilon: 0.050
Episode 5000, Makespan: 704, Epsilon: 0.050
Episode 6000, Makespan: 704, Epsilon: 0.050
Episode 7000, Makespan: 704, Epsilon: 0.050
Episode 8000, Makespan: 704, Epsilon: 0.050
