In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import networkx as nx
import json
import numpy as np
import os

class GCNLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(GCNLayer, self).__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, node_features, adj_matrix):
         # Perform a matrix multiplication for neighbor aggregation
         aggregated_features = torch.matmul(adj_matrix, node_features)
         # Linear Transformation
         node_embeddings = self.linear(aggregated_features)
         return node_embeddings

class MyGCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(MyGCN, self).__init__()
        self.gcn1 = GCNLayer(input_dim, hidden_dim)
        self.gcn2 = GCNLayer(hidden_dim, output_dim)

    def forward(self, adj_matrix, node_features):
      # Perform forward pass with activation
      h1 = F.relu(self.gcn1(node_features, adj_matrix))
      h2 = self.gcn2(h1, adj_matrix)
      return h2


class MyHeuristic():
    def __init__(self, params):
        self.params = params

    def __call__(self, gnn_embeddings, raw_features):
      #Implement your heuristic function
       priorities = {}
       for key, values in raw_features["jobs"].items():
           job_id = values["job_id"]
           priority = values["priority"]
           priorities[f"job_{job_id}"]=priority
       action = max(priorities, key = priorities.get)
       return action

class FMSDatasetGenerator:
    """Generates datasets for an FMS scheduling problem."""

    def __init__(self, num_jobs, num_machines, num_ops_per_job_range, processing_time_range,
                 failure_rate, setup_time_range, resource_capacity, load_factor,
                 dynamic_arrival=True, fixed_routing=False):
        """
        Initializes the generator.
        Args:
            num_jobs (int or tuple): The number of jobs, or a range for the number of jobs
            num_machines (int): The number of machines.
            num_ops_per_job_range (tuple): Range of operations a job can have.
            processing_time_range (tuple): Processing time range (min, max).
            failure_rate (float): Probability of machine failure (per simulation step).
            setup_time_range (tuple): range for setup times (min, max).
            resource_capacity(int): number of resources shared by machines
            load_factor (float): Load factor for resource contention(percentage of machines that need shared resources).
            dynamic_arrival (bool): Whether to add dynamic arrivals
            fixed_routing (bool): whether the routing for each job is fixed
        """
        self.num_jobs = num_jobs if isinstance(num_jobs, int) else random.randint(*num_jobs)
        self.num_machines = num_machines
        self.num_ops_per_job_range = num_ops_per_job_range
        self.processing_time_range = processing_time_range
        self.failure_rate = failure_rate
        self.setup_time_range = setup_time_range
        self.resource_capacity = resource_capacity
        self.load_factor = load_factor
        self.dynamic_arrival = dynamic_arrival
        self.fixed_routing = fixed_routing
        self.machines = self._generate_machines()
        self.jobs = self._generate_jobs()
        self.resources = self._generate_resources()

    def _generate_resources(self):
      resources = {
          "resource_capacity": self.resource_capacity,
          "machines":  random.sample(self.machines, int(len(self.machines) * self.load_factor))
      }
      return resources

    def _generate_machines(self):
        """Generates machine data."""
        machines = []
        for machine_id in range(self.num_machines):
            machines.append({"machine_id": machine_id,
                               "status": "available",  # initially available
                               "current_job": None,
                               "next_available_time": 0,
                               "type": random.choice(["general", "special"])  # Example of different machine types
                              })
        return machines

    def _generate_jobs(self):
      """Generates job data."""
      jobs = []
      for job_id in range(self.num_jobs):
          num_ops = random.randint(*self.num_ops_per_job_range)
          ops = self._generate_job_ops(job_id, num_ops)
          jobs.append({"job_id": job_id, "operations": ops,
                       "arrival_time": 0 if not self.dynamic_arrival else random.randint(0,50),
                       "completed_operations":0,
                       "priority": random.randint(1,5),
                       "machine_seq": [op["machine"] for op in ops],
                       "current_operation":0
                       })
      return jobs

    def _generate_job_ops(self, job_id, num_ops):
        """Generates operations for a single job."""
        ops = []
        available_machines = self._get_available_machines()
        if self.fixed_routing:
          machine_seq = random.sample(available_machines, num_ops)
        else:
            machine_seq = []

        for op_id in range(num_ops):
          if self.fixed_routing:
            machine = machine_seq[op_id]
          else:
              machine = random.choice(available_machines)
          proc_time = self._triangular_distribution(*self.processing_time_range)
          setup_time = random.uniform(*self.setup_time_range) if op_id > 0 else 0.0
          ops.append({"job_id": job_id, "op_id": op_id, "machine": machine,
                          "processing_time": proc_time, "setup_time": setup_time,
                          "status": "pending"  # initially pending
                         })
        return ops

    def _triangular_distribution(self, min, max):
        mode = (min + max)/2
        u = random.uniform(0,1)
        if u <= (mode-min)/(max-min):
            return min + np.sqrt(u*(max-min)*(mode-min))
        else:
             return max-np.sqrt((1-u)*(max-min)*(max-mode))

    def _get_available_machines(self):
      available_machines = []
      for m in self.machines:
          available_machines.append(m["machine_id"])
      return available_machines


    def save_dataset(self, filename, save_dir):
        """Saves dataset to a JSON file."""
        os.makedirs(save_dir, exist_ok = True)
        dataset = {
            "jobs": self.jobs,
            "machines": self.machines,
            "resources": self.resources,
        }

        with open(os.path.join(save_dir,filename), 'w') as f:
            json.dump(dataset, f, indent=4)

    @classmethod
    def load_dataset(cls, filename):
        """Loads dataset from a JSON file."""
        with open(filename, 'r') as f:
            dataset = json.load(f)

        instance = cls(num_jobs=len(dataset['jobs']),
                      num_machines=len(dataset['machines']),
                      num_ops_per_job_range=(3, 5),  # Dummy range
                      processing_time_range=(2, 10),  # Dummy range
                      failure_rate=0.1,  # Dummy rate
                      setup_time_range=(1, 2),  # Dummy time
                      resource_capacity=5,  # Dummy capacity
                      load_factor=0.4  # Dummy load factor
                      )
        instance.jobs = dataset["jobs"]
        instance.machines = dataset["machines"]
        instance.resources = dataset["resources"]
        return instance

def create_graph_and_features(dataset, current_time = 0):
  """
        Creates graph data and feature vectors from current dataset status.
        Args:
            dataset (dict): The loaded dataset from JSON
            current_time (int): current time of the simulation
        Returns:
            tuple: (graph_data (nx.Graph), heuristic_data (dict))
        """
  jobs = dataset.jobs
  machines = dataset.machines
  graph = nx.DiGraph()
  heuristic_data = {"jobs": [], "machines": [], "operations": []}
   # 1. Node generation:
  # Add job nodes
  for job in jobs:
      graph.add_node(f"job_{job['job_id']}", type="job", **job)
      heuristic_data["jobs"].append(_extract_job_features(job,current_time))

  # Add machine nodes
  for machine in machines:
    # Create a copy without 'type' or any other non-node property
      machine_node_data = {k: v for k, v in machine.items() if k not in ["type", "current_job"]}
      graph.add_node(f"machine_{machine['machine_id']}", type="machine", **machine_node_data)
      heuristic_data["machines"].append(_extract_machine_features(machine, current_time))

  # Add operation nodes
  for job in jobs:
      for op in job["operations"]:
          graph.add_node(f"op_{job['job_id']}_{op['op_id']}", type="operation", **op)
          heuristic_data["operations"].append(_extract_op_features(op, job, current_time))

  # 2. Edge Generation
  #Edges for operations in a job sequence
  for job in jobs:
      ops = job["operations"]
      for i in range(len(ops)-1):
          graph.add_edge(f"op_{job['job_id']}_{ops[i]['op_id']}",
                      f"op_{job['job_id']}_{ops[i+1]['op_id']}", type="op_seq")

  #Edges between operations and machines
  for job in jobs:
      for op in job["operations"]:
         machine_node = f"machine_{op['machine']}"
         graph.add_edge(f"op_{job['job_id']}_{op['op_id']}", machine_node, type="machine_link")

  #Disjunctive edges for operations in a machine
  for m in machines:
      ops_in_machine = []
      for job in jobs:
          for op in job["operations"]:
              if op["machine"] == m["machine_id"]:
                  ops_in_machine.append(f"op_{job['job_id']}_{op['op_id']}")
      for i in range(len(ops_in_machine)):
          for j in range(i+1, len(ops_in_machine)):
              graph.add_edge(ops_in_machine[i], ops_in_machine[j], type = "disjunctive_link")

  return graph, heuristic_data


def _extract_job_features(job, current_time):
   """
       Extracts a feature vector for job.
       Args:
            job (dict): current job
           current_time (int): current simulation time
       Returns:
            dict: features
       """
   time_since_arrival = current_time-job["arrival_time"]
   remaining_operations = len(job["operations"])- job["completed_operations"]
   features = {"job_id": job["job_id"],
               "priority": job["priority"],
               "time_since_arrival": time_since_arrival,
               "remaining_operations":remaining_operations,
               "completed_operations": job["completed_operations"]
               }
   return features

def _extract_machine_features(machine, current_time):
   """Extracts a feature vector for machine"""
   features = { "machine_id": machine["machine_id"],
               "status": machine["status"],
                "next_available_time": machine["next_available_time"] - current_time if machine["status"]=="busy" else 0 ,
              "type": machine["type"]}
   return features

def _extract_op_features(op, job, current_time):
   """Extracts a feature vector for operation."""
   features = { "job_id": job["job_id"],
               "op_id": op["op_id"],
                "machine": op["machine"],
                "processing_time": op["processing_time"],
                "setup_time": op["setup_time"],
                "status": op["status"]
   }
   return features
def prepare_gnn_input(graph, feature_data):
    """Prepares data for your GNN."""
    # 1. Node Features
    node_features = []
    node_ids = []
    for node_id, node_data in graph.nodes(data=True):
       if node_data["type"]=="job":
           node_features.append(list(feature_data["jobs"][node_id].values()))
       elif node_data["type"]=="machine":
         node_features.append(list(feature_data["machines"][node_id].values()))
       elif node_data["type"]=="operation":
        node_features.append(list(feature_data["operations"][node_id].values()))
       node_ids.append(node_id)

    node_features_tensor = torch.tensor(node_features, dtype=torch.float32)

     # 2. Edge list and edge features (optional based on the GNN used)
    edge_list = list(graph.edges())
    edge_features = []
    # create indices based on node_id, then create a tensor of edges
    node_ids_map = {value: index for index, value in enumerate(node_ids)}
    edge_index_list = [[node_ids_map[edge[0]], node_ids_map[edge[1]]] for edge in edge_list]
    edge_index_tensor = torch.tensor(edge_index_list, dtype = torch.long).t().contiguous()

    # Create adjacency matrix
    adj_matrix = nx.to_numpy_array(graph, nodelist = node_ids)
    adj_matrix_tensor = torch.tensor(adj_matrix, dtype=torch.float32)
    return  node_features_tensor, adj_matrix_tensor, edge_features



# --- 4. Main Loop ---
def main_simulation(train_data_dir, val_data_dir, test_data_dir,
             gnn_model, heuristic_algo, train_iters):
   train_files = os.listdir(train_data_dir)
   val_files = os.listdir(val_data_dir)
   test_files = os.listdir(test_data_dir)
    # --- Training Loop ---
   for i in range(train_iters):
     for filename in train_files:
      loaded_generator, graph, feature_data = load_and_create_dataset(os.path.join(train_data_dir,filename), current_time=i)
      node_features, adj_matrix, edge_features = prepare_gnn_input(graph, feature_data)
      output = gnn_model(adj_matrix, node_features)
      action = heuristic_algo(output, feature_data)
      # simulate system behavior with the selected action, update system, backpropagate to update GNN parameters
      # calculate rewards, update GNN weights based on your loss function
      print(f"Selected job: {action}")

    # --- Validation Loop ---
    for filename in val_files:
      loaded_generator, graph, feature_data = load_and_create_dataset(os.path.join(val_data_dir,filename), current_time=i)
      node_features, adj_matrix, edge_features = prepare_gnn_input(graph, feature_data)
      output = gnn_model(adj_matrix, node_features)
      action = heuristic_algo(output, feature_data)
    # simulate system behavior with the selected action, update system, calculate evaluation metrics

   # --- Test Loop ---
    for filename in test_files:
      loaded_generator, graph, feature_data = load_and_create_dataset(os.path.join(test_data_dir,filename), current_time=i)
      node_features, adj_matrix, edge_features = prepare_gnn_input(graph, feature_data)
      output = gnn_model(adj_matrix, node_features)
      action = heuristic_algo(output, feature_data)
    # simulate system behavior with the selected action, update system, calculate evaluation metrics

# ----- Generate and Save Multiple Datasets -----
num_datasets = 10000
train_split = 0.7
val_split = 0.2
test_split = 0.1

train_data_dir = 'fms_train_data'
val_data_dir = 'fms_val_data'
test_data_dir = 'fms_test_data'

os.makedirs(train_data_dir, exist_ok = True)
os.makedirs(val_data_dir, exist_ok = True)
os.makedirs(test_data_dir, exist_ok = True)

for i in range(num_datasets):
    generator = FMSDatasetGenerator(
        num_jobs=(20, 100),  # Variable number of jobs
        num_machines=10,
        num_ops_per_job_range=(3, 8),
        processing_time_range=(2, 20),
        failure_rate=0.05,
        setup_time_range=(0, 3),
        resource_capacity = 6,
        load_factor = 0.6,
        dynamic_arrival=True,
        fixed_routing = False
    )
    if i < num_datasets * train_split:
        save_dir = train_data_dir
    elif i < num_datasets * (train_split + val_split):
        save_dir = val_data_dir
    else:
         save_dir = test_data_dir

    filename = f"fms_dataset_{i}.json"
    generator.save_dataset(filename, save_dir)
    print(f"Dataset {i+1}/{num_datasets} generated and saved to {filename}")

print("All datasets generated.")

# ----- Load and check one example -----
loaded_generator = FMSDatasetGenerator.load_dataset(os.path.join(train_data_dir, "fms_dataset_0.json"))
loaded_graph, loaded_heuristic = create_graph_and_features(loaded_generator,0)
print(f"Number of nodes: {len(loaded_graph.nodes)}")
print(f"Number of edges: {len(loaded_graph.edges)}")
print(f"Number of jobs: {len(loaded_generator.jobs)}")
print(f"Number of machines: {len(loaded_generator.machines)}")
print(f"Number of resources: {loaded_generator.resources}")

if  loaded_heuristic['jobs']:
   print(f"Example of job feature list: {loaded_heuristic['jobs'][0]}")
else:
    print("Job feature list is empty")
if loaded_heuristic['machines']:
   print(f"Example of machine feature list: {loaded_heuristic['machines'][0]}")
else:
    print("Machine feature list is empty")
if loaded_heuristic['operations']:
   print(f"Example of operation feature list: {loaded_heuristic['operations'][0]}")
else:
    print("Operation feature list is empty")

# --- Setup and Run ---
input_dim = 9 #number of node features
hidden_dim = 64
output_dim = 32

my_gnn_model = MyGCN(input_dim, hidden_dim, output_dim) # Dummy Implementation
my_heuristic = MyHeuristic(params = None) # Dummy Implementation
main_simulation(train_data_dir,val_data_dir, test_data_dir, my_gnn_model, my_heuristic, train_iters = 1)

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 331)