In [1]:

# We illustrate the generation of a scheduling problem using a local model  (using chain-of-thought and few-shot prompting)
# Notice you have to have your api-key under .env

# Doing all the imports in this cell because each local GPU run needs a kernel restart. 
# Without a kernel restart, CPU is used instead of GPU after loading a local model once.
from LLM import bots
from ASP_Scheduler.problem_descriptions import all_problems
from ASP_Scheduler import scheduler
from ASP_Scheduler import schedulerBaseline
import importlib
from pprint import pprint
from openai import OpenAI
import os
from datetime import datetime
from utils import logger

# bitsandbytes CUDA override for running a local model: force bnb to load
# libbitsandbytes_cuda123.dll (per the original note, a bitsandbytes build for
# CUDA 12.4 doesn't exist yet, so CUDA 12.3 is forced).
os.environ["BNB_CUDA_VERSION"] = "123"

###########################################################
#                        SETTINGS                         #
###########################################################

# ---- General settings ----
# True  -> run a local model
# False -> run a remote model via OpenAI API
RUN_LOCAL_MODEL = True
# True -> print intermediate outputs
PRINT = True
# Folder to save programs (set to None to disable saving)
PROGRAM_FOLDER = 'programs'
# File name handed to the metrics logger
METRICS_LOG_FILE = 'metrics/metrics.csv'

# ---- Remote settings ----
# Value used as the pipe when RUN_LOCAL_MODEL is False:
#   'deepseek' -> 'deepseek' model on OpenAI API
#   None       -> defaults to meta-llama/Meta-Llama-3-8B-Instruct
REMOTE_PIPE = None

# ---- Local settings ----
# Checkpoint notes:
#   "meta-llama/Meta-Llama-3.1-8B-Instruct" gives kernel crashes after (or during)
#       generating soft constraints.
#   "meta-llama/Meta-Llama-3-8B-Instruct"   no kernel crashes, but less capable.
CHECKPOINT = "meta-llama/Meta-Llama-3-8B-Instruct"
# '4bit' or '8bit' for 4 or 8-bit quantization (faster, less memory);
# None for no quantization (full model, slower, more memory)
QUANTIZATION_CONFIG = '4bit'

# ---- Problem settings ----
# Run the program for all available problem names.
# For a single-problem test run, use e.g.
#   PROBLEM_NAMES = ['post_enrollment_based_course_time_tabling']
PROBLEM_NAMES = list(all_problems.keys())
# Maximum number of repair attempts per statement block
MAX_REPAIRS = 5
# Number of runs per problem for averaging results
RUNS_PER_PROBLEM = 2

# Local runs need the pipeline loaded manually; for remote models the pipe is
# just the configured REMOTE_PIPE value (a model name string or None).
PIPE = (
    bots.load_pipe(model_checkpoint=CHECKPOINT, local_dir="./local_models", quantization_config=QUANTIZATION_CONFIG)
    if RUN_LOCAL_MODEL
    else REMOTE_PIPE
)


# Run the LLM scheduler per problem
# Generate a full ASP program for every configured problem, RUNS_PER_PROBLEM times each
for problem_name in PROBLEM_NAMES:
    for run_id in range(RUNS_PER_PROBLEM):
        # (Re)initialize the metrics logger at the start of every run
        logger.init_logger(
            filename=METRICS_LOG_FILE,
            problem_ID=problem_name,
            max_fix_attempts=MAX_REPAIRS,
        )

        full_program = scheduler.full_ASP_program(
            all_problems[problem_name],  # Problem specification selected by name
            pipe=PIPE,                   # Loaded local pipeline, or the remote pipe setting
            printer=PRINT,               # Whether intermediate outputs are printed
            k=MAX_REPAIRS,               # Repair-attempt limit
        )

        if PROGRAM_FOLDER is None:
            # Saving is disabled: print the full program as it is returned by the scheduler
            print('----------------------------FULL PROGRAM----------------------------')
            print(full_program)
            continue

        # Saving is enabled: write the program to a timestamped .lp file
        os.makedirs(PROGRAM_FOLDER, exist_ok=True)
        run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
        repairs_suffix = "" if MAX_REPAIRS is None else f"_max_{MAX_REPAIRS}_repairs"
        program_filename = os.path.join(
            PROGRAM_FOLDER,
            f"{problem_name}{repairs_suffix}_{run_stamp}.lp",
        )
        with open(program_filename, 'w', encoding='utf-8') as program_file:
            program_file.write(full_program)
        if PRINT:
            print(f"Full program saved to {program_filename}")

downloading model...


This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.
If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=
If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH
For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


loading model...
####################################################################################
PROGRAM PART DID NOT NEED REPAIR!
RESPONSE:
% Instance Template
exam(Exam, Duration, Is_large).

student(Student, Exam).

period(Date, Time, Duration, Is_late, Penalty).

room(Room, Capacity, Penalty).

order_constraint(Exam1, Exam2).

same_time_constraint(Exam1, Exam2).

different_time_constraint(Exam1, Exam2).

own_room_constraint(Exam).

Total statement blocks: 16

####################################################################################
Instance Template:
% Instance Template
exam(Exam, Duration, Is_large).

student(Student, Exam).

period(Date, Time, Duration, Is_late, Penalty).

room(Room, Capacity, Penalty).

order_constraint(Exam1, Exam2).

same_time_constraint(Exam1, Exam2).

different_time_constraint(Exam1, Exam2).

own_room_constraint(Exam).
Initial response with syntax error:
1 {{ assigned(Exam, Period, Room) : period(Period), room(Room) }} 1 :- exam(Exam,_,_).

E

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


--------------------------------------------------------------------------------
Correction attempt 1:
Corrected code:
:- room(Room, Capacity, _), Cnt > Capacity, Cnt = #count{Student : student(Student, Exam), Exam1 : exam(Exam1, _, _), assigned(Exam1, _, Room), assigned(Exam, _, Room)}.

--------------------------------------------------------------------------------
Syntax error still present: <string>:1:11-15: error: syntax error, unexpected <IDENTIFIER>

--------------------------------------------------------------------------------
Correction attempt 2:
Corrected code:
:- room(Room, Capacity, _), Cnt > Capacity, Cnt = #count{Student : student(Student, _)}/2.

--------------------------------------------------------------------------------
Syntax error still present: <string>:1:11-15: error: syntax error, unexpected <IDENTIFIER>

--------------------------------------------------------------------------------
Correction attempt 3:
Corrected code:
:- room(Room, Capacity, _), Cnt > 

<span style="color:red">**Remark George 19/10/25:**</span>

<span style="color:red">I kept the old code below in case the new "full program" cell above gives any issues.</span>

<span style="color:red">After team review, I suggest removing it or cleaning it up.</span>

In [6]:
from LLM import bots
from ASP_Scheduler.problem_descriptions import all_problems
from ASP_Scheduler import scheduler
from ASP_Scheduler import schedulerBaseline
import importlib
from pprint import pprint
from openai import OpenAI
import os
# Override the CUDA version bitsandbytes targets when running a local model:
# force bnb to load libbitsandbytes_cuda123.dll. Per the original note, a
# bitsandbytes build for CUDA 12.4 doesn't exist yet, so CUDA 12.3 is forced.
os.environ["BNB_CUDA_VERSION"] = "123"



In [None]:
from ASP_Scheduler import scheduler
# Legacy demo: generate a scheduling program through an inference client, in this case
# deepseek (using chain-of-thought and few-shot prompting).
#
# Prerequisites noted by the original author:
#   * your api-key has to be entered in line 132 of LLM/bots.py
#     (NOTE(review): that line number may have drifted, and the first cell says the key
#      lives under .env -- confirm which one is current)
#   * delays of 10 seconds have been added in scheduler.py to avoid reaching the
#     token-limit of the inference client.

# Model to use. Set PIPE = None to fall back to the scheduler's default model.
PIPE = "deepseek"

# Options for this run
scheduler_options = {
    "pipe": PIPE,      # Pipe (model selection) for the LLM
    "printer": True,   # Print intermediate outputs
    "k": 5,            # Number of retries (0 means zero-shot)
}

# Run the LLM scheduler on the post-enrollment-based course timetabling specification
full_program = scheduler.full_ASP_program(
    all_problems['post_enrollment_based_course_time_tabling'],
    **scheduler_options,
)

# Show the full program exactly as the scheduler returned it
print('----------------------------FULL PROGRAM----------------------------')
print(full_program)

####################################################################################
PROGRAM PART DID NOT NEED REPAIR!
RESPONSE:
event(0 .. N-1).

room(Room, Capacity).

timeslot(Timeslot, Day, Hour).

student(Student, Event).

feature_requirement(Feature, Event).

room_feature(Feature, Room).

precedence_constraint(Event1, Event2).

timeslot_constraint(Event, Timeslot).

Total statement blocks: 15

####################################################################################
Instance Template:
event(0 .. N-1).

room(Room, Capacity).

timeslot(Timeslot, Day, Hour).

student(Student, Event).

feature_requirement(Feature, Event).

room_feature(Feature, Room).

precedence_constraint(Event1, Event2).

timeslot_constraint(Event, Timeslot).


HfHubHTTPError: 402 Client Error: Payment Required for url: https://router.huggingface.co/together/v1/chat/completions (Request ID: Root=1-68f4d87a-0705bd783a73fd150d5485bc;069999be-72ea-4349-b107-83311474d14a)

You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.

In [None]:
#################################################
#                  LOCAL MODEL                  #
#################################################

# We illustrate the generation of a scheduling problem using a local model  (using chain-of-thought and few-shot prompting)
# Notice you have to have your api-key under .env

# Doing all the imports in this cell because each GPU run needs a kernel restart. 
# Without a kernel restart, CPU is used instead of GPU after loading a local model once.
from LLM import bots
from ASP_Scheduler.problem_descriptions import all_problems
from ASP_Scheduler import scheduler
from ASP_Scheduler import schedulerBaseline
import importlib
from pprint import pprint
from openai import OpenAI
import os
# Force bnb to load libbitsandbytes_cuda123.dll for running a local model
# (per the original note, bitsandbytes for CUDA 12.4 doesn't exist yet, so CUDA 12.3 is forced).
os.environ["BNB_CUDA_VERSION"] = "123"

# To work locally, the pipeline has to be loaded manually.
#
# Checkpoint notes:
#   "meta-llama/Meta-Llama-3.1-8B-Instruct" gives kernel crashes after (or during)
#       generating soft constraints.
#   "meta-llama/Meta-Llama-3-8B-Instruct"   no kernel crashes, but less capable and also
#       not the one Jesse used.
CHECKPOINT = "meta-llama/Meta-Llama-3-8B-Instruct"

# 4bit quantization is faster, but the kernel needs a restart each time.
# For no quantization (full model), call load_pipe without quantization_config:
#   PIPE = bots.load_pipe(model_checkpoint=CHECKPOINT, local_dir="./local_models")
PIPE = bots.load_pipe(
    model_checkpoint=CHECKPOINT,
    local_dir="./local_models",
    quantization_config='4bit',
)

# Options for this run
scheduler_options = {
    "pipe": PIPE,      # Pipeline object for the LLM
    "printer": True,   # Print intermediate outputs
    "k": 5,
}

# Run the LLM scheduler on the post-enrollment-based course timetabling specification
full_program = scheduler.full_ASP_program(
    all_problems['post_enrollment_based_course_time_tabling'],
    **scheduler_options,
)

# Show the full program exactly as the scheduler returned it
print('----------------------------FULL PROGRAM----------------------------')
print(full_program)

downloading model...


This can be used to load a bitsandbytes version that is different from the PyTorch CUDA version.
If this was unintended set the BNB_CUDA_VERSION variable to an empty string: export BNB_CUDA_VERSION=
If you use the manual override make sure the right libcudart.so is in your LD_LIBRARY_PATH
For example by adding the following to your .bashrc: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<path_to_cuda_dir/lib64



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0


loading model...
####################################################################################
PROGRAM PART DID NOT NEED REPAIR!
RESPONSE:
% Instance Template
event(_).

room(Room, Capacity).

timeslot(Day, Hour).

student(Event).

feature_requirement(Event, Feature).

room_feature(Room, Feature).

precedence_constraint(Event1, Event2).

timeslot_constraint(Event, Timeslot).

Total statement blocks: 16

####################################################################################
Instance Template:
% Instance Template
event(_).

room(Room, Capacity).

timeslot(Day, Hour).

student(Event).

feature_requirement(Event, Feature).

room_feature(Room, Feature).

precedence_constraint(Event1, Event2).

timeslot_constraint(Event, Timeslot).
Initial response with syntax error:
1 {{ assigned(Event, Timeslot, Room) : room(Room), timeslot(Timeslot) }} 1 :- event(Event).

Error: <string>:1:4-5: error: syntax error, unexpected {

 Starting syntax repair attempts...
--------------------

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


####################################################################################
PROGRAM PART DID NOT NEED REPAIR!
RESPONSE:
:- assigned(Event, Timeslot, Room), room_feature(Room, Feature), not feature_requirement(Event, Feature).

Total statement blocks: 1

####################################################################################
- Rooms must have the required features for the event.
:- assigned(Event, Timeslot, Room), room_feature(Room, Feature), not feature_requirement(Event, Feature).

####################################################################################
PROGRAM PART DID NOT NEED REPAIR!
RESPONSE:
:- assigned(Event1, Timeslot, Room), assigned(Event2, Timeslot, Room), Event1!= Event2.

Total statement blocks: 1

####################################################################################
- There must be no more than one event in a room at a time.
:- assigned(Event1, Timeslot, Room), assigned(Event2, Timeslot, Room), Event1!= Event2.

###########

In [None]:
# Final illustration: generate a scheduling program with the baseline scheduler.
# PIPE must already be configured by one of the cells above.

baseline_problem = all_problems['nurse_scheduling']

# printer=False: intermediate outputs are not requested for the baseline run
full_program = schedulerBaseline.full_ASP_program(baseline_problem, printer=False, pipe=PIPE)

# Show the full program exactly as the baseline scheduler returned it
print('----------------------------FULL PROGRAM----------------------------')
print(full_program)

In [None]:
#
# For running local models via the GPU, this cell tests whether CUDA is available.
#
# Expected output on a machine with a working GPU setup:
#   cuda_available: True
#

import torch


def cuda_diagnostics():
    """Collect what PyTorch reports about CUDA and cuDNN in this kernel.

    Returns:
        dict with the keys
          - "cuda_available":  result of torch.cuda.is_available()
          - "device_count":    result of torch.cuda.device_count()
          - "name":            torch.cuda.get_device_name(0) when CUDA is available,
                               otherwise None (device 0 is not queried without CUDA)
          - "torch_cuda":      torch.version.cuda
          - "cudnn_available": result of torch.backends.cudnn.is_available()
    """
    cuda_available = torch.cuda.is_available()
    return {
        "cuda_available": cuda_available,
        "device_count": torch.cuda.device_count(),
        "name": torch.cuda.get_device_name(0) if cuda_available else None,
        "torch_cuda": torch.version.cuda,
        "cudnn_available": torch.backends.cudnn.is_available(),
    }


cuda_info = cuda_diagnostics()
print("cuda_available:", cuda_info["cuda_available"])
print("device_count:", cuda_info["device_count"])
print("name:", cuda_info["name"])
# The last value comes from torch.backends.cudnn.is_available(), i.e. it reports cuDNN
# availability. It was previously labelled "driver ok", which that call does not check.
print("torch.cuda:", cuda_info["torch_cuda"], "  cudnn_available:", cuda_info["cudnn_available"])


cuda_available: True
device_count: 1
name: NVIDIA GeForce RTX 4070 Ti
torch.cuda: 12.4   driver ok: True
