In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch



  from .autonotebook import tqdm as notebook_tqdm


In [4]:

# Load Janus model and tokenizer
model_name = "deepseek-ai/Janus-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()

# Prepare prompt context
system_prompt = {"role": "system", "content": "You are an expert in Job Shop Scheduling Problem."}

user_prompt = {
    "role": "user",
    "content": "Optimize schedule for 3 Jobs across 3 Machines to minimize makespan. Each job involves a series of Operations needing specific machines and times. Operations are processed in order, without interruption, on a single Machine at a time.\n\nProblem:\nJob 0 consists of the following Operations:\nOperation 0 on Machine 0 duration 105 mins.\nOperation 1 on Machine 1 duration 29 mins.\nOperation 2 on Machine 2 duration 213 mins.\n\nJob 1 consists of the following Operations:\nOperation 0 on Machine 2 duration 193 mins.\nOperation 1 on Machine 1 duration 18 mins.\nOperation 2 on Machine 0 duration 213 mins.\n\nJob 2 consists of the following Operations:\nOperation 0 on Machine 0 duration 78 mins.\nOperation 1 on Machine 2 duration 74 mins.\nOperation 2 on Machine 1 duration 221 mins."
}

# Convert to Janus format
messages = [system_prompt, user_prompt]
chat_template = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize input
inputs = tokenizer(chat_template, return_tensors="pt").to(model.device)

# Generate solution
with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        do_sample=True
    )

# Decode and print
response = tokenizer.decode(output[0], skip_special_tokens=True)
print("\n=== Model Output ===\n")
print(response)

# Note: For best output, run this in an environment with sufficient memory (>=16GB GPU).


ValueError: The checkpoint you are trying to load has model type `multi_modality` but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "deepseek-ai/deepseek-coder-1.3b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).eval()

# 示例 prompt
prompt = """You are an expert in job shop scheduling.
Optimize the following schedule to minimize makespan:
Job 0: M0-105, M1-29, M2-213
Job 1: M2-193, M1-18, M0-213
Job 2: M0-78, M2-74, M1-221
"""

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=300)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

KeyboardInterrupt: 

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "deepseek-ai/deepseek-coder-1.3b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
model.eval()

# 构造 prompt：描述一个 JSSP 问题
prompt = """You are an expert in Job Shop Scheduling Problem (JSSP).
Provide a Python script that solves the following JSSP instance and calculates the makespan.

Problem:
Job 0: Operation 0 on Machine 0 for 105 mins, Operation 1 on Machine 1 for 29 mins, Operation 2 on Machine 2 for 213 mins.
Job 1: Operation 0 on Machine 2 for 193 mins, Operation 1 on Machine 1 for 18 mins, Operation 2 on Machine 0 for 213 mins.
Job 2: Operation 0 on Machine 0 for 78 mins, Operation 1 on Machine 2 for 74 mins, Operation 2 on Machine 1 for 221 mins.

Write a Python function to solve this JSSP instance using greedy or heuristic algorithm and return the makespan.
"""

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=512, do_sample=False)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install 'accelerate>=0.26.0'`

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

In [8]:


# 模型名
model_name = "deepseek-ai/deepseek-coder-1.3b-base"

# 加载 tokenizer 和模型
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
model.eval()

# 自然语言任务指令 + JSSP 问题
prompt = """You are an expert in Job Shop Scheduling Problem.
Task: Provide the steps of a solution for the JSSP problem and determine the makespan.
Problem:
Optimize schedule for 3 Jobs across 3 Machines to minimize makespan. Each job involves a series of Operations needing specific machines and times. Operations are processed in order, without interruption, on a single Machine at a time.

Job 0:
  Operation 0 on Machine 0 duration 105 mins.
  Operation 1 on Machine 1 duration 29 mins.
  Operation 2 on Machine 2 duration 213 mins.

Job 1:
  Operation 0 on Machine 2 duration 193 mins.
  Operation 1 on Machine 1 duration 18 mins.
  Operation 2 on Machine 0 duration 213 mins.

Job 2:
  Operation 0 on Machine 0 duration 78 mins.
  Operation 1 on Machine 2 duration 74 mins.
  Operation 2 on Machine 1 duration 221 mins.

Solution:"""

# 编码输入
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# 生成回答
outputs = model.generate(**inputs, max_new_tokens=500, temperature=0.7, do_sample=True)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# 只取生成的部分
print(response[len(prompt):].strip())

ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install 'accelerate>=0.26.0'`

In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# 指定模型
model_name = "deepseek-ai/deepseek-coder-1.3b-base"

# 加载 tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 选择设备：如果有 GPU 就用 GPU，没有就用 CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 加载模型并放到设备上（不使用 accelerate）
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
model.to(device)
model.eval()

Using device: cuda


KeyboardInterrupt: 

In [None]:
prompt = "'''Provide a solution for this JSSP: Job 0 - M0:5, M1:3; Job 1 - M1:2, M0:4. Return the schedule and makespan.'''\n"

inputs = tokenizer(prompt, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# 模型名
model_name = "deepseek-ai/deepseek-coder-1.3b-base"

# 加载 tokenizer 和模型
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto", torch_dtype=torch.float16)
model.eval()


KeyboardInterrupt: 

In [None]:

# 自然语言任务指令 + JSSP 问题
prompt = """You are an expert in Job Shop Scheduling Problem.
Task: Provide the steps of a solution for the JSSP problem and determine the makespan.
Problem:
Optimize schedule for 3 Jobs across 3 Machines to minimize makespan. Each job involves a series of Operations needing specific machines and times. Operations are processed in order, without interruption, on a single Machine at a time.

Job 0:
  Operation 0 on Machine 0 duration 105 mins.
  Operation 1 on Machine 1 duration 29 mins.
  Operation 2 on Machine 2 duration 213 mins.

Job 1:
  Operation 0 on Machine 2 duration 193 mins.
  Operation 1 on Machine 1 duration 18 mins.
  Operation 2 on Machine 0 duration 213 mins.

Job 2:
  Operation 0 on Machine 0 duration 78 mins.
  Operation 1 on Machine 2 duration 74 mins.
  Operation 2 on Machine 1 duration 221 mins.

Solution:"""

# 编码输入
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# 生成回答
outputs = model.generate(**inputs, max_new_tokens=500, temperature=0.7, do_sample=True)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# 只取生成的部分
print(response[len(prompt):].strip())

In [6]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


In [9]:
# 模型名
model_name = "EleutherAI/gpt-neo-125m"




In [10]:
# 加载 tokenizer 和模型
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name,device_map="auto", torch_dtype=torch.float16)
model.eval()

GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(2048, 768)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPTNeoBlock(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0.0, inplace=False)
            (resid_dropout): Dropout(p=0.0, inplace=False)
            (k_proj): Linear(in_features=768, out_features=768, bias=False)
            (v_proj): Linear(in_features=768, out_features=768, bias=False)
            (q_proj): Linear(in_features=768, out_features=768, bias=False)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPTNeoMLP(
          (c_fc): Linear(in_features=768, out_features=3072, bias=True)
          (c_proj): Linear(in_fe

In [1]:

# 自然语言任务指令 + JSSP 问题
prompt = """You are an expert in Job Shop Scheduling Problem.
Task: Provide a detailed, step-by-step solution for the JSSP problem and calculate the final makespan. Please explain the reasoning for each step.

Problem:
Optimize schedule for 3 Jobs across 3 Machines to minimize makespan. Each job involves a series of Operations needing specific machines and times. Operations are processed in order, without interruption, on a single Machine at a time.

Job 0:
  Operation 0 on Machine 0 duration 105 mins.
  Operation 1 on Machine 1 duration 29 mins.
  Operation 2 on Machine 2 duration 213 mins.

Job 1:
  Operation 0 on Machine 2 duration 193 mins.
  Operation 1 on Machine 1 duration 18 mins.
  Operation 2 on Machine 0 duration 213 mins.

Job 2:
  Operation 0 on Machine 0 duration 78 mins.
  Operation 1 on Machine 2 duration 74 mins.
  Operation 2 on Machine 1 duration 221 mins.

Solution (explain each step and then give the final makespan):"""



In [None]:
# 编码输入
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# 生成回答
#outputs = model.generate(**inputs, max_new_tokens=500, temperature=0.7, do_sample=True)
outputs = model.generate(**inputs, max_new_tokens=500, temperature=0.2, do_sample=False, num_beams=5)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# 只取生成的部分
print(response[len(prompt):].strip())