Consider the following model initialization, taken from the 2048 Jupyter notebook:
import art
from dotenv import load_dotenv
from openpipe.client import AsyncOpenPipe
import random
load_dotenv()
random.seed(42)
# Declare the model
model = art.TrainableModel(
name="010",
project="2048-multi-turn",
base_model="Qwen/Qwen2.5-3B-Instruct",
# To run on a T4, we need to override some config defaults.
_internal_config=art.dev.InternalModelConfig(
init_args=art.dev.InitArgs(
max_seq_length=8192,
),
engine_args=art.dev.EngineArgs(
enforce_eager=True,
gpu_memory_utilization=0.8,
num_scheduler_steps=1,
),
),
)
# Initialize the server
api = art.LocalAPI(
# Normally we don't want to run the server in-process, but for the output
# to show up properly on Google Colab we'll enable this.
in_process=True
)
# Register the model with the local API (sets up logging, inference, and training)
await model.register(api)
Most of it can be run multiple times without error. However, if I rerun the entire notebook, or if I call await model.register(api) by itself in a later cell, I see the following error:
File ~/sky_workdir/src/art/model.py:92, in TrainableModel.register(self, api, _openai_client_config)
[86](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:86) async def register(
[87](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:87) self,
[88](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:88) api: "LocalAPI",
[89](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:89) _openai_client_config: dev.OpenAIServerConfig | None = None,
[90](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:90) ) -> None:
[91](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:91) await super().register(api)
---> [92](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:92) base_url, api_key = await api._prepare_backend_for_training(
[93](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:93) self, _openai_client_config
[94](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:94) )
[95](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:95) self.base_url = base_url
[96](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/model.py:96) self.api_key = api_key
File ~/sky_workdir/src/art/local/api.py:186, in LocalAPI._prepare_backend_for_training(self, model, config)
[180](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:180) async def _prepare_backend_for_training(
[181](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:181) self,
[182](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:182) model: TrainableModel,
[183](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:183) config: dev.OpenAIServerConfig | None,
[184](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:184) ) -> tuple[str, str]:
[185](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:185) service = await self._get_service(model)
--> [186](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:186) await service.start_openai_server(config=config)
[187](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:187) server_args = (config or {}).get("server_args", {})
[189](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/api.py:189) base_url = f"http://{server_args.get('host', '0.0.0.0')}:{server_args.get('port', 8000)}/v1"
File ~/sky_workdir/src/art/local/service.py:54, in ModelService.start_openai_server(self, config)
[51](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:51) self.state.trainer.save_model(lora_path)
[52](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:52) await self.stop_openai_server()
[53](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:53) self._openai_server_task = await openai_server_task(
---> [54](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:54) state=self.state.vllm,
[55](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:55) config=dev.get_openai_server_config(
[56](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:56) model_name=self.model_name,
[57](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:57) base_model=self.base_model,
[58](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:58) log_file=f"{self.output_dir}/logs/vllm.log",
[59](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:59) lora_path=lora_path,
[60](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:60) config=config,
[61](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:61) ),
[62](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:62) )
[63](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:63) self._set_lora(lora_path)
File ~/miniconda3/lib/python3.10/functools.py:981, in cached_property.__get__(self, instance, owner)
[979](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/miniconda3/lib/python3.10/functools.py:979) val = cache.get(self.attrname, _NOT_FOUND)
[980](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/miniconda3/lib/python3.10/functools.py:980) if val is _NOT_FOUND:
--> [981](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/miniconda3/lib/python3.10/functools.py:981) val = self.func(instance)
[982](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/miniconda3/lib/python3.10/functools.py:982) try:
[983](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/miniconda3/lib/python3.10/functools.py:983) cache[self.attrname] = val
File ~/sky_workdir/src/art/local/service.py:39, in ModelService.state(self)
[35](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:35) @functools.cached_property
[36](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:36) def state(self) -> "ModelState":
[37](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:37) from .state import ModelState
---> [39](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/service.py:39) return ModelState(self.config)
File ~/sky_workdir/src/art/local/state.py:80, in ModelState.__init__(self, config)
[75](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/state.py:75) return from_engine_args(engine_args, *args, **kwargs)
[77](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/state.py:77) AsyncLLMEngine.from_engine_args = _from_engine_args
[78](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/state.py:78) self.model, self.tokenizer = cast(
[79](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/state.py:79) tuple[CausallLM, PreTrainedTokenizerBase],
---> [80](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/state.py:80) unsloth.FastLanguageModel.from_pretrained(**config.get("init_args", {})),
[81](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/state.py:81) )
[82](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/state.py:82) AsyncLLMEngine.from_engine_args = from_engine_args
[83](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/src/art/local/state.py:83) torch.cuda.empty_cache = empty_cache
File ~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:363, in FastLanguageModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, load_in_8bit, full_finetuning, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, use_exact_model_name, fast_inference, gpu_memory_utilization, float8_kv_cache, random_state, max_lora_rank, disable_log_stats, *args, **kwargs)
[360](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:360) pass
[361](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:361) pass
--> [363](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:363) model, tokenizer = dispatch_model.from_pretrained(
[364](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:364) model_name = model_name,
[365](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:365) max_seq_length = max_seq_length,
[366](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:366) dtype = _get_dtype(dtype),
[367](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:367) load_in_4bit = load_in_4bit,
[368](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:368) token = token,
[369](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:369) device_map = device_map,
[370](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:370) rope_scaling = rope_scaling,
[371](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:371) fix_tokenizer = fix_tokenizer,
[372](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:372) model_patcher = dispatch_model,
[373](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:373) tokenizer_name = tokenizer_name,
[374](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:374) trust_remote_code = trust_remote_code,
[375](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:375) revision = revision if not is_peft else None,
[376](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:376)
[377](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:377) fast_inference = fast_inference,
[378](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:378) gpu_memory_utilization = gpu_memory_utilization,
[379](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:379) float8_kv_cache = float8_kv_cache,
[380](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:380) random_state = random_state,
[381](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:381) max_lora_rank = max_lora_rank,
[382](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:382) disable_log_stats = disable_log_stats,
[383](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:383) *args, **kwargs,
[384](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:384) )
[386](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:386) if resize_model_vocab is not None:
[387](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/loader.py:387) model.resize_token_embeddings(resize_model_vocab)
File ~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:87, in FastQwen2Model.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, model_patcher, tokenizer_name, trust_remote_code, **kwargs)
[72](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:72) @staticmethod
[73](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:73) def from_pretrained(
[74](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:74) model_name = "Qwen/Qwen2-7B",
(...)
[85](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:85) **kwargs,
[86](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:86) ):
---> [87](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:87) return FastLlamaModel.from_pretrained(
[88](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:88) model_name = model_name,
[89](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:89) max_seq_length = max_seq_length,
[90](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:90) dtype = dtype,
[91](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:91) load_in_4bit = load_in_4bit,
[92](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:92) token = token,
[93](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:93) device_map = device_map,
[94](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:94) rope_scaling = rope_scaling,
[95](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:95) fix_tokenizer = fix_tokenizer,
[96](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:96) model_patcher = FastQwen2Model,
[97](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:97) tokenizer_name = tokenizer_name,
[98](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:98) trust_remote_code = trust_remote_code,
[99](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:99) **kwargs,
[100](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/qwen2.py:100) )
File ~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/llama.py:1819, in FastLlamaModel.from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, model_patcher, tokenizer_name, trust_remote_code, fast_inference, gpu_memory_utilization, float8_kv_cache, random_state, max_lora_rank, disable_log_stats, **kwargs)
[1816](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/llama.py:1816) pass
[1818](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/llama.py:1818) # Load vLLM first
-> [1819](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/llama.py:1819) llm = load_vllm(**load_vllm_kwargs)
[1821](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/llama.py:1821) # Convert to HF format
[1822](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth/models/llama.py:1822) _, quant_state_dict = get_vllm_state_dict(llm, config = model_config)
File ~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1051, in load_vllm(model_name, config, gpu_memory_utilization, max_seq_length, dtype, training, float8_kv_cache, random_state, enable_lora, max_lora_rank, max_loras, use_async, use_engine, disable_log_stats, enforce_eager, enable_prefix_caching, compilation_config, conservativeness, max_logprobs, use_bitsandbytes)
[1046](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1046) print(
[1047](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1047) f"Unsloth: Retrying vLLM to process {approx_max_num_seqs} sequences and {max_num_batched_tokens} tokens in tandem.\n"\
[1048](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1048) f"Error:\n{error}"
[1049](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1049) )
[1050](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1050) else:
-> [1051](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1051) raise RuntimeError(error)
[1052](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1052) pass
[1053](https://vscode-remote+ssh-002dremote-002bart.vscode-resource.vscode-cdn.net/root/sky_workdir/examples/2048/~/sky_workdir/.venv/lib/python3.10/site-packages/unsloth_zoo/vllm_utils.py:1053) pass
RuntimeError: Sleep mode can only be used for one instance per process.
It would be great if ART ran a check whenever a model attempts to register an API, to determine whether an API is already registered. If one is already registered, I'd like to see a warning but not have the model throw an error. This would make my Jupyter developer experience so much better! Right now I have to be careful about rerunning the notebook, which introduces friction when I want to run multiple cells.
This is all based on the assumption that we never need to register more than one API per model.
TL;DR:
Calling `model.register(api)` multiple times for the same model and API results in an error, and makes it hard to rerun a Jupyter notebook.

Longer Version:
Consider the following model initialization, taken from the 2048 Jupyter notebook:
Most of it can be run multiple times without error. However, if I rerun the entire notebook, or if I call `await model.register(api)` by itself in a later cell, I see the following error:

Proposed solution:
It would be great if ART ran a check whenever a model attempts to register an API, to determine whether an API is already registered. If one is already registered, I'd like to see a warning but not have the model throw an error. This would make my Jupyter developer experience so much better! Right now I have to be careful about rerunning the notebook, which introduces friction when I want to run multiple cells.
This is all based on the assumption that we never need to register more than one API per model.