In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd /content/drive/MyDrive/

/content/drive/MyDrive


In [3]:
!git clone https://github.com/deepseek-ai/Janus.git janus

fatal: destination path 'janus' already exists and is not an empty directory.


In [4]:
ls -la

total 593291
-rw------- 1 root root     30020 Jan 22 17:21 'ASA cover letter (1).pdf'
-rw------- 1 root root    599831 Jan 22 17:22 'ASA cover letter.docx'
-rw------- 1 root root       188 Jan 22 17:21 'ASA cover letter.gdoc'
-rw------- 1 root root     30020 Jan 22 17:22 'ASA cover letter.pdf'
-rw------- 1 root root       188 Jan  9 22:16 'CAP 6640- COMP UNDERST OF NATURAL LANG - location ....gdoc'
drwx------ 2 root root      4096 Sep  8 21:16 [0m[01;34m'Colab Notebooks'[0m/
-rw------- 1 root root       188 Jan 14 13:12 'Cover letter program assistant course.gdoc'
-rw------- 1 root root 148556487 Feb 14 20:50  GMT20250121-135550_Recording_1920x1080.mp4
-rw------- 1 root root 141195634 Feb 14 20:51  GMT20250123-135644_Recording_1920x1080.mp4
-rw------- 1 root root 145634039 Feb 14 20:51  GMT20250128-135601_Recording_1920x1080.mp4
-rw------- 1 root root 171464649 Feb 14 20:51  GMT20250130-135629_Recording_1920x1080.mp4
-rw------- 1 root root       188 Oct 12 13:33 'HTTPS Sessions.gshe

In [5]:
!ls /content/drive/MyDrive/janus

demo			 interactivechat.py	    LICENSE-CODE    README.md
generation_inference.py  janus			    LICENSE-MODEL   requirements.txt
images			 janus.egg-info		    Makefile
inference.py		 janus_pro_tech_report.pdf  pyproject.toml


In [6]:
%cd /content/drive/MyDrive/janus

/content/drive/MyDrive/janus


In [7]:
!pip install -e .

Obtaining file:///content/drive/MyDrive/janus
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: janus
  Building editable for janus (pyproject.toml) ... [?25l[?25hdone
  Created wheel for janus: filename=janus-1.0.0-0.editable-py3-none-any.whl size=15925 sha256=d7a496747f925809fe1285be1d977117b9f162595b92e0afe6aad6f154efca2d
  Stored in directory: /tmp/pip-ephem-wheel-cache-9js2111q/wheels/ce/ba/10/eeffa37f351440092dd4ff06df0d9e3a63efd23d89971f3159
Successfully built janus
Installing collected packages: janus
  Attempting uninstall: janus
    Found existing installation: janus 1.0.0
    Uninstalling janus-1.0.0:
      Successfully uninstalled janus-1.0.0
Successfully installed janus-1.0.0


In [1]:
import os
import PIL.Image
import torch
import numpy as np
from transformers import AutoModelForCausalLM
from janus.models import MultiModalityCausalLM, VLChatProcessor

# specify the path to the model
model_path = "deepseek-ai/Janus-1.3B"
vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

# Load model with explicit legacy attention (avoiding FlashAttention)
vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    attn_implementation="eager"  # Force the use of CPU-compatible attention
)

# Move model to CPU and set precision to float32
vl_gpt = vl_gpt.to(torch.float32).cpu().eval()  # Ensure model is on CPU with float32 precision

conversation = [
    {"role": "User", "content": "A stunning princess from kabul in red, white traditional clothing, blue eyes, brown hair"},
    {"role": "Assistant", "content": ""}
]

sft_format = vl_chat_processor.apply_sft_template_for_multi_turn_prompts(
    conversations=conversation,
    sft_format=vl_chat_processor.sft_format,
    system_prompt="",
)

prompt = sft_format + vl_chat_processor.image_start_tag

@torch.inference_mode()
def generate(
    mmgpt: MultiModalityCausalLM,
    vl_chat_processor: VLChatProcessor,
    prompt: str,
    temperature: float = 1,
    parallel_size: int = 16,
    cfg_weight: float = 5,
    image_token_num_per_image: int = 576,
    img_size: int = 384,
    patch_size: int = 16,
):
    input_ids = vl_chat_processor.tokenizer.encode(prompt)
    input_ids = torch.LongTensor(input_ids)

    # Move input tensors to CPU
    tokens = torch.zeros((parallel_size * 2, len(input_ids)), dtype=torch.int).cpu()
    for i in range(parallel_size * 2):
        tokens[i, :] = input_ids
        if i % 2 != 0:
            tokens[i, 1:-1] = vl_chat_processor.pad_id

    inputs_embeds = mmgpt.language_model.get_input_embeddings()(tokens)

    generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).cpu()

    for i in range(image_token_num_per_image):
        outputs = mmgpt.language_model.model(inputs_embeds=inputs_embeds, use_cache=True, past_key_values=outputs.past_key_values if i != 0 else None)
        hidden_states = outputs.last_hidden_state

        logits = mmgpt.gen_head(hidden_states[:, -1, :])
        logit_cond = logits[0::2, :]
        logit_uncond = logits[1::2, :]

        logits = logit_uncond + cfg_weight * (logit_cond - logit_uncond)
        probs = torch.softmax(logits / temperature, dim=-1)

        next_token = torch.multinomial(probs, num_samples=1)
        generated_tokens[:, i] = next_token.squeeze(dim=-1)

        next_token = torch.cat([next_token.unsqueeze(dim=1), next_token.unsqueeze(dim=1)], dim=1).view(-1)
        img_embeds = mmgpt.prepare_gen_img_embeds(next_token)
        inputs_embeds = img_embeds.unsqueeze(dim=1)

    dec = mmgpt.gen_vision_model.decode_code(generated_tokens.to(dtype=torch.int), shape=[parallel_size, 8, img_size // patch_size, img_size // patch_size])
    dec = dec.to(torch.float32).cpu().numpy().transpose(0, 2, 3, 1)

    # Normalize and save the generated images
    dec = np.clip((dec + 1) / 2 * 255, 0, 255)

    visual_img = np.zeros((parallel_size, img_size, img_size, 3), dtype=np.uint8)
    visual_img[:, :, :] = dec

    os.makedirs('generated_samples', exist_ok=True)
    for i in range(parallel_size):
        save_path = os.path.join('generated_samples', "img_{}.jpg".format(i))
        PIL.Image.fromarray(visual_img[i]).save(save_path)

# Run the generate function
generate(vl_gpt, vl_chat_processor, prompt)


Python version is above 3.10, patching the collections module.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama_fast.LlamaTokenizerFast'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use t

Add image tag = <image_placeholder> to the tokenizer


Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForCausalLM is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)`
The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.float32.


NotImplementedError: Could not run 'flash_attn::_flash_attn_forward' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'flash_attn::_flash_attn_forward' is only available for these backends: [CUDA, Meta, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, AutogradNestedTensor, Tracer, AutocastCPU, AutocastXPU, AutocastMPS, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

CUDA: registered at /dev/null:185 [kernel]
Meta: registered at /dev/null:184 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:153 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:349 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:96 [backend fallback]
AutogradOther: registered at /dev/null:185 [autograd kernel]
AutogradCPU: registered at /dev/null:185 [autograd kernel]
AutogradCUDA: registered at /dev/null:185 [autograd kernel]
AutogradHIP: registered at /dev/null:185 [autograd kernel]
AutogradXLA: registered at /dev/null:185 [autograd kernel]
AutogradMPS: registered at /dev/null:185 [autograd kernel]
AutogradIPU: registered at /dev/null:185 [autograd kernel]
AutogradXPU: registered at /dev/null:185 [autograd kernel]
AutogradHPU: registered at /dev/null:185 [autograd kernel]
AutogradVE: registered at /dev/null:185 [autograd kernel]
AutogradLazy: registered at /dev/null:185 [autograd kernel]
AutogradMTIA: registered at /dev/null:185 [autograd kernel]
AutogradPrivateUse1: registered at /dev/null:185 [autograd kernel]
AutogradPrivateUse2: registered at /dev/null:185 [autograd kernel]
AutogradPrivateUse3: registered at /dev/null:185 [autograd kernel]
AutogradMeta: registered at /dev/null:185 [autograd kernel]
AutogradNestedTensor: registered at /dev/null:185 [autograd kernel]
Tracer: registered at ../torch/csrc/autograd/TraceTypeManual.cpp:294 [backend fallback]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:321 [backend fallback]
AutocastXPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:463 [backend fallback]
AutocastMPS: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:209 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:165 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:207 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:161 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:165 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:157 [backend fallback]


In [10]:
!pip install flash-attn --no-build-isolation


Collecting flash-attn
  Downloading flash_attn-2.7.4.post1.tar.gz (6.0 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/6.0 MB[0m [31m29.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━[0m [32m5.5/6.0 MB[0m [31m79.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.0/6.0 MB[0m [31m79.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.0/6.0 MB[0m [31m53.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: flash-attn
  Building wheel for flash-attn (setup.py) ... [?25l[?25hdone
  Created wheel for flash-attn: filename=flash_attn-2.7.4.post1-cp311-cp311-linux_x86_64.whl size=187815463 sha256=d9