# Tutorial for Bllossom on Gradio!

## 01. Import all packages

In [1]:
!pip install -q gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.2/57.2 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.4/320.4 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m73.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.2/73.2 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import gradio as gr
import os
import torch
from transformers import AutoProcessor, MllamaForConditionalGeneration
from PIL import Image

## 02. Load your model

In [3]:
# Determine the device (GPU if available, else CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
# Load the model and processor
model_name = """Bllossom/llama-3.2-Korean-Bllossom-AICA-5B"""
model = MllamaForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map='cuda:0'
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/5.22k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/84.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.58G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/835M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

In [5]:
# Move the model to the appropriate device (GPU if available)
model.to(device)
processor = AutoProcessor.from_pretrained(model_name)
# VRAM을 많이 먹을 경우 아래 코드 실행
model.eval()

preprocessor_config.json:   0%|          | 0.00/477 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/5.42k [00:00<?, ?B/s]

MllamaForConditionalGeneration(
  (vision_model): MllamaVisionModel(
    (patch_embedding): Conv2d(3, 1280, kernel_size=(14, 14), stride=(14, 14), padding=valid, bias=False)
    (gated_positional_embedding): MllamaPrecomputedPositionEmbedding(
      (tile_embedding): Embedding(9, 8197120)
    )
    (pre_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
      (embedding): Embedding(9, 5120)
    )
    (post_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
      (embedding): Embedding(9, 5120)
    )
    (layernorm_pre): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
    (layernorm_post): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
    (transformer): MllamaVisionEncoder(
      (layers): ModuleList(
        (0-31): 32 x MllamaVisionEncoderLayer(
          (self_attn): MllamaVisionSdpaAttention(
            (q_proj): Linear(in_features=1280, out_features=1280, bias=False)
            (k_proj): Linear(in_features=1280, out_features=1280, b

In [6]:
def predict(image=None, text=None):
  torch.cuda.empty_cache()
  with torch.no_grad():
      if image is not None:
        w,h = image.size
        if w < 500 or h < 500:
          image = image.resize((w*2,h*2))

      # LLM으로 사용할 때
      if image is None:
          messages = [
              {"role": "user", "content": [
                  {"type": "text", "text": text}  # Add the user-provided text input
              ]}
          ]
          inputs = processor.tokenizer.apply_chat_template(messages, add_generation_prompt=True,return_tensors='pt').to(model.device)
          outputs = model.generate(inputs, max_new_tokens=512,temperature=0.1,eos_token_id=processor.tokenizer.convert_tokens_to_ids('<|eot_id|>'))

      # VLM으로 사용할 때
      else:
          # Prepare the input messages
          messages = [
              {"role": "user", "content": [
                  {"type": "image"},  # Specify that an image is provided
                  {"type": "text", "text": text}  # Add the user-provided text input
              ]}
          ]
          # Create the input text using the processor's chat template
          input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

          # Process the inputs and move to the appropriate device
          inputs = processor(image, input_text,add_special_tokens=False, return_tensors="pt").to(device)


      # Generate a response from the model
          with torch.cuda.amp.autocast():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                use_cache=True,
                temperature=0.1,
                eos_token_id=processor.tokenizer.convert_tokens_to_ids('<|eot_id|>'),
                )

      # Decode the output to return the final response
      response = processor.decode(outputs[0])
      response = response[
          response.rindex('<|start_header_id|>assistant<|end_header_id|>\n\n')+\
          len('<|start_header_id|>assistant<|end_header_id|>\n\n'):].replace('<|eot_id|>','')

      return response

In [None]:
# Define the Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Image Input"),  # Image input with label
        gr.Textbox(label="Text Input")  # Textbox input with label
    ],
    outputs=gr.Textbox(label="Generated Response"),  # Output with a more descriptive label
    title="Bllossom-AICA Demo",  # Title of the interface
    description="This demo uses Bllossom-AICA model to generate responses based on an image and text input.",  # Short description
    theme="compact",  # Using a compact theme for a cleaner look,
    batch=False,
)

# Launch the interface
interface.launch(debug=True, share=True)


Sorry, we can't find the page you are looking for.


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://c5926853d5a781cbe6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/gradio/queueing.py", line 625, in process_events
    response = await route_utils.call_process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/route_utils.py", line 322, in call_process_api
    output = await app.get_blocks().process_api(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 2043, in process_api
    inputs = await self.preprocess_data(
  File "/usr/local/lib/python3.10/dist-packages/gradio/blocks.py", line 1738, in preprocess_data
    processed_input.append(block.preprocess(inputs_cached))
  File "/usr/local/lib/python3.10/dist-packages/gradio/components/image.py", line 232, in preprocess
    im = PIL.Image.open(file_path)
  File "/usr/local/lib/python3.10/dist-packages/PIL/Image.py", line 3536, in open
    raise UnidentifiedImageError(msg)
PIL.UnidentifiedImageError: cannot identify image file '/tmp/gradio/679f5074a7950e980fc7ca7aaf8f0b8efec57e8093a397c0082

# 관세청, 외국산 물품을 국산으로 숙여 조달 납품하는 부정행위 집중단속

- **조달청, 산업부**과 협업하여 입수한 자료를 통한 분석을 통해 기획조사 지속
- 조달 계약자료 및 관계청 자료의 데이터를 분석하며 시스템 구축 진행 중
- 관세청은 공정기관 조달 납품업체에 의해 납품된 외국산 물품을 국산으로 둔갑시키는 부정납품 행위를 차단하기 위해 집중단속 '23.8월 실시

## 공공조달 부정납품 단속 현황 ('2023년 기준)
| 구분          | '19년  | '20년  | '21년  | '22년  | '23년 1~5월 |
|---------------|-------|-------|-------|-------|-------------|
| 사건수(건,원) | 9(11) | 6(7)  | 15(28)| 11(12) | 3(3)       |
| 금액(억원)     | 185  | 634  | 1,224 | 1,244 | 139       |
| 주요 품목      | 전자칠판 | 바닥마감제(물론어림보드) | 의류 패셔너토(터피스) 등 | 액정모니터 의류 | 무정전전원 장치, 의류 |

## 조사 내용
- 조달 납품 업체들은 저가의 외국산 물품을 생산하지 않고 가의 성표로 국산으로 둔갑시킴.
- 저가로 수입된 국산 물품, 상표라벨 추가 등으로 국산으로 간주함.
- 공공조달 분야의 경쟁력 강화 및 소비자 보호를 위해 부정행위 단속.

## 부정납품 단속 사례
- 저가 외국산 근무복 수입 후 원산지표시를 제거한 뒤, 이를 근무복 12만점으로 위장.
- 국가 기관에 위조 근무복 200개 전달.

## 추가 단속 발표 ('22년 11월)
- 단순 조립 후 제조된 국산 근무복으로 부정납품 방지.
```