# RV-ANDROID playground

# Config

## Local

`sudo apt install python3.12-dev nvidia-cuda-toolkit bitsandbytes triton`

```
nvidia-smi
nvcc --version
```

In [None]:
# Log in to the Hugging Face Hub with the token read from the environment / .env file
import os  # needed for os.getenv below; this cell runs before the notebook's Imports cell

from huggingface_hub import login
from dotenv import load_dotenv

# Load variables from a local .env file (override=True is passed to python-dotenv)
load_dotenv(override=True)

hf_token = os.getenv('HF_TOKEN')

login(hf_token)


## Google Colab

In [2]:
!pip install -q gradio diffusers transformers accelerate torch Pillow python-dotenv torchvision
!pip install -q -U bitsandbytes

#datasets

In [3]:
# Clone RVSec
from google.colab import userdata, drive

# Remove the sample_data/ directory from the working directory
!rm -Rf sample_data/

# Git identity used for commits made from this runtime
#https://github.com/ad17171717/YouTube-Tutorials/blob/main/Google%20Colab%20Tutorials/Google_Colab_%2B_Git_Pushing_Changes_to_a_GitHub_Repo!.ipynb
!git config --global user.name "phtcosta"
!git config --global user.email "phtcosta@gmail.com"

# Personal access token is read from the Colab secrets store
# https://github.com/settings/tokens
# NOTE(review): the token is interpolated into the clone URL, so it presumably ends up
# in the clone's remote URL (.git/config) — confirm before sharing this runtime.
github_token = userdata.get('GITHUB_TOKEN')
!git clone --branch develop https://{github_token}@github.com/PAMunb/rvsec.git

# Move into the rv-android sub-project and install its requirements
%cd rvsec/rv-android/
!pip install -q -r requirements.txt

Cloning into 'rvsec'...
remote: Enumerating objects: 9174, done.[K
remote: Counting objects: 100% (545/545), done.[K
remote: Compressing objects: 100% (459/459), done.[K
remote: Total 9174 (delta 258), reused 294 (delta 32), pack-reused 8629 (from 2)[K
Receiving objects: 100% (9174/9174), 259.07 MiB | 24.32 MiB/s, done.
Resolving deltas: 100% (4017/4017), done.
Updating files: 100% (3015/3015), done.
/content/rvsec/rv-android
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m918.1/918.1 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [4]:
# Mount google drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Authenticate with the Hugging Face Hub using the HF_TOKEN Colab secret
from huggingface_hub import login

hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

In [3]:
!git status
!pwd

/content


In [None]:
# drive.flush_and_unmount()
# !git add --all
# !git commit -a -m "Just testing"
# !git remote -v

# Experiments

In [7]:
# Imports

from IPython.display import Markdown, display, update_display #, Image
import gradio as gr
from PIL import Image
import numpy as np
import os
import glob
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperConfig, WhisperForConditionalGeneration

from rvandroid.llm.huggingface import HuggingFaceLLM

In [8]:
# Constants: Hugging Face Hub ids of the models used in the experiments below

LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct" # needs permission
QWEN = "Qwen/Qwen2.5-3B-Instruct" # other ids tried: "Qwen/Qwen2.5-0.5B-Instruct", "Qwen/Qwen2.5-3B", "Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen2-7B-Instruct"
QWEN_0_5B = "Qwen/Qwen2.5-0.5B-Instruct"
# PHI2 = "microsoft/phi-2"
PHI3 = "microsoft/Phi-3-mini-4k-instruct"
PHI3_5="microsoft/Phi-3.5-mini-instruct"
GEMMA2 = "google/gemma-2-2b-it" # needs permission: https://huggingface.co/google/gemma-2-2b-it
STARCODER2 = "bigcode/starcoder2-3b"
FALCON= "tiiuae/Falcon3-3B-Instruct" # alternative: tiiuae/Falcon3-7B-Instruct # https://falconllm.tii.ae/
GRANITE = "ibm-granite/granite-3.1-8b-instruct"
DEEPSEEK = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" # alternative: "deepseek-ai/deepseek-llm-7b-chat"
DEEPSEEK_CHAT = "deepseek-ai/deepseek-llm-7b-chat"

# Model used when none is chosen explicitly, and the choices offered in the Gradio dropdown
# NOTE(review): GEMMA2 is defined above but not listed in MODELS — confirm this is intentional.
DEFAULT_MODEL = LLAMA
MODELS = [LLAMA, DEEPSEEK, DEEPSEEK_CHAT, PHI3, PHI3_5, GRANITE, QWEN, QWEN_0_5B, STARCODER2, FALCON]

In [None]:
# Text Generation
def text_generation_hf_pipeline(messages: list[str], model=QWEN):
    """Run ``messages`` through a freshly created text-generation pipeline.

    Args:
        messages: Input handed unchanged to the pipeline call.
            NOTE(review): annotated as ``list[str]``, but the rest of this file
            builds chat messages as role/content dicts — confirm the intended type.
        model: Hub id of the model to load; defaults to ``QWEN``.

    Returns:
        Whatever the pipeline call returns for ``messages``.
    """
    # A new pipeline is built on every call; no device is passed explicitly
    # (the device="cuda" argument is disabled).
    return pipeline("text-generation", model=model)(messages)

In [None]:
# Report whether PyTorch can see a CUDA GPU
print("GPU está disponível" if torch.cuda.is_available() else "GPU não está disponível")
# device = torch.device("cuda")  # would select the GPU when one is available
# device = torch.device("cpu")   # would select the CPU otherwise

In [None]:
torch.cuda.empty_cache()

## Static Analysis

In [8]:
# static_folder = "/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out"
static_folder = "/content/drive/MyDrive/llms/rvandroid/static"

def read_text_file(file_path):
    """Return the full content of a text file as a single string.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the locale's default encoding of the machine running the notebook.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

def read_files_by_extension(folder: str, extension: str = "*.gesda"):
    """Lazily yield ``(path, content)`` for every file matching ``extension``.

    Args:
        folder: Currently unused.
            NOTE(review): the search always runs in the module-level
            ``static_folder``; callers in this file pass the glob pattern in this
            position, so honouring ``folder`` requires updating them together.
        extension: Glob pattern joined onto ``static_folder``.

    Yields:
        Tuples of the matched file path and its text content.
    """
    pattern = os.path.join(static_folder, extension)
    yield from ((path, read_text_file(path)) for path in glob.glob(pattern))

base_system_msg = """You are an expert assistant in testing the interface of Android applications, and you use this knowledge to make useful summaries about the components (activities, windows, widgets) contained on the screen. Some widgets may have information about which method will be called when it is clicked, others may have information about the assignment of this widget to a field declared in the class, listing all those that are relevant in the context of interface testing, and suggesting the possible actions on this component (click, set text, select item). The information about the application that must be understood is contained in a string in json format, which will be passed to you.
"""
base_prompt = "Make a summary of the application 'cryptoapp' which has the following information in json format: {}"

def create_messages(system_msg: str, prompt: str, json_text: str) -> list[dict[str, str]]:
    """Build the two-message chat (system + user) sent to the model.

    Args:
        system_msg: Content of the system message.
        prompt: User prompt template; ``json_text`` is substituted into it
            through ``str.format``.
        json_text: Text inserted into the template.

    Returns:
        A list with the system message followed by the user message, each a
        dict with ``"role"`` and ``"content"`` keys.
    """
    user_content = prompt.format(json_text)
    return [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": user_content},
    ]

### GESDA

In [9]:

def get_gesda_files(folder: str):
    """Collect the ``*.gesda`` files and their contents.

    Args:
        folder: Folder forwarded to ``read_files_by_extension``.

    Returns:
        A ``(filenames, texts)`` pair of parallel lists: the matched paths and
        the text read from each of them.
    """
    filenames = []
    texts = []
    # Forward the folder and the pattern in their own parameters; the pattern
    # used to be passed in the ``folder`` position and ``folder`` was dropped.
    for file, text in read_files_by_extension(folder, "*.gesda"):
        filenames.append(file)
        texts.append(text)
    return filenames, texts


In [12]:
# Basic example (GESDA): summarize one GESDA file with the default model

text = read_text_file(static_folder+"/cryptoapp.apk.gesda")
# print(text)
messages = create_messages(base_system_msg, base_prompt, text)
# print(messages)

print("Generating ...")

hf = HuggingFaceLLM(DEFAULT_MODEL)
try:
    response = hf.generate(messages)
    print(response)
finally:
    # Always run the cleanup, even when generation fails, so the model is not
    # left behind for the next cell.
    hf.clean()
    del hf

Generating ...


tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


system

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are an expert assistant in testing the interface of Android applications, and you use this knowledge to make useful summaries about the components (activities, windows, widgets) contained on the screen. Some widgets may have information about which method will be called when it is clicked, others may have information about the assignment of this widget to a field declared in the class, listing all those that are relevant in the context of interface testing, and suggesting the possible actions on this component (click, set text, select item). The information about the application that must be understood is contained in a string in json format, which will be passed to you.user

Make a summary of the application 'cryptoapp' which has the following information in json format: {"fileName":"cryptoapp.apk","packageName":"br.unb.cic.cryptoapp","windows":[{"id":1,"name":"br.unb.cic.cryptoapp.cipher.CipherActivity","isMai

### GATOR

In [13]:
# Basic example (GATOR): summarize one GATOR window-transition-graph file with LLAMA

text = read_text_file(static_folder+"/cryptoapp.apk.wtg")
print(text)
messages = create_messages(base_system_msg, base_prompt, text)
print(messages)

print("Generating ...")

hf = HuggingFaceLLM(LLAMA)
try:
    response = hf.generate(messages)
    print(response)
finally:
    # Always run the cleanup, even when generation fails, so the model is not
    # left behind for the next cell.
    hf.clean()
    del hf

{"windows":[{"id":1533,"name":"presto.android.gui.stubs.PrestoFakeLauncherNodeClass"},{"id":1349,"name":"br.unb.cic.cryptoapp.MainActivity"},{"id":1336,"name":"br.unb.cic.cryptoapp.messagedigest.MessageDigestActivity"},{"id":1342,"name":"android.view.Menu"},{"id":1339,"name":"br.unb.cic.cryptoapp.cipher.CipherActivity"}],"transitions":[{"sourceId":1339,"getTargetId":1339,"events":[{"type":"implicit_home_event","handler":"","widgetId":1339,"widgetClass":"br.unb.cic.cryptoapp.cipher.CipherActivity"}],"callbacks":[]},{"sourceId":1342,"getTargetId":1349,"events":[{"type":"implicit_back_event","handler":"","widgetId":1342,"widgetClass":"android.view.Menu"}],"callbacks":[]},{"sourceId":1339,"getTargetId":1339,"events":[{"type":"implicit_rotate_event","handler":"","widgetId":1339,"widgetClass":"br.unb.cic.cryptoapp.cipher.CipherActivity"}],"callbacks":[{"type":"implicit_lifecycle_event","handler":"\u003cbr.unb.cic.cryptoapp.cipher.CipherActivity: void onCreate(android.os.Bundle)\u003e","widge

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


system

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are an expert assistant in testing the interface of Android applications, and you use this knowledge to make useful summaries about the components (activities, windows, widgets) contained on the screen. Some widgets may have information about which method will be called when it is clicked, others may have information about the assignment of this widget to a field declared in the class, listing all those that are relevant in the context of interface testing, and suggesting the possible actions on this component (click, set text, select item). The information about the application that must be understood is contained in a string in json format, which will be passed to you.user

Make a summary of the application 'cryptoapp' which has the following information in json format: {"windows":[{"id":1533,"name":"presto.android.gui.stubs.PrestoFakeLauncherNodeClass"},{"id":1349,"name":"br.unb.cic.cryptoapp.MainActivity"},{

### Gradio

In [3]:
# Local
# static_folder = "/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out"
# Google Colab
static_folder = "/content/drive/MyDrive/llms/rvandroid/static"


def get_gesda_files(folder: str):
    """Collect the ``*.gesda`` files and their contents.

    Args:
        folder: Folder forwarded to ``read_files_by_extension``.

    Returns:
        A ``(filenames, texts)`` pair of parallel lists: the matched paths and
        the text read from each of them.
    """
    filenames = []
    texts = []
    # Forward the folder and the pattern in their own parameters; the pattern
    # used to be passed in the ``folder`` position and ``folder`` was dropped.
    for file, text in read_files_by_extension(folder, "*.gesda"):
        filenames.append(file)
        texts.append(text)
    return filenames, texts

# Module-level LLM instance used by the Gradio callbacks below (reassigned by process_selection)
llm = HuggingFaceLLM(DEFAULT_MODEL)
# response = hf.generate(messages)
# print(response)
# hf.clean()

# del hf

# Get GESDA files and texts (parallel lists: files[i] is the path of texts[i])
files, texts = get_gesda_files(static_folder)
print(files)

# State shared by the callbacks through `global`
current_index = 0  # Index of the currently displayed file
selected_model = DEFAULT_MODEL # Currently selected model

def get_system_prompt():
    """Provide the default system prompt (``base_system_msg``)."""
    return base_system_msg

def get_user_prompt():
    """Provide the default user prompt template (``base_prompt``)."""
    return base_prompt

def generate_output(system_prompt, user_prompt):
    """Ask the current LLM about the currently selected file.

    Args:
        system_prompt: System message for the chat.
        user_prompt: User prompt template; the selected file's text is
            substituted into it by ``create_messages``.

    Returns:
        The value returned by ``llm.generate`` (also printed).
    """
    selected_text = texts[current_index]
    response = llm.generate(create_messages(system_prompt, user_prompt, selected_text))
    # The CUDA cache is intentionally not cleared here; see clear_memory.
    print(response)
    return response

def display_file(index):
    """Look up the file name stored at position ``index`` of ``files``."""
    selected = files[index]
    return selected

def process_selection(selected_option):
    """Switch the active LLM to the model chosen in the dropdown.

    The previously loaded model is cleaned and dereferenced before the new one
    is created, so that two models are not held at the same time.

    Args:
        selected_option: Model id selected by the user.
    """
    global selected_model, llm
    selected_model = selected_option
    print(f"You selected: {selected_option}")

    # Release the old model first; loading a new one on top of it keeps both alive.
    if llm is not None:
        llm.clean()
    llm = None
    torch.cuda.empty_cache()

    llm = HuggingFaceLLM(selected_model)  # Initialize the LLM with the selected model

def advance_file():
    """Select the next file, wrapping from the last file back to the first.

    Returns:
        The name of the newly selected file.
    """
    global current_index
    total = len(files)
    current_index = (current_index + 1) % total
    return display_file(current_index)

def go_back_file():
    """Select the previous file, wrapping from the first file to the last.

    Returns:
        The name of the newly selected file.
    """
    global current_index
    total = len(files)
    current_index = (current_index - 1) % total
    return display_file(current_index)

def reset(system_prompt, user_prompt, result_text):
    """Restore the default prompts and blank the result.

    The three arguments are the current field values; they are ignored.

    Returns:
        ``(base_system_msg, base_prompt, "")``.
    """
    defaults = (base_system_msg, base_prompt, "")
    return defaults

def clear_memory():
    """Clean the current LLM (when one is set) and empty the CUDA cache.

    Returns:
        An empty string.
    """
    active = llm
    if active is not None:
        active.clean()
    torch.cuda.empty_cache()
    return ""

# Gradio UI: browse the GESDA files, edit the prompts, pick a model and generate a summary
with gr.Blocks() as demo:
    # Name of the currently selected file
    with gr.Row():
      filename = gr.Textbox(label="GESDA file", lines=1, value=display_file(current_index))

    # File navigation
    with gr.Row():
      previous_button = gr.Button("Previous")
      next_button = gr.Button("Next")

    # Editable prompts, pre-filled with the defaults
    with gr.Row():
      system_textbox = gr.Textbox(label="System Prompt", value=get_system_prompt()) #, lines=5)
      prompt_textbox = gr.Textbox(label="User Prompt", value=get_user_prompt()) #, lines=3)

    # Model selection and actions
    with gr.Row():
      model_dropdown = gr.Dropdown(
        label="Select MODEL",
        choices=MODELS,
        value=DEFAULT_MODEL
      )
      with gr.Row():
        generate_button = gr.Button("Generate")
        reset_button = gr.Button("Reset")
        # NOTE(review): this unlabeled textbox is not wired to any handler below — placeholder?
        xxx = gr.Textbox(label="")

    # Model response
    with gr.Row():
      result_textbox = gr.Textbox(lines=10)

    with gr.Row():
      clear_button = gr.Button("Clear memory")

    # Event wiring
    previous_button.click(go_back_file, outputs=filename)
    next_button.click(advance_file, outputs=filename)
    model_dropdown.change(fn=process_selection, inputs=model_dropdown)
    generate_button.click(generate_output, inputs=[system_textbox, prompt_textbox], outputs=result_textbox)
    reset_button.click(reset, inputs=[system_textbox, prompt_textbox, result_textbox], outputs=[system_textbox, prompt_textbox, result_textbox])
    # NOTE(review): clear_memory returns "" but no output component is bound here — confirm intent.
    clear_button.click(clear_memory)

demo.launch(debug=True)

# Drop the reference to the Blocks app after launch() returns
del demo

NameError: name 'HuggingFaceLLM' is not defined

## Screen to Text

### droidbot-GPT

### rv-android

### VQA

In [9]:
def redimensionar_imagem(caminho_imagem, largura_maxima, altura_maxima):
    """Open an image and resize it to the given maximum dimensions.

    The original docstring states that the aspect ratio is preserved.

    Args:
        caminho_imagem: Path to the original image.
        largura_maxima: Maximum desired width.
        altura_maxima: Maximum desired height.

    Returns:
        The resized PIL image.
    """
    max_size = (largura_maxima, altura_maxima)
    resized = Image.open(caminho_imagem)
    # thumbnail() works on the image object itself; its return value is not used
    resized.thumbnail(max_size)
    return resized

In [12]:
# facebook/blip-large: one of the most popular models, offering a good balance between performance and size. It can answer complex questions about images and generate detailed descriptions.
# google/flan-t5-xxl: although larger, Flan-T5-XXL can be used for VQA with good performance on T4 GPUs, especially if memory usage is optimized. It is known for generating high-quality text.
# Salesforce/blip-2-flan-t5-xl: combines BLIP-2 for vision with Flan-T5-XL for language, with impressive results on VQA tasks.

# Choose a model
# default: dandelin/vilt-b32-finetuned-vqa
model_name = "facebook/blip-large"
# model_name = "Salesforce/blip-2-flan-t5-xl"
# model_name = "google/flan-t5-xxl"

# Create the VQA pipeline
vqa_pipeline = pipeline("visual-question-answering", model=model_name)

# Load the image, already resized to at most 512x512.
# (The extra Image.open() that used to precede this call was overwritten
# immediately and only left an unused open image behind.)
image_path = "/content/drive/MyDrive/llms/cryptoapp/001.png"
image = redimensionar_imagem(image_path, 512, 512)

# Define the question
# question = "O que está acontecendo na imagem?"
question = """
Instruções
Descreva a tela do aplicativo Android em detalhes, conforme as instruções fornecidas.

Formato de Resposta
A resposta deve ser estruturada em um formato de tabela ou lista, facilitando a identificação e o uso das informações para testes.

Considerações Adicionais
Adapte este prompt para suas necessidades específicas, incluindo detalhes sobre o aplicativo e os tipos de teste que você deseja realizar.
Seja claro e específico nas suas instruções para obter uma resposta mais precisa e útil.
Use a criatividade para explorar diferentes tipos de interações e ações que podem ser realizadas na tela.
"""

# Get the answer
result = vqa_pipeline(image, question)
print(result)

config.json:   0%|          | 0.00/674 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/5 [00:00<?, ?it/s]

model-00001-of-00005.safetensors:   0%|          | 0.00/9.45G [00:00<?, ?B/s]

model-00002-of-00005.safetensors:   0%|          | 0.00/9.60G [00:00<?, ?B/s]

model-00003-of-00005.safetensors:   0%|          | 0.00/9.96G [00:00<?, ?B/s]

model-00004-of-00005.safetensors:   0%|          | 0.00/10.0G [00:00<?, ?B/s]

model-00005-of-00005.safetensors:   0%|          | 0.00/6.06G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cuda:0


OutOfMemoryError: CUDA out of memory. Tried to allocate 160.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 126.12 MiB is free. Process 2462 has 14.62 GiB memory in use. Of the allocated memory 14.44 GiB is allocated by PyTorch, and 54.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [14]:
# Free the VQA pipeline. The reference has to be dropped BEFORE emptying the
# CUDA cache: memory still referenced by a live object cannot be released.
import gc

del vqa_pipeline
gc.collect()
torch.cuda.empty_cache()

In [2]:
!pip install -q transformers torch torchvision Pillow opencv-python pytesseract
!sudo apt install -y tesseract-ocr
!sudo apt install -y libtesseract-dev

from transformers import ViTFeatureExtractor, ViTModel, BertTokenizer, BertModel
from PIL import Image
import torch
import cv2
import pytesseract

# Vision models
# NOTE(review): feature_extractor, model_vision and model_language are not referenced
# anywhere else in the visible notebook — confirm whether they are still needed.
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
model_vision = ViTModel.from_pretrained('google/vit-base-patch16-224')

# Language models
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model_language = BertModel.from_pretrained('bert-base-uncased')

# Tesseract OCR configuration
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'  # Adjust the path to your Tesseract binary



def identificar_elementos(imagem):
    """Identify candidate interactive elements in an image using OpenCV.

    The image is converted to grayscale, run through Canny edge detection, and
    the bounding rectangle of every external contour is computed. Rectangles
    with an area of 1000 or less are discarded.

    Args:
        imagem: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        A list of dicts with keys ``'x'``, ``'y'``, ``'w'`` and ``'h'``.
    """
    grayscale = cv2.cvtColor(imagem, cv2.COLOR_BGR2GRAY)
    edge_map = cv2.Canny(grayscale, 50, 150, apertureSize=3)
    outlines, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = (cv2.boundingRect(outline) for outline in outlines)
    # Keep only rectangles with a reasonable area
    return [
        {'x': x, 'y': y, 'w': w, 'h': h}
        for x, y, w, h in boxes
        if w * h > 1000
    ]

def extrair_texto(imagem, elemento):
    """Extract the text of one element using Tesseract OCR.

    Args:
        imagem: Image indexed as ``imagem[rows, cols]``.
        elemento: Dict with the ``'x'``, ``'y'``, ``'w'`` and ``'h'`` of the
            region to read.

    Returns:
        The recognized text with surrounding whitespace stripped.
    """
    left, top = elemento['x'], elemento['y']
    right, bottom = left + elemento['w'], top + elemento['h']
    region = imagem[top:bottom, left:right]
    return pytesseract.image_to_string(region).strip()


# Load the screenshot
screenshot_path = "/content/drive/MyDrive/llms/cryptoapp/001.png"
imagem = cv2.imread(screenshot_path)
if imagem is None:
    # cv2.imread signals an unreadable/missing file by returning None instead of raising
    raise FileNotFoundError(f"Could not read screenshot: {screenshot_path}")

# Identify the interactive elements
elementos = identificar_elementos(imagem)

# Extract the text of each element
for elemento in elementos:
    elemento['texto'] = extrair_texto(imagem, elemento)

# Build the textual description: one line per element with its text and bounding box
descricao = "Tela com os seguintes elementos:\n" + "".join(
    f"- {elemento['texto']} ({elemento['x']}, {elemento['y']}, {elemento['w']}, {elemento['h']})\n"
    for elemento in elementos
)

# Build the possible actions: only elements where OCR found some text
acoes = [
    f"Interagir com o elemento: {elemento['texto']}"
    for elemento in elementos
    if elemento['texto']
]

print(descricao)
print(acoes)

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 19 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]
Fetched 4,816 kB in 2s (2,913 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debc

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Tela com os seguintes elementos:
-  (521, 1838, 38, 38)
-  (831, 1837, 40, 40)
- y ‘N (207, 1836, 37, 42)
-  (0, 1793, 1080, 1)
-  (0, 436, 1080, 12)
-  (0, 350, 1080, 12)
-  (0, 310, 1080, 12)
-  (0, 224, 1080, 12)
-  (0, 210, 1080, 1)
-  (98, 128, 26, 41)
-  (219, 119, 35, 38)
-  (44, 118, 31, 40)
-  (0, 62, 1080, 1)
-  (988, 15, 32, 32)
-  (945, 14, 39, 33)
- (2) (143, 13, 36, 37)

['Interagir com o elemento: y ‘N', 'Interagir com o elemento: (2)']


## Tokenizer

In [None]:
def create_prompt(messages: list[dict[str, str]], model=DEFAULT_MODEL):
    """Placeholder: not implemented yet; ignores its arguments and returns ``None``."""
    return None

In [None]:
# Tokenizer walkthrough: encode/decode a sentence, then render a chat prompt
# tokenizer = AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3.1-8B', trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(LLAMA, trust_remote_code=True)

text = "I am excited to show Tokenizers in action to my LLM engineers"
tokens = tokenizer.encode(text)
# NOTE(review): the four bare expressions below discard their results; they presumably
# came from separate cells where each value was displayed — confirm.
tokens
tokenizer.decode(tokens)
tokenizer.batch_decode(tokens)
tokenizer.get_added_vocab()


messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Tell a light-hearted joke for a room of Data Scientists"}
  ]

# Render the chat as text (tokenize=False), including the generation prompt
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)

In [None]:
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Tell a light-hearted joke for a room of Data Scientists"}
  ]

# Quantization Config - this allows us to load the model into memory and use less memory
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL)
tokenizer.pad_token = tokenizer.eos_token
# add_generation_prompt=True appends the assistant header to the rendered chat,
# as in the text-only rendering used in the tokenizer walkthrough; without it
# the model is not cued to start the assistant's reply.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# The model
model = AutoModelForCausalLM.from_pretrained(DEFAULT_MODEL, device_map="auto", quantization_config=quant_config)

In [None]:
# Print the loaded model's memory footprint, scaled by 1e6 and labelled as MB
memory = model.get_memory_footprint() / 1e6
print(f"Memory footprint: {memory:,.1f} MB")

In [None]:
model

In [None]:
# Generate up to 80 new tokens from the templated chat and print the first decoded sequence
outputs = model.generate(inputs, max_new_tokens=80)
print(tokenizer.decode(outputs[0]))

In [None]:
# Clean up: drop the references first, then empty the CUDA cache
del inputs, outputs, model
torch.cuda.empty_cache()

In [None]:
#

In [None]:
# response = hf.generate(messages)
# print(response)

In [None]:
# Print every item found under "generated_text" of each entry in `result`.
# NOTE(review): assumes `result` is a list of dicts with a "generated_text" key, as
# returned by a text-generation pipeline; the only visible assignment to `result`
# is the VQA call — confirm which cell is meant to feed this loop.
for r in result:
    g = r["generated_text"]
    # print(g)
    for x in g:
        print(x)