# RV-ANDROID playground

# Config

## Local

`sudo apt install python3.12-dev nvidia-cuda-toolkit bitsandbytes triton`

```
nvidia-smi
nvcc --version
``` 

## Google colab

In [None]:
!pip install -q gradio diffusers transformers accelerate torch Pillow

#datasets

In [None]:
# Clone RVSec
from google.colab import userdata, drive

!rm -Rf sample_data/

#https://github.com/ad17171717/YouTube-Tutorials/blob/main/Google%20Colab%20Tutorials/Google_Colab_%2B_Git_Pushing_Changes_to_a_GitHub_Repo!.ipynb
!git config --global user.name "phtcosta"
!git config --global user.email "phtcosta@gmail.com"

# https://github.com/settings/tokens
github_token = userdata.get('GITHUB_TOKEN')
!git clone --branch develop https://{github_token}@github.com/PAMunb/rvsec.git

%cd rvsec/rv-android/
!pip install -q -r requirements.txt

In [None]:
!pwd

In [None]:
# Mount google drive
drive.mount("/content/drive")

In [None]:
!git status

In [None]:
# drive.flush_and_unmount()
!git add --all
!git commit -a -m "Just testing"
# !git remote -v

#  Experiments

In [1]:
# Imports

from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display #, Image
import gradio as gr
from PIL import Image
import numpy as np
import os
import glob
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor, WhisperConfig, WhisperForConditionalGeneration

from rvandroid.llm.huggingface import HuggingFaceLLM

In [2]:
# Constants

LLAMA = "meta-llama/Meta-Llama-3.1-8B-Instruct" # needs permission
QWEN = "Qwen/Qwen2.5-3B-Instruct" # "Qwen/Qwen2.5-0.5B-Instruct" # "Qwen/Qwen2.5-3B-Instruct" # "Qwen/Qwen2.5-3B" # "Qwen/Qwen2.5-VL-7B-Instruct" "Qwen/Qwen2-7B-Instruct"
# PHI2 = "microsoft/phi-2"
PHI3 = "microsoft/Phi-3-mini-4k-instruct" 
PHI3_5="microsoft/Phi-3.5-mini-instruct" 
GEMMA2 = "google/gemma-2-2b-it" # needs permission: https://huggingface.co/google/gemma-2-2b-it
STARCODER2 = "bigcode/starcoder2-3b"
FALCON= "tiiuae/Falcon3-3B-Instruct" # tiiuae/Falcon3-7B-Instruct # https://falconllm.tii.ae/
GRANITE = "ibm-granite/granite-3.1-8b-instruct"
DEEPSEEK = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" # "deepseek-ai/deepseek-llm-7b-chat"

DEFAULT_MODEL = LLAMA
MODELS = [LLAMA, QWEN, PHI3, PHI3_5, GEMMA2, STARCODER2, FALCON, GRANITE]

In [3]:
# Log in HF

load_dotenv(override=True)

hf_token = os.getenv('HF_TOKEN')
login(hf_token) #, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [None]:
# download models
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Tell a light-hearted joke for a room of Data Scientists"}
  ]

MODELS = ["deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "deepseek-ai/deepseek-llm-7b-chat"]
for model in MODELS:
    print(f"MODEL={model}")
    hf = HuggingFaceLLM(model)
    print(hf.generate(messages))
    hf.clean()

In [None]:
# Text Generation
def text_generation_hf_pipeline(messages: list[str], model=QWEN):
  chat = pipeline("text-generation", model=model) #, device="cuda")
  return chat(messages)

In [None]:
if torch.cuda.is_available():
    print("GPU está disponível")
    # device = torch.device("cuda")  # Define o dispositivo como GPU
else:
    print("GPU não está disponível")
    # device = torch.device("cpu")  # Define o dispositivo como CPU

In [None]:
torch.cuda.empty_cache() 

## Static Analysis

In [4]:
static_folder = "/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out"

def read_text_file(file_path):
    with open(file_path, 'r') as file:
        text = file.read()
    return text

def read_files_by_extension(folder: str, extension: str = "*.gesda"):
    files = glob.glob(os.path.join(static_folder, extension))
    for file in files:
      text = read_text_file(file)
      yield file, text

### GESDA

In [5]:
base_system_msg = """You are an expert assistant in testing the interface of Android applications, and you use this knowledge to make useful summaries about the components (activities, windows, widgets) contained on the screen. Some widgets may have information about which method will be called when it is clicked, others may have information about the assignment of this widget to a field declared in the class, listing all those that are relevant in the context of interface testing, and suggesting the possible actions on this component (click, set text, select item). The information about the application that must be understood is contained in a string in json format, which will be passed to you.
"""
base_prompt = "Make a summary of the application 'cryptoapp' which has the following information in json format: {}"

def create_messages(system_msg: str, prompt: str, json_text: str) -> list[dict[str, str]]:
    messages=[
        {"role": "system", "content": system_msg },
        {"role": "user", "content": prompt.format(json_text)}
    ]
    return messages


In [None]:
# Basic example

text = read_text_file("/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/cryptoapp.apk.gesda")
# print(text)
messages=create_messages(base_system_msg, base_prompt, text)
# print(messages)

print("Generating ...")

hf = HuggingFaceLLM(LLAMA)
response = hf.generate(messages)
print(response)
hf.clean()

del hf

2025-02-10 10:20:06,040 - INFO - rvandroid.llm.huggingface - Loading tokenizer for meta-llama/Meta-Llama-3.1-8B-Instruct...


Generating ...


tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

2025-02-10 10:20:10,568 - INFO - rvandroid.llm.huggingface - Loading model meta-llama/Meta-Llama-3.1-8B-Instruct on cuda...


config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
2025-02-10 10:44:20,350 - INFO - rvandroid.llm.huggingface - Model and tokenizer unloaded, CUDA cache cleared.


system

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

You are an expert assistant in testing the interface of Android applications, and you use this knowledge to make useful summaries about the components (activities, windows, widgets) contained on the screen. Some widgets may have information about which method will be called when it is clicked, others may have information about the assignment of this widget to a field declared in the class, listing all those that are relevant in the context of interface testing, and suggesting the possible actions on this component (click, set text, select item). The information about the application that must be understood is contained in a string in json format, which will be passed to you.user

Make a summary of the application 'cryptoapp' which has the following information in json format: {"fileName":"cryptoapp.apk","packageName":"br.unb.cic.cryptoapp","windows":[{"id":1,"name":"br.unb.cic.cryptoapp.cipher.CipherActivity","isMai

In [None]:
# Local
static_folder = "/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out"
# Google Colab
# static_folder = "/content/drive/MyDrive/llms/rvandroid/static"


def get_gesda_files(folder: str):
    filenames = []
    texts = []
    for file, text in read_files_by_extension("*.gesda"):
      filenames.append(file)
      texts.append(text)
    return filenames, texts



files, texts = get_gesda_files(static_folder)
print(files)

indice_atual = 0

def get_system_prompt():
   return base_system_msg

def get_user_prompt():
   return base_prompt

def generate_output(system, prompt):
  print(system)
  create_messages(system, prompt, texts[indice_atual])
  return "Hello"

def mostrar_imagem(indice):
    return files[indice]

def processar_selecao(opcao):
  return f"Você selecionou: {opcao}"

def avancar_imagem():
    """Avança para a próxima imagem da lista."""
    global indice_atual
    indice_atual = (indice_atual + 1) % len(files)  # Volta ao início se chegar ao fim da lista
    return mostrar_imagem(indice_atual)

def voltar_imagem():
    """Volta para a imagem anterior da lista."""
    global indice_atual
    indice_atual = (indice_atual - 1) % len(files)  # Volta para o fim se chegar ao início da lista
    return mostrar_imagem(indice_atual)

def reset(system, prompt, result):
  return base_system_msg, base_prompt, ""

with gr.Blocks() as demo:
    with gr.Row():
      filename = gr.Textbox(label="GESDA file", lines=1, value=mostrar_imagem(indice_atual))
    
    with gr.Row():
      btn_previous = gr.Button("Previous")
      btn_next = gr.Button("Next")
    
    with gr.Row():
      system_textbox = gr.Textbox(label="System Prompt", value=get_system_prompt()) #, lines=5)
      prompt_textbox = gr.Textbox(label="User Prompt", value=get_user_prompt()) #, lines=3)

    # with gr.Row():
    #   prompt_textbox = gr.Textbox(label="User Prompt", value=get_user_prompt(), lines=3)

    with gr.Row():        
      dropdown = gr.Dropdown(
        label="Select MODEL",
        choices=MODELS,         
        value=DEFAULT_MODEL
      )
      with gr.Row():
        btn_generate = gr.Button("Generate")
        btn_reset = gr.Button("Reset")
    
    # with gr.Row():        
    #   btn_generate = gr.Button("Generate")
        
    with gr.Row():
      result = gr.Textbox(lines=10)
            
    btn_previous.click(voltar_imagem, outputs=filename)
    btn_next.click(avancar_imagem, outputs=filename)
    dropdown.change(fn=processar_selecao, inputs=dropdown)
    btn_generate.click(generate_output, inputs=[system_textbox, prompt_textbox], outputs=result)
    btn_reset.click(reset, inputs=[system_textbox, prompt_textbox, result], outputs=[system_textbox, prompt_textbox, result])

demo.launch()


['/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/livio.rssreader_101.apk.gesda', '/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/org.passwordmaker.android_11.apk.gesda', '/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/com.github.axet.hourlyreminder_476.apk.gesda', '/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/com.gianlu.dnshero_40.apk.gesda', '/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/org.secuso.privacyfriendlydicer_8.apk.gesda', '/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/com.thibaudperso.sonycamera_24.apk.gesda', '/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/ee.ioc.phon.android.speak_1814.apk.gesda', '/home/pedro/desenvolvimento/workspaces/workspac

2025-02-10 11:21:05,841 - INFO - httpx - HTTP Request: GET http://127.0.0.1:7883/gradio_api/startup-events "HTTP/1.1 200 OK"
2025-02-10 11:21:05,851 - INFO - httpx - HTTP Request: HEAD http://127.0.0.1:7883/ "HTTP/1.1 200 OK"


* Running on local URL:  http://127.0.0.1:7883

To create a public link, set `share=True` in `launch()`.




2025-02-10 11:21:06,489 - INFO - httpx - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"


In [None]:
response = hf.generate(messages)
print(response)

In [None]:
for r in result:
    g = r["generated_text"]
    # print(g)
    for x in g:
        print(x)

In [None]:



# text = read_text_file("/home/pedro/desenvolvimento/workspaces/workspaces-doutorado/workspace-rv/rvsec/rv-android/out/cryptoapp.apk.gesda")
# print(text)
# system_msg = """You are an expert assistant in testing the interface of Android applications, and you use this knowledge to make useful summaries about the components (activities, windows, widgets) contained on the screen. Some widgets may have information about which method will be called when it is clicked, others may have information about the assignment of this widget to a field declared in the class, listing all those that are relevant in the context of interface testing, and suggesting the possible actions on this component (click, set text, select item). The information about the application that must be understood is contained in a string in json format, which will be passed to you.
# """
# messages=[
#   {"role": "system", "content": system_msg },
#   {"role": "user", "content": f"Make a summary of the application 'cryptoapp' which has the following information in json format: {text}"}
# ]
# print(messages)
# print("Consultando ...")


# from rvandroid.llm.huggingface import HuggingFace
# hf = HuggingFace(LLAMA)

# response = hf.generate(messages)
# print(response)


# # #sudo apt install nvidia-cuda-toolkit
# # #nvcc --version

# # quant_config = BitsAndBytesConfig(
# #     load_in_4bit=True,
# #     bnb_4bit_use_double_quant=True,
# #     bnb_4bit_compute_dtype=torch.bfloat16,
# #     bnb_4bit_quant_type="nf4"
# # )

# # tokenizer = AutoTokenizer.from_pretrained(LLAMA)
# # tokenizer.pad_token = tokenizer.eos_token

# # inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")

# # # streamer = TextStreamer(tokenizer)

# # model = AutoModelForCausalLM.from_pretrained(LLAMA, device_map="auto", quantization_config=quant_config)
# # outputs = model.generate(inputs, max_new_tokens=2000) #, streamer=streamer)

# # response = tokenizer.decode(outputs[0])
# # print(response)

# # del inputs, outputs, model
# # torch.cuda.empty_cache()

# # result = text_generation_hf_pipeline(messages, model=GEMMA2)
# # print(result)




## Tokenizer

In [None]:
def create_prompt(messages: list[dict[str, str]], model=DEFAULT_MODEL):
    pass

In [None]:
# tokenizer = AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3.1-8B', trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained('meta-llama/Meta-Llama-3.1-8B-Instruct', trust_remote_code=True)

text = "I am excited to show Tokenizers in action to my LLM engineers"
tokens = tokenizer.encode(text)
tokens
tokenizer.decode(tokens)
tokenizer.batch_decode(tokens)
tokenizer.get_added_vocab()


messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Tell a light-hearted joke for a room of Data Scientists"}
  ]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)

In [None]:
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Tell a light-hearted joke for a room of Data Scientists"}
  ]

# Quantization Config - this allows us to load the model into memory and use less memory
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL)
tokenizer.pad_token = tokenizer.eos_token
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")

# The model
model = AutoModelForCausalLM.from_pretrained(DEFAULT_MODEL, device_map="auto", quantization_config=quant_config)

In [None]:
memory = model.get_memory_footprint() / 1e6
print(f"Memory footprint: {memory:,.1f} MB")

In [None]:
model

In [None]:
outputs = model.generate(inputs, max_new_tokens=80)
print(tokenizer.decode(outputs[0]))

In [None]:
# Clean up
del inputs, outputs, model
torch.cuda.empty_cache()

In [None]:
# 