In [1]:
#!pip install qwen_vl_utils
#!pip install sentence-transformers faiss-cpu transformers
#!pip install torchvision
#!pip install accelerate
!pip show flash_attn

Name: flash_attn
Version: 2.7.4.post1
Summary: Flash Attention: Fast and Memory-Efficient Exact Attention
Home-page: https://github.com/Dao-AILab/flash-attention
Author: Tri Dao
Author-email: tri@tridao.me
License: 
Location: /home/capstone_student/Documents/sinarmas_project_flowchart/venv/lib/python3.12/site-packages
Requires: einops, torch
Required-by: 


In [2]:
import subprocess

def check_cuda_memory():
    try:
        output = subprocess.check_output(["nvidia-smi", "--query-gpu=memory.total,memory.used,memory.free", "--format=csv,nounits,noheader"])
        total, used, free = map(int, output.decode("utf-8").strip().split("\n")[0].split(", "))
        print(f"Total GPU Memory: {total} MB")
        print(f"Used GPU Memory: {used} MB")
        print(f"Free GPU Memory: {free} MB")
    except Exception as e:
        print(f"Error: {e}")

check_cuda_memory()


Total GPU Memory: 24576 MB
Used GPU Memory: 943 MB
Free GPU Memory: 23203 MB


In [27]:
documents = [
    '''
    A[Purchase Order Received];
    B{Current Customer?};
    C[hello world];
    D{Customer from US};
    E[Process New Customer Record];
    F[Submit to Controller for Approval];
    G[Input Order];
    H[Delete Order];
    A --> B;
    B -- Yes --> C;
    B -- No --> D;
    D -- Yes --> E;
    D -- No --> F;
    E --> G;
    F --> G;
    C --> H;
    G --> H;
    '''
]

In [28]:
# Convert documents to embeddings
from sentence_transformers import SentenceTransformer

embed_model = SentenceTransformer("all-MiniLM-L6-v2")

document_embeddings = [embed_model.encode(doc) for doc in documents]

print(f"Generated {len(document_embeddings)} embeddings.")

Generated 1 embeddings.


In [29]:
# Convert embeddings to NumPy array and add to FAISS index
import faiss
import numpy as np

dimension = len(document_embeddings[0])  # Vector size 
index = faiss.IndexFlatL2(dimension)

vectors = np.array(document_embeddings, dtype="float32")
index.add(vectors)

print("Stored document embeddings in FAISS vector database.")


Stored document embeddings in FAISS vector database.


In [30]:
# Code from https://github.com/QwenLM/Qwen2.5-VL, adjusted with 3B version

from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch
import os

os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# default: Load the model on the available device(s)
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-3B-Instruct", 
    torch_dtype=torch.bfloat16, 
    attn_implementation="flash_attention_2",
    device_map="auto"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [35]:
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
import torch

# Function to retrieve the most relevant document based on the user's actual question
def retrieve_relevant_text(user_question, top_k=2):
    query_embedding = embed_model.encode(user_question).reshape(1, -1)
    _, retrieved_indices = index.search(query_embedding, top_k)
    retrieved_docs = [documents[i] for i in retrieved_indices[0]]
    return retrieved_docs


# Load Qwen model and processor
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct")

# SET USER QUERY HERE
user_question = '''What comes after 'Input Order'? Prioritize answering the system prompt first'''

# Use the user's actual question as the query
retrieved_context = retrieve_relevant_text(user_question)

# Debug output: Print retrieved context separately
print("\n[DEBUG] Retrieved Context:")
for i, fact in enumerate(retrieved_context, 1):
    print(f"{i}. {fact}")

# Combine retrieved context into a single string for input
retrieved_text = " ".join(retrieved_context)

# 🛠️ Prioritize dataset knowledge but allow fallback to general knowledge
messages = [
    {"role": "context", "content": 
     'Flowchart:\n' + ('\n'.join(retrieved_context[0].split('\n')[1:])) + f"\nThere are {len(retrieved_context[0].split('\n'))-2} lines in the flowchart"},
    {"role": "system", "content": (
        '''

Here are instructions on how to read the flowcharts:
The first few lines dictate each node's labels. Each line, ended with a semicolon(;), denotes the label like so:
(X)[(label)], where (X) is the symbol that will be used to reference this node, and (label) is the text in that node.
Square brackets ([]) denotes a process node, while curly braces ({}) denotes a decision node.
The rest of the lines denote the flow of operations.
It is the format:
(X) --> (Y) 
which denotes that node Y happens after node X.
Or the format:
(X) -- (Yes/No) --> (Y),
where X is a decision node, and Y happens if the condition (Yes/No) matches.
Do not answer the user query. Explain the flowchart first line by line first. Explain the reasoning behind each line.
Search for the line with the relevant label to the user query.
Then, search the line with the symbol relevant to the user query.
Then answer the query.
        '''
    )}, 
    {"role": "user", "content": (user_question)},
    
]

# Convert to model input
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(
    text=[text],
    padding=True,
    return_tensors="pt",
).to(model.device)

# Generate response
generated_ids = model.generate(**inputs, max_new_tokens=512)
response = processor.batch_decode(generated_ids, skip_special_tokens=True)

# Print final output in a structured format
print("\n=== 💬 AI with RAG Response ===")
print("system\nUse the following information to answer the question. Prioritize this information over other knowledge")
print()
print("Result:")
for r in response:
    print(r)
    print('====')
print("=======================")



[DEBUG] Retrieved Context:
1. 
    A[Purchase Order Received];
    B{Current Customer?};
    C[hello world];
    D{Customer from US};
    E[Process New Customer Record];
    F[Submit to Controller for Approval];
    G[Input Order];
    H[Delete Order];
    A --> B;
    B -- Yes --> C;
    B -- No --> D;
    D -- Yes --> E;
    D -- No --> F;
    E --> G;
    F --> G;
    C --> H;
    G --> H;
    
2. 
    A[Purchase Order Received];
    B{Current Customer?};
    C[hello world];
    D{Customer from US};
    E[Process New Customer Record];
    F[Submit to Controller for Approval];
    G[Input Order];
    H[Delete Order];
    A --> B;
    B -- Yes --> C;
    B -- No --> D;
    D -- Yes --> E;
    D -- No --> F;
    E --> G;
    F --> G;
    C --> H;
    G --> H;
    

=== 💬 AI with RAG Response ===
system
Use the following information to answer the question. Prioritize this information over other knowledge

Result:
system
You are a helpful assistant.
context
Flowchart:
    A[Purchase Ord