In [1]:
import requests
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_qdrant import Qdrant, QdrantVectorStore
from langchain_ollama import OllamaLLM
from langchain.chains import RetrievalQA

# Ollama settings

In [2]:
!ollama list

NAME               ID              SIZE      MODIFIED    
llama3.2:latest    a80c4f17acd5    2.0 GB    4 weeks ago    


In [3]:
!ollama serve

Error: listen tcp 127.0.0.1:11434: bind: address already in use


In [4]:
def query_llama(prompt, model="llama3.2"):
    url = "http://localhost:11434/api/chat"
    headers = {"Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}]
    }
    response = requests.post(url, json=payload, headers=headers, stream=True)
    
    if response.status_code != 200:
        print(f"Błąd {response.status_code}: {response.text}")
        return None

    full_response = ""
    for line in response.iter_lines():
        if line:
            try:
                json_line = line.decode('utf-8')
                data = requests.models.complexjson.loads(json_line)
                full_response += data.get("message", {}).get("content", "")
            except ValueError:
                print(f"Niepoprawny JSON: {line}")
    
    return full_response

prompt = "Mówisz po polsku?"
result = query_llama(prompt)
print(result)

Tak, mówię po polsku. Jak mogę pomoć Ci w języku polskim?


# Qdrant

In [5]:
# !docker pull qdrant/qdrant

In [6]:
# !docker run -p 6333:6333 -p 6334:6334 \
#     -v $(pwd)/qdrant_storage:/qdrant/storage:z \
#     qdrant/qdrant

In [7]:
from qdrant_client import QdrantClient

client = QdrantClient(url="http://localhost:6333")

In [8]:
def load_pdf(file_path):
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    return documents

doc = load_pdf("./doc.pdf")

In [9]:
# Step 2: Split documents into appropriate chunks
def split_documents(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""]
    )
    split_docs = text_splitter.split_documents(documents)
    return split_docs
chunks = split_documents(doc)

In [10]:
len(chunks)

74

In [11]:
# Load model to generate embeddings:
model = HuggingFaceEmbeddings(model_name="sentence-transformers/sentence-t5-base")

  from tqdm.autonotebook import tqdm, trange
  torch.load(os.path.join(input_path, "pytorch_model.bin"), map_location=torch.device("cpu"))


In [12]:
embedding_vector = model.embed_query('test')
print(f"Embedding size:: {len(embedding_vector)}")

Embedding size:: 768


In [13]:
from qdrant_client.models import Distance, VectorParams
# Create proper collection

# Delete if already exist
client.delete_collection(collection_name="rag_collection")


client.create_collection(
    collection_name="rag_collection",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

True

In [14]:
 # Initialize Qdrant client
qdrant = QdrantVectorStore(
        client=client,
        collection_name='rag_collection',
        embedding=model
    )

 # Push documents and their embeddings to Qdrant
qdrant.add_documents(chunks)

['bc64f923ec984597bced92874f7d0761',
 'a97093b1a30b42d9b82bcad64d5f534d',
 '19d8cfbd0efb431e8217ef78e38460ab',
 '5dc231b7e7ab49c4b06e208fe68c7cf1',
 'b360b3775bb547dbb962e6c4ad0cab77',
 'f2352a13f8b64d00bb47ba26403297b8',
 'df68ebf9fa4d4272bf4a21337f7a305e',
 'e1b0ea7e3fa540be90e0081ebaaad595',
 '372eceaab0534e2e97aa33b6f2eba944',
 '021508b846f84f919b6a92b04300bfc5',
 '8be295123f12458a9c64aa2e16986bd4',
 'f754dc36ea6e4f8c86656c9fc2544403',
 '226ecaf9b3d048109693e776fec79e00',
 '84301e760f504d0f933c468c92f65311',
 '2796a2b50ea546a18b09e373fcf3487a',
 '900c22c781474645aa80c7d6a506f5b8',
 'd0e76c649f904b05acab5ec3e9b8f233',
 '3e96a59e619a41e68a7f1dba9ca19e24',
 '6ae29908fac64d10af01922778d07cdf',
 'b5717c35e65e4629913dde2ee13551b7',
 'f9163ffbd01142f3b64fb89b0f085e05',
 '5075d9a0976a4f35b7ce86cef5cf9a23',
 '6cf380ce31c8467084c7f984cc614f4e',
 '5e16e2b3234242fc8987872a980df153',
 '9174c31cad4049619ac4cbe765a59d85',
 'a49971d8052f42f9a852abf587d01eca',
 '740468caab7c4abcaf2a9a0c01e8ab29',
 

In [15]:
collection_info = client.get_collection('rag_collection')
print(f"Info: \n {collection_info}")
print("\n")
stats = client.get_collection('rag_collection').points_count
print(f"Number of records: {stats}")

query_result = client.scroll(
    collection_name='rag_collection',
    limit=1,
    with_vectors=True,
)
print("\n")
print(f"First record:\n {query_result}")

Info: 
 status=<CollectionStatus.GREEN: 'green'> optimizer_status=<OptimizersStatusOneOf.OK: 'ok'> vectors_count=None indexed_vectors_count=0 points_count=74 segments_count=8 config=CollectionConfig(params=CollectionParams(vectors=VectorParams(size=768, distance=<Distance.COSINE: 'Cosine'>, hnsw_config=None, quantization_config=None, on_disk=None, datatype=None, multivector_config=None), shard_number=1, sharding_method=None, replication_factor=1, write_consistency_factor=1, read_fan_out_factor=None, on_disk_payload=True, sparse_vectors=None), hnsw_config=HnswConfig(m=16, ef_construct=100, full_scan_threshold=10000, max_indexing_threads=0, on_disk=False, payload_m=None), optimizer_config=OptimizersConfig(deleted_threshold=0.2, vacuum_min_vector_number=1000, default_segment_number=0, max_segment_size=None, memmap_threshold=None, indexing_threshold=20000, flush_interval_sec=5, max_optimization_threads=None), wal_config=WalConfig(wal_capacity_mb=32, wal_segments_ahead=0), quantization_conf

# RAG

In [16]:
retriever = qdrant.as_retriever()
llm = OllamaLLM(model='llama3.2')

In [17]:
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
)

# Run RAG pipeline
query = "What is the capital of France?"
response = rag_chain.invoke(query)
print(response)

{'query': 'What is the capital of France?', 'result': 'I don\'t know how to answer this question based on the provided context about Graph Neural Networks (GNNs) in Computer Vision. The text appears to be a collection of research papers and references related to GNNs applications, but it does not contain any information about geography or answering questions like "What is the capital of France?".'}


In [18]:
def query_rag(rag_chain, query):
    return rag_chain.invoke(query)['result']
def compare_rag_llm(query):
    rag_result  = query_rag(rag_chain, query)
    llm_result = query_llama(query)
    
    print("Answer RAG:")
    print(rag_result)
    print("Answer LLM:")
    print(llm_result)

In [19]:
compare_rag_llm("What graph neural networks take as input?")

Answer RAG:
According to the context, Graph Convolutional Networks (GCNs) and other graph neural network models can work directly with graphs and their structural information.
Answer LLM:
Graph Neural Networks (GNNs) typically take the following inputs:

1. **Adjacency Matrix**: The adjacency matrix of the graph, which represents the connections between nodes in the graph. In this matrix, each entry represents the weight or strength of the connection between two nodes.
2. **Node Features**: One or more vectors representing the attributes or features of each node in the graph. These features can be categorical, numerical, or a combination of both.
3. **Optional: Edge Features**: Additional vectors representing the attributes or features of edges in the graph. These features can provide additional context about the connections between nodes.

The input to GNNs is usually represented as a triplet:

* V (node set): The set of all nodes in the graph
* A (edge set): The set of all edges in t

In [20]:
compare_rag_llm("What is the difference between dynamic and static graph?")

Answer RAG:
I don't know the answer to this question based on the provided context. The text discusses various Graph Neural Network (GNN) architectures, but it does not explicitly define or distinguish between "dynamic" and "static" graphs. It mentions that some GNNs can handle partial relationship information, but I couldn't find any direct explanation of the difference between dynamic and static graphs.
Answer LLM:
A dynamic graph and a static graph are two types of graph data structures that differ in how they store and manage their vertices (nodes) and edges.

**Static Graph:**

In a static graph, all the vertices and edges are stored in memory at once. When you add or remove vertices or edges, it requires updating the entire graph data structure. This can be inefficient for large graphs, as it involves copying or modifying a significant amount of data.

Characteristics of a static graph:

* All vertices and edges are stored in memory simultaneously.
* Adding or removing vertices o

In [21]:
compare_rag_llm("Explain how does ST- GCN works")

Answer RAG:
Unfortunately, I don't know how ST-GCN (Spatial-Temporal Graph Convolutional Network) works, as it is not explicitly described in the provided text. The text only provides a general overview of Spatial-Temporal Graph Neural Networks (STGNNs) and mentions that ST-GCN alleviates the sequential nature of PolygonRNN by predicting all vertices simultaneously using a Graph Convolutional Network (GCN), but it does not provide further details on how this is implemented.
Answer LLM:
ST-GCN (Spatial-Temporal Graph Convolutional Network) is a type of neural network architecture that combines spatial and temporal graph convolutional layers to process data on graphs. Here's an overview of how it works:

**Architecture**

The ST-GCN architecture consists of two main components:

1. **Spatial Graph Convolution Layer**: This layer applies graph convolution operations to the input graph, treating nodes as independent features.
2. **Temporal Graph Convolution Layer**: This layer applies grap

In [22]:
compare_rag_llm("Can we use Graph Neural Network in Computer Vision? How?")

Answer RAG:
Yes, we can use Graph Neural Networks (GNNs) in Computer Vision. According to the context, GNNs are a family of graph networks inspired by mechanisms existing between nodes on a graph, and they have been increasingly used in computer vision for various applications such as:

* Biometrics
* Face recognition
* Gesture recognition
* Body pose recognition
* Video analysis and understanding
* Action and behavior recognition
* Computational photography
* Image and video synthesis from zero or few shots

GNNs can help express and provide a more explainable representation of data, making them a useful tool in computer vision. The paper being discussed aims to collect papers on GNN-based approaches towards computer vision, highlighting their architectures, datasets, and common approaches.

However, the question "How?" is not explicitly answered in the provided context. If you're looking for specific techniques or methods used to apply GNNs in computer vision, it would require furthe

In [23]:
compare_rag_llm("What are most common benchmark datasets to test GNN on?")

Answer RAG:
The text provides a list of the most commonly used benchmark datasets for testing Graph Neural Networks (GNNs) in computer vision, as presented in Table I. The datasets mentioned include:

1. NTU-RGBD - Action and behavior recognition
2. CUHK03 - Biometrics, face, gesture, body pose
3. Skeleton-Kinetics - Action and behavior recognition
4. Visual Genome (Scene analysis and Low-level and physics-based vision)
5. ShanghaiTech - Video analysis and understanding
6. COCO (Recognition) - Object detection and categorization
7. MS-Caleb-1M - Computational photography, image and video synthesis

These datasets are commonly used in GNN-based computer vision studies.
Answer LLM:
Most Common Benchmark Datasets for GNN:

1. **Cora**: A graph classification dataset consisting of 2708 nodes and 17 features. It is widely used as a benchmark for evaluating the performance of Graph Neural Networks (GNNs).
2. **CiteSeer**: Similar to Cora, but with more nodes (2660) and edges (1664). It is al

# Wnioski
Sam LLM (Ollama) w moim przypadku ma tendencje do znacznie dlużyszch odpowiedzi. Niekiedy może być to na plus, niekiedy na minus. Podobnie jak odpowiadanie na pytania których nie ma lub częściowo nie ma w bazie danych. RAG, nie odpowiada i informuje nas o tym, że brak mu info, jednak LLM praktycznie zawsze odpowie, nie musimy się tutaj obawiac brakiem informacjiw  bazie jednak narażamy się na halucynacje. Niemniej, był to RAG robiony dla zadania QA, co za tym idzie myśle, że częsciej be∂zie nas interesowało QA z jakimś konktekstem i wykorzysytwaniem ograniczonych informacji, w tym celu RAG wydaje się być lepszą opcją. Mamy tutaj dostęp i odpowiedzi na pytania z naszej bazy, jesli ich nie ma to też jestesmy o tym informowani. Jeśli chcielibyśmy zadwać pytania na różne tematy, coś ala Open-Domain QA, penwie skorzystamy z gotowych rozwiązań (Ogromnych LLM a nie naszych mneijszych) ew. wyszukiwarki google po prostu a nie będziemy implementować RAG. 


# Questions
- How does RAG improve the quality and reliability of LLM responses compared to pure LLM generation?
    RAG odpowiadał tylko na pytania do których miał jasne odpowiedzi w texcie, odpowaidał dobrze i na bazei kontekstu z bazy danych.
- What are the key factors affecting RAG performance (chunk size, embedding quality, prompt design)?
    - wiekszy chunk wiekszy kontekts, jednak dluzej zajmuje przetworzenie, ponadto moze utrudniac to zwiezle i krotkie odpowiedzi
    - Im lepsze embeddingi tym lepsza separacja danych i rozroznienie, dobre embeddingi zawsze na plus
    - prompt design to do ostatniej części - LLM, im lepszy i dokładniejszy prompt model zwróci nam odpowiedz we formacie jaki chcemy i to co chcemy.
- How does the choice of vector database and embedding model impact system performance?
    - Baza danych odpowiada głównie za wydajność (szybkość) działania, embeddingi za jakość (jak separowalne lub nie są dane)
- What are the main challenges in implementing a production-ready RAG system?
    - Budowanie dużej bazy danych, indexowanie jej, i używanie embeddingow które najlepiej pasują do naszego use-case
- How can the system be improved to handle complex queries requiring multiple document lookups
    - Mozemy pozowlic systemowi aby wyciagal kilka dokumentow, nastepnie laczyc je w jeden duzy kontekst i dopiero z tego duzego kontekstu poprosic o ostateczna odpowiedz.