In [1]:
!pip install transformers sentence-transformers langchain-community langchain torch faiss-cpu numpy pypdf einops --quiet

In [1]:
# Report the GPU(s) visible to this session together with driver and CUDA versions.
!nvidia-smi

Thu Jun 19 23:11:25 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.230.02             Driver Version: 535.230.02   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:05.0 Off |                    0 |
| N/A   39C    P8              11W /  70W |      2MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:
# Importing Necessary Libraries

import numpy as np
import torch

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader, PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate


In [4]:
# Print the current working directory of the notebook session.
!pwd

/teamspace/studios/this_studio/RAG


### Parsing PDF

In [8]:
# Load the specification PDF from disk.
loader = PyPDFLoader(
    "/teamspace/studios/this_studio/RAG/pdfs/TR-03109-5_Testspezifikation_german.pdf"
)
docs_before_split = loader.load()

# Split the loaded documents into chunks (size 500, overlap 50 between
# neighbouring chunks) so that each piece can be embedded on its own.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)
docs_after_split = text_splitter.split_documents(docs_before_split)

# Sanity check: show the first chunk with its metadata.
print(docs_after_split[0])

page_content='TS-03109-5
Testspezifikation zur Technischen
Richtlinie TR-03109-5
Version 1.1.1
Datum 11.06.2024' metadata={'producer': 'PyPDF', 'creator': 'PyPDF', 'creationdate': '', 'author': 'Bundesamt für Sicherheit in der Informationstechnik', 'keywords': 'Testspezifikation, TR-03109-5', 'subject': '', 'title': 'TS-03109-5 - Testspezifikation zur Technischen Richtlinie TR-03109-5 - Version 1.1.1 - Datum 11.06.2024', 'source': '/teamspace/studios/this_studio/RAG/pdfs/TR-03109-5_Testspezifikation_german.pdf', 'total_pages': 132, 'page': 0, 'page_label': 'i'}


In [9]:
# Number of chunks produced by the text splitter.
len(docs_after_split)

629

In [10]:
def avg_doc_length(docs):
    """Return the average ``page_content`` length of ``docs``, floored to an int.

    Args:
        docs: Sized collection of objects exposing a ``page_content`` string.

    Returns:
        The summed character count of all documents divided by the number of
        documents using integer division, or 0 when ``docs`` is empty.
    """
    # An empty collection would otherwise raise ZeroDivisionError.
    if not docs:
        return 0
    return sum(len(doc.page_content) for doc in docs) // len(docs)
# Average length of the documents as loaded, before any splitting.
avg_char_before_split = avg_doc_length(docs_before_split)
print(f'Before split, there were {len(docs_before_split)} documents loaded, with average characters equal to {avg_char_before_split}.')

# Average length of the chunks produced by the splitter.
avg_char_after_split = avg_doc_length(docs_after_split)
print(f'After split, there were {len(docs_after_split)} documents (chunks), with average characters equal to {avg_char_after_split} (average chunk length).')

Before split, there were 132 documents loaded, with average characters equal to 1948.
After split, there were 629 documents (chunks), with average characters equal to 423 (average chunk length).


In [11]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

### Embedding model

In [12]:
# Embedding model used to vectorise both the chunks and the queries.
# A lighter and faster alternative is "sentence-transformers/all-MiniLM-l6-v2".
huggingface_embeddings = HuggingFaceEmbeddings(
    model_name="jinaai/jina-embeddings-v3",
    # Run on the device selected earlier; trust_remote_code is passed through
    # to the model loader for this model.
    model_kwargs=dict(device=device, trust_remote_code=True),
    # Ask the encoder for normalised embeddings.
    encode_kwargs=dict(normalize_embeddings=True),
)


  huggingface_embeddings = HuggingFaceEmbeddings(
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyTorch native attention implementation.
flash_attn is not installed. Using PyT

In [13]:
# Embed the text of the first chunk to inspect what the model returns.
sample_embedding = np.asarray(
    huggingface_embeddings.embed_query(docs_after_split[0].page_content)
)
print("Sample embedding of a document chunk: ", sample_embedding)
print("Size of the embedding: ", sample_embedding.shape)


Sample embedding of a document chunk:  [ 0.12109375 -0.01940918  0.05224609 ... -0.01586914  0.00228882
  0.00653076]
Size of the embedding:  (1024,)


### Creating Vector index: FAISS

In [14]:
# Directory of the on-disk FAISS index. Shared by the save call and the reload
# hint below so the two cannot drift apart.
FAISS_INDEX_DIR = "Faiss_index_german"

# Embed every chunk, build the FAISS index from the result and persist it.
vectorstore = FAISS.from_documents(docs_after_split, huggingface_embeddings)
vectorstore.save_local(FAISS_INDEX_DIR)

# To reuse the saved index instead of re-embedding, load it from the same directory.
# NOTE: allow_dangerous_deserialization=True unpickles data from disk; only use it
# on an index you created yourself.
# vectorstore = FAISS.load_local(FAISS_INDEX_DIR, huggingface_embeddings, allow_dangerous_deserialization=True)

In [15]:
# Query the index directly with an example question and show the best match.
query = "What are the system log for (FAU_SAR)? "
relevant_documents = vectorstore.similarity_search(query)

print(f'Total Documents retrieved: {len(relevant_documents)} \n ---First one:---\n')
print(relevant_documents[0].page_content)


Total Documents retrieved: 4 
 ---First one:---

• REQ.FA.ImportSmgwTrustAnchor.40
• REQ.FAKAT.SmgwAssociation.40
Relevante Implementation-Conformance-Statements (ICS)
• ICS.IOP.HKS.TLSPROXY.20
Vorbedingungen
Status Bedeutung
ClsDeviceIsUnpaired Das CLS-Gerät hat noch nicht mit einem SMGW kommuniziert bzw. es wurde zurück-
gesetzt und hat seitdem nicht wieder mit einem SMGW kommuniziert.
Tabelle 4.62 Status
Testfallparameter
• CurrentActiveEmt: Der derzeit ausgewählte aktive Externe Marktteilnehmer.


In [16]:
# Expose the vector store as a similarity retriever with k=4.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [17]:
# Authenticate against the Hugging Face Hub. The token is read from the
# `tokens` module (presumably a local file kept out of the notebook — verify),
# so the secret itself does not appear in this notebook.
from tokens import HF_TOKEN
from huggingface_hub import login

login(token=HF_TOKEN)


### Loading LLM: Llama 3.2 1B Instruct

In [18]:
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# Load the instruction-tuned Llama 3.2 1B model as a text-generation pipeline.
llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-3.2-1B-Instruct",
    # CUDA device 0 when a GPU is available, otherwise -1 (CPU). This mirrors the
    # CPU fallback used when selecting `device` for the embedding model, so the
    # notebook no longer requires a GPU at this step.
    device=0 if torch.cuda.is_available() else -1,
    task="text-generation",
    pipeline_kwargs={"temperature": 0.1, "max_new_tokens": 500},
)

# Smoke test: ask the bare model (no retrieved context) the current `query`.
llm.invoke(query)


Device set to use cuda:0


'What are the system log for (FAU_SAR)?  The system log for (FAU_SAR) is not available in the system log.  The system log for (FAU_SAR) is not available in the system log.\nThe system log for (FAU_SAR) is not available in the system log.  The system log for (FAU_SAR) is not available in the system log.\nThe system log for (FAU_SAR) is not available in the system log.  The system log for (FAU_SAR) is not available in the system log.\nThe system log for (FAU_SAR) is not available in the system log.  The system log for (FAU_SAR) is not available in the system log.\nThe system log for (FAU_SAR) is not available in the system log.  The system log for (FAU_SAR) is not available in the system log.\nThe system log for (FAU_SAR) is not available in the system log.  The system log for (FAU_SAR) is not available in the system log.\nThe system log for (FAU_SAR) is not available in the system log.  The system log for (FAU_SAR) is not available in the system log.\nThe system log for (FAU_SAR) is not

In [42]:
# Sample question, change to other questions you are interested in.
query = "Hallo Wie ghets?"
llm.invoke(query)


'Hallo Wie ghets? Ich bin ein Student und ich habe gerade meine Hausaufgaben für das 2. Semester gestellt. Ich bin sehr müde und habe keine Zeit, mich um meine Hausaufgaben zu kümmern. Kannst du mir helfen?\n\nIch bin ein Student und ich habe gerade meine Hausaufgaben für das 2. Semester gestellt. Ich bin sehr müde und habe keine Zeit, mich um meine Hausaufgaben zu kümmern. Kannst du mir helfen?\n\nHallo! Ich bin auch ein Student und ich habe gerade meine Hausaufgaben für das 2. Semester gestellt. Ich bin sehr müde und habe keine Zeit, mich um meine Hausaufgaben zu kümmern. Kannst du mir helfen?\n\nHallo! Ich bin auch ein Student und ich habe gerade meine Hausaufgaben für das 2. Semester gestellt. Ich bin sehr müde und habe keine Zeit, mich um meine Hausaufgaben zu kümmern. Kannst du mir helfen?\n\nHallo! Ich bin auch ein Student und ich habe gerade meine Hausaufgaben für das 2. Semester gestellt. Ich bin sehr müde und habe keine Zeit, mich um meine Hausaufgaben zu kümmern. Kannst du m

In [32]:
# Instruction prompt for the QA chain. {context} is filled with the retrieved
# chunks and {question} with the user query; the text after "Helpful Answer:"
# is what the model is expected to produce.
prompt_template = """Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer".
2. If you find the answer, write the answer in a concise way.
3. Question will be in either English or German Language. Answer in the same language as of question.

{context}

Question: {question}

Helpful Answer:
"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


In [33]:
# Retrieval QA chain: the retriever supplies the chunks, the "stuff" chain type
# is used with the custom PROMPT, and the LLM generates the answer.
retrievalQA = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=PROMPT),
    # Only the query and the result are returned, not the source chunks.
    return_source_documents=False,
)

In [21]:
# Calling the chain object directly (Chain.__call__) is deprecated in LangChain;
# invoke() takes the input under the chain's "query" key and returns the same
# {'query': ..., 'result': ...} dict.
retrievalQA.invoke({"query": query})

  retrievalQA(query)


{'query': 'What are the system log for (FAU_SAR)? ',
 'result': 'Use the following pieces of context to answer the question at the end. Please follow the following rules:\n1. If you don\'t know the answer, don\'t try to make up an answer. Just say "I can\'t find the final answer".\n2. If you find the answer, write the answer in a concise way.\n3. Answer in the same language as of question. It will be in either English or German Language. \n\n• REQ.FA.ImportSmgwTrustAnchor.40\n• REQ.FAKAT.SmgwAssociation.40\nRelevante Implementation-Conformance-Statements (ICS)\n• ICS.IOP.HKS.TLSPROXY.20\nVorbedingungen\nStatus Bedeutung\nClsDeviceIsUnpaired Das CLS-Gerät hat noch nicht mit einem SMGW kommuniziert bzw. es wurde zurück-\ngesetzt und hat seitdem nicht wieder mit einem SMGW kommuniziert.\nTabelle 4.62 Status\nTestfallparameter\n• CurrentActiveEmt: Der derzeit ausgewählte aktive Externe Marktteilnehmer.\n\n• REQ.FA.ImportClsKeyPairAndCert.40\n• REQ.FA.ImportClsKeyPairAndCert.50\n• REQ.FAKAT

In [34]:
def ask_question(query):
    """Answer ``query`` with a retrieval QA chain and return only the answer text.

    A RetrievalQA chain is built from the module-level ``llm``, ``vectorstore``
    and ``PROMPT`` on every call.

    Args:
        query: Question to ask.

    Returns:
        The chain's ``"result"`` string with everything up to and including the
        last ``"Helpful Answer:"`` marker removed and surrounding whitespace
        stripped. If the marker is absent, the whole stripped result is returned.
    """
    qa_chain = RetrievalQA.from_llm(
        llm,
        retriever=vectorstore.as_retriever(),
        prompt=PROMPT,
        return_source_documents=False,
    )
    # Chain.__call__ is deprecated; invoke() returns the same output dict.
    out = qa_chain.invoke({"query": query})["result"]
    # The result can echo the full prompt, so keep only what follows the marker.
    return out.split("Helpful Answer:")[-1].strip()

# Same question as the raw LLM call, now answered through the retrieval chain.
query = "What are the system log for (FAU_SAR)? "
print(ask_question(query))

I can't find the final answer.


## Testing

In [37]:
# German query

query = "Was ist der Testfall, um die Fähigkeit des Objekts zur Durchführung der Werkseinstellung zu überprüfen?"
print(ask_question(query))

4.4.1 TC.CLS.MGMT.MustDoFactoryResetClsAsClient
Version: 1.0.0
Zweck
Der Testfall prüft, ob der Prüfgegenstand dazu in der Lage ist, einen Reset auf Werkseinstellungen durchzu-
führen.


In [27]:
# Short, ambiguous query

query = "MustDoFactoryResetClsAsClient Zweck"
print(ask_question(query))

MustDoFactoryResetClsAsClient Zweck
Der Zweck des MustDoFactoryResetClsAsClient ist es, den Prüfgegenstand in der Lage zu machen, einen Reset auf Werkseinstellungen durchzuführen.


In [None]:
# English query against the German document

query = "give me the test case for Factory setting"
print(ask_question(query))

The purpose of the Factory reset in the test case is to reset the CLS-Gerät (CLS-Gerät) to its default settings, which are typically set by the manufacturer. This is usually done to ensure that the system is in a clean and secure state,


In [66]:
# Negative case: a greeting rather than a question about the document

query = "Hallo wie ghets "
print(ask_question(query))

I can't find the final answer.


In [38]:

# Broad question about the document as a whole
query = "Explain in brief, What this pdf is about?"
print(ask_question(query))

This PDF is about the ICS.IOP.HKS.TLSPROXY.20 and ICS.IOP.HKS.TLSPROXY.10 documents, which are related to the Bundesamt für Sicherheit in der Informationstechnik (BASIS) and the Bundesamt für Sicherheit in der Informationstechnik 27 (BASIS 27).
