# GDPR Compliance Assistant - RAG Agent Implementation

This notebook implements the QA agent for the GDPR Compliance Assistant using your existing Pinecone vector database.



## Setup and Imports

First, let's install required packages and import dependencies.

In [None]:
# First, make sure you have the latest LangChain
# pip install langchain-core langchain-openai

# Cell 1: Setup and Imports
import os
import sys
from dotenv import load_dotenv

# Add project root to Python path
sys.path.append(os.path.abspath('..'))

# LangChain components
from langchain_openai import OpenAIEmbeddings, ChatOpenAI  # ‚úÖ Correct imports
from langchain_pinecone import PineconeVectorStore  # ‚úÖ Pinecone integration
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

from pinecone import Pinecone, ServerlessSpec

import time


# from langchain.chains import RetrievalQA
# from langchain.vectorstores import Pinecone
# from langchain.embeddings import OpenAIEmbeddings
# from langchain_openai import ChatOpenAI
# from langchain.prompts import PromptTemplate
# from langchain_pinecone import PineconeVectorStore

# Load environment variables
load_dotenv()

print("‚úÖ All packages imported successfully!")

‚úÖ All packages imported successfully!



For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore



## Configuration / Environment Setup

Set up your API keys and configuration. Replace with your actual values.

In [2]:
# Configure your API keys
def setup_environment():
    # Check if API keys are already in environment
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
    
    # If not set, prompt user
    if not OPENAI_API_KEY:
        OPENAI_API_KEY = getpass.getpass("Enter your OpenAI API key: ")
        os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
    
    if not PINECONE_API_KEY:
        PINECONE_API_KEY = getpass.getpass("Enter your Pinecone API key: ")
        os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
    
    # Your Pinecone index name (replace with your actual index name)
    index_name = "gdpr-compliance-openai"  # Change this to your index name
    
    return index_name, OPENAI_API_KEY, PINECONE_API_KEY

index_name, OPENAI_API_KEY, PINECONE_API_KEY = setup_environment()
print(f"üîë API keys configured")
print(f"üìÅ Using Pinecone index: {index_name}")

üîë API keys configured
üìÅ Using Pinecone index: gdpr-compliance-openai


In [None]:
# ---------------------------
# Pinecone Initialization (Current 2025 syntax)
# ---------------------------
def init_pinecone(api_key: str, index_name: str = "gdpr-compliance-openai", environment: str = "us-east-1"):
    """
    Initialize Pinecone connection using current Pinecone
    """
    if not api_key:
        raise ValueError("PINECONE_API_KEY is missing!")
    
    # Initialize Pinecone (Current API)
    print("üîå Initializing Pinecone...")
    # from pinecone import Pinecone, ServerlessSpec
    pc = Pinecone(api_key=api_key)
    print("‚úÖ Pinecone initialized successfully")
    
    # Check if index exists
    if index_name in pc.list_indexes().names():
        print(f"‚úÖ Index '{index_name}' exists")
        # Wait for index to be ready
        while not pc.describe_index(index_name).status.ready:
            print("‚è≥ Waiting for index to be ready...")
            # import time
            time.sleep(1)
    else:
        print(f"‚ö†Ô∏è  Index '{index_name}' not found.")
        # print(f"‚ö†Ô∏è  Index '{index_name}' not found. Creating it...")
        # pc.create_index(
        #     name=index_name,
        #     dimension=1536,  # OpenAI text-embedding-3-small dimension
        #     metric="cosine",
        #     spec=ServerlessSpec(cloud="aws", region=environment)
        # )
        # print(f"‚úÖ Index '{index_name}' created")
    
    # Get the index object
    index = pc.Index(index_name)
    return pc, index

In [5]:
pc, index = init_pinecone(
        api_key=PINECONE_API_KEY,
        index_name=index_name)
print("‚úÖ Pinecone setup completed!")


üîå Initializing Pinecone...
‚úÖ Pinecone initialized successfully
‚úÖ Index 'gdpr-compliance-openai' exists
‚úÖ Pinecone setup completed!


## Initialize embeddings

In [None]:
# Initialize embeddings with CURRENT syntax - NO DEPRECATION WARNING
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=OPENAI_API_KEY
)
print("‚úÖ Embeddings initialized successfully")

‚úÖ Embeddings initialized successfully


## Initialize Vector Store Connection

In [9]:
index_name

'gdpr-compliance-openai'

In [10]:
vector_store = PineconeVectorStore(
        index=index,  # Use the index object from our initialization
        embedding=embeddings,
        text_key="text"  # This should match your upload metadata field name
    )
    
print("‚úÖ LangChain successfully connected to Pinecone index!")

‚úÖ LangChain successfully connected to Pinecone index!


## Test the connection with current syntax


In [None]:
# # Test the connection with current syntax
# test_query = "Telephone number from a client"
# test_results = vector_store.similarity_search("test_query", k=2)
# print(f"üìö Test retrieval found {len(test_results)} documents")

# # Show metadata structure (useful for debugging)
# if test_results:
#     print(f"üìã Available metadata fields: {list(test_results[0].metadata.keys())}")
#     print(f"üìÑ Sample content: {test_results[0].page_content[:150]}...")
    
# # Alternative: Check what's in the vector store
# print(f"\nüîç Vector store type: {type(vector_store)}")

In [26]:
# # Test the connection with current syntax
# test_results = vector_store.similarity_search("Datenschutz", k=2)
# print(f"üìö Test retrieval found {len(test_results)} documents")

# # Show metadata structure (useful for debugging)
# if test_results:
#     print(f"üìã Available metadata fields: {list(test_results[0].metadata.keys())}")
#     print(f"üìÑ Sample content: {test_results[0].page_content[:150]}...")
    
# # Alternative: Check what's in the vector store
# print(f"\nüîç Vector store type: {type(vector_store)}")

üìö Test retrieval found 2 documents
üìã Available metadata fields: ['author', 'chunk_id', 'chunk_size', 'content_category', 'content_length', 'creationdate', 'document_name', 'document_type', 'language', 'moddate', 'page', 'page_label', 'page_number', 'section_type', 'source', 'total_chunks', 'total_pages']
üìÑ Sample content: Leitfaden 
Datenschutzrecht 
Was Betriebe zu beachten haben 
 
 
Stand: November 2020 
 
Abteilung Organisation und Recht...

üîç Vector store type: <class 'langchain_pinecone.vectorstores.PineconeVectorStore'>


## Verify Data and Create Retriever

In [19]:
retriever=vector_store.as_retriever()

In [8]:
# # Verify data and create retriever with current syntax
# print("üîç Setting up retriever...")

# # Create retriever with current syntax
# retriever = vector_store.as_retriever(
#     search_type="similarity",
#     search_kwargs={
#         "k": 3,  # Number of documents to retrieve
#         "score_threshold": 0.7  # Optional: minimum similarity score
#     }
# )

# print("‚úÖ Retriever configured!")
# print(f"   - Search type: similarity")
# print(f"   - k: 3 documents")
# print(f"   - score_threshold: 0.7")

# # Test the retriever
# print("\nüß™ Testing retriever...")
# test_docs = retriever.invoke("Datenverarbeitung Grunds√§tze")
# print(f"‚úÖ Retriever test successful - found {len(test_docs)} documents")

üîç Setting up retriever...
‚úÖ Retriever configured!
   - Search type: similarity
   - k: 3 documents
   - score_threshold: 0.7

üß™ Testing retriever...
‚úÖ Retriever test successful - found 3 documents


## Current LLM Setup

In [39]:
# Initialize LLM with current syntax
print("üöÄ Initializing GPT-5 Nano LLM...")

llm_5_nano = ChatOpenAI(
    model="gpt-5-nano",
    temperature=0,
    max_tokens=500,
    openai_api_key=OPENAI_API_KEY,
)

print("‚úÖ LLM initialized with current syntax!")
print(f"   - Model: gpt-5-nano")
print(f"   - Temperature: 0") 
print(f"   - Max tokens: 500")

üöÄ Initializing GPT-5 Nano LLM...
‚úÖ LLM initialized with current syntax!
   - Model: gpt-5-nano
   - Temperature: 0
   - Max tokens: 500


In [16]:
# test with gpt-3.5-turbo
print("üöÄ Testing with GPT-3.5-Turbo LLM...")

llm_3_turbo = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model_name='gpt-3.5-turbo',
    temperature=0.0,
    max_tokens=500,
)

üöÄ Testing with GPT-3.5-Turbo LLM...


## Create QA Chain

In [27]:
query_test = "How long can i store my client's email?"

results_test = vector_store.similarity_search(
    query_test,  # our search query
    k=3  # return 3 most relevant docs
)

In [31]:
results_test

[Document(id='1c200008-177f-4b67-85c1-b9bcc5d22d58', metadata={'author': 'Kasper, Lisa', 'chunk_id': 99.0, 'chunk_size': 727.0, 'content_category': 'customer_data', 'content_length': 2176.0, 'creationdate': '2020-11-06T11:24:59+01:00', 'document_name': 'ZDH_LEITFADEN_DATENSCHUTZ_BETRIEBE_HANDWERKER.pdf', 'document_type': 'zdh_gdpr_handbook', 'language': 'german', 'moddate': '2020-11-06T11:24:59+01:00', 'page': 34.0, 'page_label': '35', 'page_number': 35.0, 'section_type': 'content', 'source': '../2_data/raw/ZDH_LEITFADEN_DATENSCHUTZ_BETRIEBE_HANDWERKER.pdf', 'total_chunks': 266.0, 'total_pages': 99.0}, page_content='Gesetzliche L√∂schfristen  \n \nIn vereinzelten F√§llen schreiben gesetzliche Regelungen vor, wann bestimmte Daten zu l√∂-\nschen sind (f√ºr eine √ú bersicht gesetzlicher L√∂schfristen siehe die Anlage 17). Eine l√§ngere \nAufbewahrung solcher Daten ist unzul√§ssig.  \n \nEtwas anderes gilt nur dann, wenn die Daten zu einem anderen Zweck als zu dem, zu dem \nsie urspr√ºngli

In [30]:
for i, doc in enumerate(results_test):
	print(f"Document {i+1} content:\n{doc.page_content}\n{'-'*60}")

Document 1 content:
Gesetzliche L√∂schfristen  
 
In vereinzelten F√§llen schreiben gesetzliche Regelungen vor, wann bestimmte Daten zu l√∂-
schen sind (f√ºr eine √ú bersicht gesetzlicher L√∂schfristen siehe die Anlage 17). Eine l√§ngere 
Aufbewahrung solcher Daten ist unzul√§ssig.  
 
Etwas anderes gilt nur dann, wenn die Daten zu einem anderen Zweck als zu dem, zu dem 
sie urspr√ºnglich erhoben wurden, weiterhin ben√∂tigt wer den. Eine solche Zweck√§nderung 
oder Zweckerweiterung ist jedoch an gesetzliche Zul√§ssigkeitsvoraussetzungen gebunden 
(Art. 6 Abs. 4 DSGVO).  
 
Beispiel: 
Kundendaten werden nach Ablauf der Gew√§hrleistungsfristen und der steuerrechtlichen 
Aufbewahrungspflichten ‚Äì d.h. nach zehn Jahren ‚Äì nicht mehr zur Abwicklung des Ver-
------------------------------------------------------------
Document 2 content:
Anlage 17 
 
Aufbewahrungs- und L√∂schfristen 
 
Die Liste stellt eine √úbersicht praxisrelevanter Verfahren dar und erhebt keinen An-
spruch auf Vollst√§

In [13]:
# ---------------------------
# Code adapted from lesson:

qa_test = RetrievalQA.from_chain_type(
    llm=llm_3_turbo,
    chain_type="stuff",
    retriever=vector_store.as_retriever()
)

query_test = "How long can i store my client's email?"

print(qa_test.invoke(query_test))


{'query': "How long can i store my client's email?", 'result': "According to the information provided, there is no specific legal requirement for how long you can store your client's email. It is generally up to the discretion of the data controller, which in this case would be the business that collected the data. However, it is recommended to establish a data retention policy or a deletion concept to determine when to delete data, taking into account legal requirements and best practices."}


In [36]:
# Create prompt template and QA chain with current syntax
print("üîó Creating QA chain...")

# Current prompt template
prompt_template_de = """Du bist ein spezialisierter Assistent f√ºr Datenschutzfragen f√ºr Handwerksbetriebe
.

Antworte AUF DEUTSCH basierend auf dem bereitgestellten Kontext. 
Sei pr√§zise und fokussiere auf die praktische Umsetzung f√ºr Handwerksbetriebe.

Kontext: {context}

Frage: {question}

Antwort (pr√§zise, praxisorientiert):"""

PROMPT_de = PromptTemplate(
    template=prompt_template_de, 
    input_variables=["context", "question"]
)

# Create QA chain with current syntax
qa_chain_de = RetrievalQA.from_chain_type(
    llm=llm_3_turbo,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT_de},
    return_source_documents=True
)


üîó Creating QA chain...


In [58]:
# Create prompt template and QA chain with current syntax
print("üîó Creating QA chain...")

# Current prompt template
prompt_template_en = """You are a privacy assistant specialized in GDPR for small craft businesses. 
Explain in a clear, practical, and easy-to-understand way based on the following context. 
This is not legal advice. If the context does not contain the answer, say so openly.

Context:
{context}

Question:
{question}

Answer (short and practical):"""

PROMPT_en = PromptTemplate(
    template=prompt_template, 
    input_variables=["context", "question"]
)

# Create QA chain with current syntax
qa_chain_en = RetrievalQA.from_chain_type(
    llm=llm_3_turbo,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT_en},
    return_source_documents=True
)


üîó Creating QA chain...


In [42]:
# Create prompt template and QA chain with current syntax
print("üîó Creating QA chain with 'gpt-5-nano' ...")

# Current prompt template
prompt_template_en = """You are a privacy assistant specialized in GDPR for small craft businesses. 
Explain in a clear, practical, and easy-to-understand way based on the following context. 
This is not legal advice. If the context does not contain the answer, say so openly.

Context:
{context}

Question:
{question}

Answer (short and practical):"""

PROMPT_en = PromptTemplate(
    template=prompt_template, 
    input_variables=["context", "question"]
)

# Create QA chain with current syntax
qa_chain_en_5_nano = RetrievalQA.from_chain_type(
    llm=llm_5_nano,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": PROMPT_en},
    return_source_documents=True
)

üîó Creating QA chain with 'gpt-5-nano' ...


## Create a helper function to test the agent and display results.


In [59]:
def ask_gdpr_question_en(question, show_sources=True):
    """
    Ask a question to the GDPR assistant and display the response with sources.
    
    Args:
        question (str): The question to ask (in German or English)
        show_sources (bool): Whether to display source documents
    
    Returns:
        dict: Complete result with answer and source documents
    """
    print(f"‚ùì Question: {question}")
    print("‚è≥ Denke nach...")
    
    # Get answer from QA chain
    result = qa_chain_en.invoke({"query": question})

    # Check if we got a valid answer
    answer = result.get('result', '').strip()
    
    print(f"‚úÖ Answer: {result['result']}")
    
    # Show source documents if requested
    if show_sources and result['source_documents']:
        print(f"\nüìö Source ({len(result['source_documents'])}):")
        for i, doc in enumerate(result['source_documents']):
            source_text = doc.page_content.replace('\n', ' ').strip()
            print(f"   {i+1}. {source_text[:150]}...")
    
    print("‚Äï" * 80)
    return result


In [37]:
def ask_gdpr_question_de(question, show_sources=True):
    """
    Ask a question to the GDPR assistant and display the response with sources.
    
    Args:
        question (str): The question to ask (in German or English)
        show_sources (bool): Whether to display source documents
    
    Returns:
        dict: Complete result with answer and source documents
    """
    print(f"‚ùì Frage: {question}")
    print("‚è≥ Denke nach...")
    
    # Get answer from QA chain
    result = qa_chain_de.invoke({"query": question})

    # Check if we got a valid answer
    answer = result.get('result', '').strip()
    
    print(f"‚úÖ Antwort: {result['result']}")
    
    # Show source documents if requested
    if show_sources and result['source_documents']:
        print(f"\nüìö Verwendete Quellen ({len(result['source_documents'])}):")
        for i, doc in enumerate(result['source_documents']):
            source_text = doc.page_content.replace('\n', ' ').strip()
            print(f"   {i+1}. {source_text[:150]}...")
    
    print("‚Äï" * 80)
    return result


In [None]:
def ask_gdpr_question_en_5_nano(question, show_sources=True):
    """
    Ask a question to the GDPR assistant and display the response with sources.
    
    Args:
        question (str): The question to ask (in German or English)
        show_sources (bool): Whether to display source documents
    
    Returns:
        dict: Complete result with answer and source documents
    """
    print(f"‚ùì Question: {question}")
    print("‚è≥ Denke nach...")
    
    # Get answer from QA chain
    result = qa_chain_en_5_nano.invoke({"query": question})

    # Check if we got a valid answer
    answer = result.get('result', '').strip()
    
    print(f"‚úÖ Answer: {result['result']}")
    
    # Show source documents if requested
    if show_sources and result['source_documents']:
        print(f"\nüìö Source ({len(result['source_documents'])}):")
        for i, doc in enumerate(result['source_documents']):
            source_text = doc.page_content.replace('\n', ' ').strip()
            print(f"   {i+1}. {source_text[:150]}...")
    
    print("‚Äï" * 80)
    return result


In [22]:

# qa_chain_eng = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, chain_type_kwargs={"prompt": "{query}"}, return_source_documents=True)
print(qa_chain_en.invoke({"query": "How long can i store my client's email?"}))

# print("‚úÖ QA chain created successfully!")

{'query': "How long can i store my client's email?", 'result': "You can store your client's email for as long as it is necessary for the purpose for which it was originally collected. If the email is no longer needed for that purpose, it should be deleted according to legal requirements and best practices.", 'source_documents': [Document(id='1c200008-177f-4b67-85c1-b9bcc5d22d58', metadata={'author': 'Kasper, Lisa', 'chunk_id': 99.0, 'chunk_size': 727.0, 'content_category': 'customer_data', 'content_length': 2176.0, 'creationdate': '2020-11-06T11:24:59+01:00', 'document_name': 'ZDH_LEITFADEN_DATENSCHUTZ_BETRIEBE_HANDWERKER.pdf', 'document_type': 'zdh_gdpr_handbook', 'language': 'german', 'moddate': '2020-11-06T11:24:59+01:00', 'page': 34.0, 'page_label': '35', 'page_number': 35.0, 'section_type': 'content', 'source': '../2_data/raw/ZDH_LEITFADEN_DATENSCHUTZ_BETRIEBE_HANDWERKER.pdf', 'total_chunks': 266.0, 'total_pages': 99.0}, page_content='Gesetzliche L√∂schfristen  \n \nIn vereinzelte

## Test the RAG System

Now let's test the system with various GDPR questions.


In [24]:
# Test 1: Data retention periods
print("üß™ TEST 1 ('gpt-3.5-turbo'): Data retention periods: english Q > EN temp prompt > A english?")
result2 = ask_gdpr_question_en("How long can i keep a client's email stored?")

üß™ TEST 1: Aufbewahrungsfristen
‚ùì Frage: How long can i keep a client's email stored?
‚è≥ Denke nach...
‚úÖ Answer: You can keep a client's email stored for as long as it is necessary for the purpose for which it was originally collected. After that, you should delete it unless there are legal requirements or other legitimate reasons for keeping it.

üìö Source (4):
   1. Gesetzliche L√∂schfristen     In vereinzelten F√§llen schreiben gesetzliche Regelungen vor, wann bestimmte Daten zu l√∂- schen sind (f√ºr eine √ú bersicht ...
   2. Ob und wann die Aufbewahrung von Daten nicht mehr erforderlich ist, liegt grunds√§tzlich im  Ermessen des Dateninhabers, also des Handwerksbetriebs, de...
   3. ben√∂tigt, schreiben zahlreichliche gesetzliche Regelungen vor, dass bestimmte Daten min- destens f√ºr einen konkreten Zeitraum aufzubewahren sind. Solc...
   4. Aufbewahrungspflichten ‚Äì d.h. nach zehn Jahren ‚Äì nicht mehr zur Abwicklung des Ver- trags ben√∂tigt. Die Daten des Kunden k√∂nne

In [45]:
print("üß™ TEST 1b ('gpt-5-nano'): Data retention periods: english Q > EN temp prompt > A english?")
result2_nano = ask_gdpr_question_en_5_nano("How long can i keep a client's email stored?")

üß™ TEST 1b ('gpt-5-nano'): Data retention periods: english Q > EN temp prompt > A english?
‚ùì Frage: How long can i keep a client's email stored?
‚è≥ Denke nach...
‚úÖ Answer: 

üìö Source (4):
   1. Gesetzliche L√∂schfristen     In vereinzelten F√§llen schreiben gesetzliche Regelungen vor, wann bestimmte Daten zu l√∂- schen sind (f√ºr eine √ú bersicht ...
   2. Ob und wann die Aufbewahrung von Daten nicht mehr erforderlich ist, liegt grunds√§tzlich im  Ermessen des Dateninhabers, also des Handwerksbetriebs, de...
   3. ben√∂tigt, schreiben zahlreichliche gesetzliche Regelungen vor, dass bestimmte Daten min- destens f√ºr einen konkreten Zeitraum aufzubewahren sind. Solc...
   4. Aufbewahrungspflichten ‚Äì d.h. nach zehn Jahren ‚Äì nicht mehr zur Abwicklung des Ver- trags ben√∂tigt. Die Daten des Kunden k√∂nnen jedoch f√ºr die weitere...
‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï‚Äï

In [32]:
# Test 4: Employee data (Question in German through the english Prompt template pipeline)
print("üß™ TEST 4: Mitarbeiterdaten")
result4 = ask_gdpr_question_en("Welche Regeln gelten f√ºr die Verarbeitung von Mitarbeiterdaten?")


üß™ TEST 4: Mitarbeiterdaten
‚ùì Frage: Welche Regeln gelten f√ºr die Verarbeitung von Mitarbeiterdaten?
‚è≥ Denke nach...
‚úÖ Answer: F√ºr die Verarbeitung von Mitarbeiterdaten gelten die Rechte der DSGVO und des BDSG. Arbeitgeber d√ºrfen nur Daten erheben, die f√ºr das Besch√§ftigungsverh√§ltnis relevant sind und m√ºssen die Mitarbeiter dar√ºber informieren.

üìö Source (4):
   1. 9. Rechtm√§√üige Datenverarbeitung von Besch√§ftigten      Gilt der Datenschutz auch in Besch√§ftigungsverh√§ltnissen?    Ja. F√ºr Besch√§ftigte gelten ‚Äì wie...
   2. Im Bundesdatenschutzgesetz werden lediglich zur Klarstellung bestimmte allgemeine Daten- schutzrechte nochmals gesondert f√ºr Besch√§ftigte geregelt (¬ß ...
   3. bei.    Einwilligungen sind nur wirksam, wenn sie freiwillig erkl√§rt werden (siehe hierzu Kapitel 2 ‚ÄûAn- forderungen der datenschutzrechtlichen Einwil...
   4. gesetzlich vorgeschriebenen Informations- und Dokumentationspflichten zu erf√ºllen.     Arbeitgeber sind in diesem Zu

In [33]:
# Test 4: Employee data (Question in German through the GERMAN Prompt template pipeline)
print("üß™ TEST 4: Mitarbeiterdaten")
result4 = ask_gdpr_question_de("Welche Regeln gelten f√ºr die Verarbeitung von Mitarbeiterdaten?")


üß™ TEST 4: Mitarbeiterdaten
‚ùì Frage: Welche Regeln gelten f√ºr die Verarbeitung von Mitarbeiterdaten?
‚è≥ Denke nach...
‚úÖ Antwort: F√ºr die Verarbeitung von Mitarbeiterdaten gelten die Rechte der DSGVO und des BDSG. Arbeitgeber d√ºrfen personenbezogene Daten ihrer Mitarbeiter erheben, speichern und nutzen, wenn dies f√ºr das Besch√§ftigungsverh√§ltnis erforderlich ist. Es m√ºssen Informations- und Dokumentationspflichten erf√ºllt werden, z.B. die Mitarbeiter zu Beginn des Besch√§ftigungsverh√§ltnisses √ºber die Datenverarbeitung informieren und die Prozesse dokumentieren. Einwilligungen m√ºssen freiwillig erfolgen, ohne Zwang oder Druck.

üìö Verwendete Quellen (4):
   1. 9. Rechtm√§√üige Datenverarbeitung von Besch√§ftigten      Gilt der Datenschutz auch in Besch√§ftigungsverh√§ltnissen?    Ja. F√ºr Besch√§ftigte gelten ‚Äì wie...
   2. Im Bundesdatenschutzgesetz werden lediglich zur Klarstellung bestimmte allgemeine Daten- schutzrechte nochmals gesondert f√ºr Besch√§ftigte ger

In [38]:
# Test 4: Employee data (Question in German through the GERMAN Prompt template pipeline)
print("üß™ TEST 4: Mitarbeiterdaten")
result4 = ask_gdpr_question_de("What do i have to watch out when sending promotional marketing emails to new clients?")


üß™ TEST 4: Mitarbeiterdaten
‚ùì Frage: What do i have to watch out when sending promotional marketing emails to new clients?
‚è≥ Denke nach...
‚úÖ Antwort: Bevor Sie Werbe-E-Mails an neue Kunden senden, m√ºssen Sie sicherstellen, dass Sie die ausdr√ºckliche Einwilligung der Kunden haben. Dies bedeutet, dass die Kunden aktiv zugestimmt haben, ihre Kontaktdaten f√ºr Werbezwecke zu verwenden. Die Einwilligung muss freiwillig und jederzeit widerruflich sein. Au√üerdem m√ºssen Sie sicherstellen, dass die Kunden die M√∂glichkeit haben, der Werbung jederzeit zu widersprechen. Beachten Sie auch, dass f√ºr Werbung per E-Mail eine separate Einwilligung erforderlich ist.

üìö Verwendete Quellen (4):
   1. 7 DSGVO), ist die Einwilligung der Kunden zwingend erforderlich. Eine Mustereinwilligung  liegt als Anlage 19 bei.       Wettbewerbsrecht beachten    ...
   2. Anlage 1    Anforderungen der datenschutzrechtlichen Einwilligung        Muster     Einwilligungserkl√§rung          In unserem Werbe

## AGENT: Initializing the Conversational Agent

In [60]:
from langchain.chains.conversation.memory import ConversationBufferWindowMemory

# conversational memory
conversational_memory = ConversationBufferWindowMemory(
    memory_key='chat_history',
    k=2,
    return_messages=True
)

In [61]:
from langchain.agents import Tool

In [62]:
# Create a tool from your existing QA chain
print("üõ†Ô∏è Creating tool from QA chain...")

gdpr_tool = Tool(
    name='GDPR_Knowledge_Base',
    func=lambda query: qa_chain_en.invoke({"query": query})["result"],
    description=(
        'Use this tool when answering GDPR, data protection, or privacy questions for craftspeople and small businesses. '
        'It contains official guidelines from the ZDH Data Protection Guide for German Crafts Businesses, small Businesses and skilled trades. ZDH = Zentralverband des Deutschen Handwerks / Central Association of German Skilled Trades'
    )
)

tools = [gdpr_tool]

print("‚úÖ GDPR tool created!")
print(f"Tool name: {gdpr_tool.name}")
print(f"Tool description: {gdpr_tool.description}")

üõ†Ô∏è Creating tool from QA chain...
‚úÖ GDPR tool created!
Tool name: GDPR_Knowledge_Base
Tool description: Use this tool when answering GDPR, data protection, or privacy questions for craftspeople and small businesses. It contains official guidelines from the ZDH Data Protection Guide for German Crafts Businesses, small Businesses and skilled trades. ZDH = Zentralverband des Deutschen Handwerks / Central Association of German Skilled Trades


In [63]:
from langchain.agents import initialize_agent

In [64]:
agent = initialize_agent(
    agent='chat-conversational-react-description',
    tools=tools,
    llm=llm_3_turbo,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=conversational_memory
)

print("‚úÖ Conversational agent initialized!")
print(f"Agent type: chat-conversational-react-description")
print(f"Tools: {[tool.name for tool in tools]}")
print(f"Memory: Enabled")

‚úÖ Conversational agent initialized!
Agent type: chat-conversational-react-description
Tools: ['GDPR_Knowledge_Base']
Memory: Enabled


In [66]:
# Cell: Function for the Agent (German + English)
def ask_gdpr_agent(question):
    """
    Simple agent function without try/except
    """
    print(f"ü§î Question: {question}")
    result = agent.invoke({"input": question})
    print(f"‚úÖ Answer: {result['output']}\n")
    return result


In [56]:
# Agent Test 1: Employee survailance
print("üß™ Agent TEST 1 ('gpt-3.5-turbo'): Employee survailance: english Q > EN Agent prompt > A english?")
ask_gdpr_agent("Can I install surveillance cameras to monitor my employees?")


üß™ Agent TEST 1 ('gpt-3.5-turbo'): Employee survailance: english Q > EN Agent prompt > A english?
ü§î Question: Can I install surveillance cameras to monitor my employees?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "Installing surveillance cameras to monitor employees may raise privacy and legal concerns. It is important to consider relevant laws and regulations, such as GDPR, to ensure compliance and protect the rights of employees."
}
```[0m

[1m> Finished chain.[0m
‚úÖ Answer: Installing surveillance cameras to monitor employees may raise privacy and legal concerns. It is important to consider relevant laws and regulations, such as GDPR, to ensure compliance and protect the rights of employees.



{'input': 'Can I install surveillance cameras to monitor my employees?',
 'chat_history': [],
 'output': 'Installing surveillance cameras to monitor employees may raise privacy and legal concerns. It is important to consider relevant laws and regulations, such as GDPR, to ensure compliance and protect the rights of employees.'}

In [67]:
# Single Question with Full Details
print("üîç Single Question Analysis")
print("=" * 70)

# Ask one question with full agent thinking visible
question = "Darf ich in meinem Lager oder Depot Video√ºberwachung einsetzen?"
print(f"ü§î QUESTION: {question}")
print("=" * 70)

# Get the result
result = agent.invoke({"input": question})

print("\n‚úÖ FINAL ANSWER:")
print("-" * 40)
print(result['output'])

print("\nüìã SOURCE DOCUMENTS:")
print("-" * 40)
if result.get('intermediate_steps'):
    # Extract source documents from agent steps
    for i, step in enumerate(result['intermediate_steps']):
        if len(step) > 1 and hasattr(step[1], 'get') and step[1].get('source_documents'):
            sources = step[1]['source_documents']
            print(f"üìö Found {len(sources)} source documents:")
            for j, doc in enumerate(sources):
                print(f"\n   üìÑ Source {j+1}:")
                print(f"      üìñ Content: {doc.page_content[:200]}...")
                print(f"      üè∑Ô∏è  Metadata: {dict(list(doc.metadata.items())[:3])}...")  # Show first 3 metadata fields
            break
else:
    print("No source documents found in intermediate steps")

üîç Single Question Analysis
ü§î QUESTION: Darf ich in meinem Lager oder Depot Video√ºberwachung einsetzen?


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m```json
{
    "action": "GDPR_Knowledge_Base",
    "action_input": "Video√ºberwachung im Betrieb"
}
```[0m
Observation: [36;1m[1;3mIn √∂ffentlich zug√§nglichen Bereichen des Betriebs d√ºrfen Kameras ohne Einwilligung eingesetzt werden, solange ein berechtigter Zweck verfolgt wird und die Besch√§ftigten zustimmen. Dauerhafte √úberwachung ist jedoch nicht erlaubt. In nicht √∂ffentlich zug√§nglichen Bereichen ist das Filmen von Besch√§ftigten nur mit ausdr√ºcklicher Einwilligung erlaubt. Kunden haben in der Regel kein h√∂heres Schutzinteresse, Passanten hingegen schon. Kameras d√ºrfen nur das Betriebsgel√§nde erfassen.[0m
Thought:[32;1m[1;3m```json
{
    "action": "Final Answer",
    "action_input": "In √∂ffentlich zug√§nglichen Bereichen des Betriebs d√ºrfen Kameras ohne Einwilligung eingesetzt werden, solange ein

In [None]:

# "Darf ich in meinem Lager oder Depot Video√ºberwachung einsetzen?" 
# "Was muss ich dabei beachten?‚Äú

# german_questions = [
#     "Wie lange d√ºrfen Kundendaten gespeichert werden?",
#     "Darf ich √úberwachungskameras zur Mitarbeiter√ºberwachung einsetzen?"
# ]

# english_questions = [
#     "Can I install surveillance cameras to monitor my employees?",
#     "Am I allowed to use CCTV in my warehouse or depot?",
#     "What are the rules for video surveillance in employee parking areas?",
#     "Do I need to inform employees about surveillance cameras?"
# ]

In [None]:
# print("üß™ Testing the agent with bilingual questions...")

# # Only 2 test questions - one German, one English
# test_questions = [
#     "What are the basic principles of data processing?",   # English
#     "Wie lange d√ºrfen Kundendaten gespeichert werden?"  # German
# ]

# print("=" * 50)
# for question in test_questions:
#     ask_gdpr_agent(question)
# print("=" * 50)

In [None]:

# Only 2 test questions - one German, one English
test_questions = [
    "What are the basic principles of data processing?",   # English
    "Wie lange d√ºrfen Kundendaten gespeichert werden?"  # German
]

print("=" * 50)
for question in test_questions:
    ask_gdpr_agent(question)
print("=" * 50)

In [None]:
# New Cell: Test the Agent (Simplified)
print("üß™ Testing the agent...")

def ask_gdpr_agent(question):
    """
    Ask questions using the agent approach - simplified version
    """
    print(f"ü§î Frage: {question}")
    print("‚è≥ Agent denkt nach...")
    
    # Direct call without error handling
    result = agent.invoke({"input": question})
    
    print(f"‚úÖ Antwort: {result['output']}")
    
    return result

# Test questions
test_questions = [
    "Wie lange d√ºrfen Kundendaten gespeichert werden?",
    "Was sind die Grunds√§tze der Datenverarbeitung?",
    "Muss ich einen Datenschutzbeauftragten haben?"
]

print("=" * 60)
for i, question in enumerate(test_questions, 1):
    print(f"\nüìã Test {i}: {question}")
    print("-" * 40)
    result = ask_gdpr_agent(question)
    print("=" * 60)

===
----

----

In [None]:
# FROM LESSON NOTEBOOK

# from langchain.agents import Tool

# tools = [
#     Tool(
#         name='Knowledge Base',
#         func=qa.run,
#         description=(
#             'use this tool when answering general knowledge queries to get '
#             'more information about the topic'
#         )
#     )
# ]

In [None]:
# zzz
# # Test 3: Data breach procedures
# print("üß™ TEST 3: Datenpannen")
# result3 = ask_gdpr_question("Was muss ich tun bei einer Datenschutzverletzung?")


-----
----


# Draft