In [2]:
# !pip install langchain-ollama langchain-text-splitters langchain-core

In [3]:
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="llama3",
    temperature=0,  # Adjust temperature for desired response variability
)


In [4]:
from langchain_community.document_loaders import WebBaseLoader
from bs4 import BeautifulSoup

# Load the GDPR text from a URL
loader = WebBaseLoader(web_paths=("https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32016R0679",))
docs = loader.load()

print(f"Loaded {len(docs)} document(s).")


USER_AGENT environment variable not set, consider setting it to identify your requests.


Loaded 1 document(s).


In [5]:
docs



In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # Maximum size of each chunk
    chunk_overlap=200,  # Overlap between chunks for context
)
splits = text_splitter.split_documents(docs)

print(f"Split GDPR into {len(splits)} chunks.")


Split GDPR into 518 chunks.


In [7]:
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")
vector_store = InMemoryVectorStore(embeddings)
vector_store.add_documents(splits)


['b5f5f89a-d632-4dc6-a3e3-112dc64c6dd7',
 'a3029609-a391-410e-8957-489b57b19f60',
 '0cd72c7e-f960-4e7b-9976-8555370c604c',
 'c88b010d-1fe1-4308-a7d6-18a52cff51b0',
 'fd3bd68c-af4b-4d55-a239-00de38583dca',
 '322172f0-a0e1-4f24-90d4-0860ad6863d4',
 'abee57da-93bb-4eeb-a89a-8210dddba480',
 'c5e5aff0-9a24-442e-8e33-1173505ea2d7',
 'd9864c76-6245-4c45-96f6-eff68146c00f',
 'e645db6d-0c57-4b0a-914e-2a83ba7264fd',
 '33bb092a-4395-4ad9-a63a-4aa657c1238c',
 '6fcbe640-e501-4e7f-8366-4c5a7e12d8ff',
 'dca9b8b9-bc0f-4ba3-a9b4-ef34f7699147',
 '24ba3b24-daae-418b-b926-2317f79abf7a',
 '9d3a3640-3c71-49f9-9fba-e16135efb3aa',
 '974bc87f-b53c-4379-a200-c2c112cb9b60',
 '0fdb5793-2475-4264-8618-bbc528df7bc4',
 '670c1287-2fba-4a29-b1d4-a9c150a01ded',
 'bab6d391-9d38-4c03-8d16-cbf1c3b48ae6',
 'e1e7c8b2-f602-4500-b225-d88ae43cb0b5',
 '402d7b32-e927-44a5-9a0f-8975606abe89',
 'bfd9e02e-9194-4644-ab8e-d07800c60d3e',
 '1e38faa3-5c65-4f3d-bbcb-3a19cc4bd023',
 '8244a590-325b-4ba4-9349-188687e5dd3d',
 '96a6719c-c1af-

In [19]:
results = vector_store.similarity_search(
    "What are the rights of data subjects under GDPR?"
)

print(results[0])

page_content='3.   The controller shall provide information on action taken on a request under Articles 15 to 22 to the data subject without undue delay and in any event within one month of receipt of the request. That period may be extended by two further months where necessary, taking into account the complexity and number of the requests. The controller shall inform the data subject of any such extension within one month of receipt of the request, together with the reasons for the delay. Where the data subject makes the request by electronic form means, the information shall be provided by electronic means where possible, unless otherwise requested by the data subject.' metadata={'source': 'https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX:32016R0679', 'title': 'Regulation - 2016/679 - EN - gdpr - EUR-Lex', 'language': 'en'}


In [8]:
from langchain_core.documents import Document
from typing_extensions import TypedDict, List

class State(TypedDict):
    query: str
    context: List[Document]
    answer: str


In [9]:
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["query"])
    return {"context": retrieved_docs}


In [10]:
from langchain import hub

# Load an example Q&A prompt
prompt = hub.pull("rlm/rag-prompt")

def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    response = llm.invoke([
        ("system", "Answer the following question using the provided context."),
        ("user", f"Question: {state['query']}\nContext: {docs_content}"),
    ])
    return {"answer": response.content}




In [12]:
!pip install langgraph

Collecting langgraph
  Downloading langgraph-0.2.59-py3-none-any.whl.metadata (15 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.0.4 (from langgraph)
  Downloading langgraph_checkpoint-2.0.9-py3-none-any.whl.metadata (4.6 kB)
Collecting langgraph-sdk<0.2.0,>=0.1.42 (from langgraph)
  Downloading langgraph_sdk-0.1.47-py3-none-any.whl.metadata (1.8 kB)
Collecting msgpack<2.0.0,>=1.1.0 (from langgraph-checkpoint<3.0.0,>=2.0.4->langgraph)
  Downloading msgpack-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl.metadata (8.4 kB)
Downloading langgraph-0.2.59-py3-none-any.whl (135 kB)
Downloading langgraph_checkpoint-2.0.9-py3-none-any.whl (37 kB)
Downloading langgraph_sdk-0.1.47-py3-none-any.whl (43 kB)
Downloading msgpack-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl (84 kB)
Installing collected packages: msgpack, langgraph-sdk, langgraph-checkpoint, langgraph
Successfully installed langgraph-0.2.59 langgraph-checkpoint-2.0.9 langgraph-sdk-0.1.47 msgpack-1.1.0


In [13]:
from langgraph.graph import StateGraph, START

graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()


In [14]:
response = graph.invoke({"query": "What are the rights of data subjects under GDPR?"})
print(response["answer"])

According to the General Data Protection Regulation (GDPR), the rights of data subjects under GDPR include:

1. The right to access their personal data (Article 15)
2. The right to rectification of inaccurate or incomplete personal data (Article 16)
3. The right to erasure (right to be forgotten) of personal data (Article 17)
4. The right to restriction of processing of personal data (Article 18)
5. The right to object to processing of personal data (Article 21)
6. The right to data portability, allowing them to have their personal data transmitted directly from one controller to another (Article 20)

These rights are designed to give individuals greater control over their personal data and to ensure that it is processed in a transparent and secure manner.
