# RAG Q&A over a Private Document using LangChain, Pinecone and OpenAI

In [22]:
# Notebook setup: expose the OpenAI key through the environment and create a
# Pinecone client. Both key strings are placeholders to be replaced.
import os
from pinecone import Pinecone
# NOTE(review): presumably read from the environment by the langchain_openai
# classes used later in this notebook -- confirm.
os.environ['OPENAI_API_KEY'] = 'your-openai-apikey'  
# NOTE: `pc` is not referenced by any other cell visible in this notebook;
# insert_or_fetch_embedding constructs its own client.
pc = Pinecone(api_key ='Your-pinecone-apikey')

In [23]:
import os
# Placeholder Pinecone key exported via the environment.
# NOTE(review): presumably consumed by the LangChain Pinecone vector store
# used later in this notebook -- confirm against the library docs.
os.environ['PINECONE_API_KEY'] = 'Your-pinecone-apikey'


## Loading the document

In [24]:
def load_document(file):
    """Load a PDF with LangChain's ``PyPDFLoader``.

    Args:
        file: Path of the PDF file to load.

    Returns:
        The value returned by ``PyPDFLoader(file).load()`` (presumably a
        list of LangChain documents -- confirm against the loader docs).
    """
    # Imported at call time so that defining this function does not itself
    # require langchain to be importable.
    from langchain.document_loaders import PyPDFLoader

    # The original message ended with a stray ')' after the file name.
    print(f'loading {file}')
    loader = PyPDFLoader(file)
    return loader.load()

In [25]:
# Load the GDPR PDF from a machine-specific absolute path; adjust the path
# when running this notebook elsewhere.
data= load_document(r'C:\Users\Kaustubh\Downloads\gdpr.pdf')

loading C:\Users\Kaustubh\Downloads\gdpr.pdf)


## Chunking

In [26]:
def chunk_data(data, chunk_size=256, chunk_overlap=0):
    """Split loaded documents into smaller chunks.

    Args:
        data: Documents to split, as returned by ``load_document``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 0, which matches the previously hard-coded setting.

    Returns:
        The value returned by
        ``RecursiveCharacterTextSplitter.split_documents(data)``.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)


# Chunk the loaded document with the default settings.
chunks = chunk_data(data)

In [31]:
# Report how many chunks the splitter produced.
print(len(chunks))

1724


## Embedding and uploading to the vector database (Pinecone)

In [32]:
def insert_or_fetch_embedding(index_name, chunks):
    """Return a LangChain Pinecone vector store backed by ``index_name``.

    If an index with that name already exists, the store is attached to it
    and ``chunks`` is not used. Otherwise a new pod-based index
    (``gcp-starter`` environment, cosine metric) is created and the store is
    built from ``chunks`` with ``Pinecone.from_documents``.

    Args:
        index_name: Name of the Pinecone index to reuse or create.
        chunks: Documents passed to ``Pinecone.from_documents`` when the
            index has to be created.

    Returns:
        The vector store, in both the "already exists" and the "created"
        case. (Previously the ``return`` sat inside the ``else`` branch, so
        an existing index produced ``None``.)
    """
    import pinecone
    from langchain_community.vectorstores import Pinecone
    from langchain_openai import OpenAIEmbeddings
    from pinecone import PodSpec

    # Single source of truth so the embedding size and the index dimension
    # cannot drift apart.
    dimension = 1536

    # NOTE: the client key is a hard-coded placeholder, kept as in the
    # original; replace it with a real key before running.
    pc = pinecone.Pinecone(api_key='Your-pinecone-apikey')
    embeddings = OpenAIEmbeddings(model='text-embedding-3-small', dimensions=dimension)

    if index_name in pc.list_indexes().names():
        print('Index already exists', end='')
        vector_store = Pinecone.from_existing_index(index_name, embeddings)
    else:
        print('creating index', end='')
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric='cosine',
            spec=PodSpec(environment='gcp-starter'),
        )
        vector_store = Pinecone.from_documents(chunks, embeddings, index_name=index_name)

    print('OK')
    return vector_store

In [34]:
# Name of the Pinecone index that holds the embedded GDPR chunks.
index_name = 'gdpr-compliant-index'
# Attach to the index if it already exists, otherwise create it from `chunks`.
vector_store = insert_or_fetch_embedding(index_name, chunks)

creating indexOK


## Asking questions and getting answers

In [35]:
def ask_and_get_answer(vector_store, q):
    """Answer question ``q`` using documents retrieved from ``vector_store``.

    Builds a ``RetrievalQA`` chain of type ``"stuff"`` over a similarity
    retriever that fetches 3 results (``k=3``), using the ``gpt-4o-mini``
    chat model at temperature 1.

    Args:
        vector_store: Object exposing ``as_retriever``, e.g. the store
            returned by ``insert_or_fetch_embedding``.
        q: The question text.

    Returns:
        The chain's answer (the ``'result'`` entry of the chain output).
    """
    from langchain.chains import RetrievalQA
    from langchain_openai import ChatOpenAI

    llm = ChatOpenAI(model='gpt-4o-mini', temperature=1)
    retriever = vector_store.as_retriever(
        search_type='similarity',
        search_kwargs={'k': 3},
    )
    chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    # `Chain.run` is deprecated; `invoke` takes the question under the
    # chain's 'query' input key and returns a dict whose 'result' entry is
    # the answer text.
    return chain.invoke({'query': q})['result']

In [37]:
# Interactive question/answer loop over the vector store.
i = 1
# The original prompt misspelled "quit" as "quite".
print('type quit or exit to quit')
while True:
    q = input(f'Question #{i}:').strip()
    if not q:
        # Nothing typed: ask again instead of sending an empty query.
        continue
    # 'quite' is still accepted so the previously advertised keyword works.
    if q.lower() in ('quit', 'quite', 'exit'):
        print('Goodbye')
        break
    answer = ask_and_get_answer(vector_store, q)
    print(f'Answer ={answer} ')
    print(f'\n {"-"*50}')
    i = i + 1

type quite or exit to quite


Question #1: What is the document all about?


Answer =The document appears to discuss the processing and public access to official documents, particularly focusing on personal data held by public authorities or bodies. It emphasizes the importance of promoting knowledge and documentation on data protection legislation and practices, as well as the public interest in accessing official documents. It also highlights the disclosure of personal data in documents held by public entities. 

 --------------------------------------------------


Question #2:  What documentation do we need to prove that we're GDPR compliant?


Answer =To prove GDPR compliance, you may need to maintain several types of documentation, including:

1. Records of processing activities: Document the personal data you process, the purpose of processing, how long you keep the data, and who it is shared with.
2. Data protection policies and procedures: Having clear written policies on data protection, including breach response procedures.
3. Data protection impact assessments (DPIAs): If your processing poses a high risk to individuals’ rights, conducting DPIAs is necessary to identify and mitigate those risks.
4. Consent records: Documentation of how consent is obtained from individuals for processing their personal data, including what information was provided to them.
5. Contracts with processors: Maintain records of contracts with any third-party processors to ensure they comply with GDPR requirements.
6. Training records: Documentation that employees are trained in data protection and GDPR compliance.
7. Certification: If appl

Question #3: What are the penalties for non-compliance with GDPR?


Answer =I don't know. 

 --------------------------------------------------


Question #4: How would you conduct a data protection impact assessment (DPIA)?


Answer =I don't know. 

 --------------------------------------------------


Question #5: What are GDPR fundamental rights?


Answer =The General Data Protection Regulation (GDPR) establishes several fundamental rights for individuals regarding the processing of their personal data. These rights include:

1. **Right to Access**: Individuals have the right to know whether their personal data is being processed and to access that data.
  
2. **Right to Rectification**: Individuals can request the correction of inaccurate or incomplete personal data.

3. **Right to Erasure** (Right to be Forgotten): Individuals have the right to request the deletion of their personal data under certain circumstances.

4. **Right to Restriction of Processing**: Individuals can request that the processing of their personal data be restricted.

5. **Right to Data Portability**: Individuals have the right to obtain and reuse their personal data for their own purposes across different services.

6. **Right to Object**: Individuals can object to the processing of their personal data in certain situations.

7. **Rights in relation to a

Question #6: What are the penalties for GDPR breaches?


Answer =I don't know. 

 --------------------------------------------------


Question #7: exit


Goodbye
