In [1]:
import os

import regex as re
import pandas as pd
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from llama_index.core import SummaryIndex, VectorStoreIndex
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.agent import ReActAgent
from IPython.display import Markdown, display
from duckduckgo_search import DDGS

In [2]:
import nest_asyncio

nest_asyncio.apply()

In [3]:
llm = OpenAI(model="gpt-4o")
Settings.llm = llm

In [6]:
policies = pd.read_csv("../../privacy_policies.csv")
policies.drop_duplicates(subset=["link"], inplace=True, ignore_index=True)

In [15]:
policies

Unnamed: 0,name,model,link
0,Google Analytics,google,https://policies.google.com/privacy
1,Google Analytics for Firebase,firebase,https://firebase.google.com/policies/analytics
2,Twitter,twitter,https://twitter.com/privacy
3,Facebook,facebook,https://www.facebook.com/about/privacy/
4,WhatsApp,whatsapp,https://www.whatsapp.com/legal/privacy-policy/
5,Instagram,insta,https://help.instagram.com/519522125107875
6,Messenger,msgr,https://www.messenger.com/privacy
7,Disqus,disqus,https://help.disqus.com/en/articles/1717103-di...
8,Microsoft Clarity,clarity,https://privacy.microsoft.com/en-gb/privacysta...
9,Matomo,matomo,https://matomo.org/privacy-policy/


In [7]:
companies_documents = SimpleWebPageReader(html_to_text=True).load_data(
    policies['link'].to_list())

In [8]:
policies_query_tool = [
    QueryEngineTool(
        query_engine=VectorStoreIndex.from_documents([companies_documents[i]
                                                      ]).as_query_engine(),
        metadata=ToolMetadata(
            name=f"privacy_policy_for_{policies.loc[i, 'name']}",
            description=("useful for when you want to know "
                         f"{policies.loc[i, 'name']}'s privacy policy"),
        ),
    ) for i in range(len(companies_documents))
]

In [9]:
policies_query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=policies_query_tool)

In [10]:
privacy_query_engine_tool = QueryEngineTool(
    query_engine=policies_query_engine,
    metadata=ToolMetadata(
        name="sub_question_query_engine_for_privacy_policies",
        description=(
            "useful for when you want to answer queries that require analyzing"
            " multiple privacy policies from different companies"),
    ),
)

In [11]:
euro_dgpr_query_tool = QueryEngineTool(
    query_engine=VectorStoreIndex.from_documents(
        SimpleWebPageReader(html_to_text=True).load_data([
            'https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016L0680'
        ])).as_query_engine(),
    metadata=ToolMetadata(
        name="euro_dgpr_query_tool",
        description=
        "useful for when you want to know about the European General Data Protection Regulation",
    ),
)

In [12]:
instance = DDGS()
methods = [
    FunctionTool.from_defaults(fn=getattr(instance, method),
                               description=re.match(
                                   r"^(.*?)(?:\bRaises:\n|$)",
                                   getattr(instance, method).__doc__,
                                   re.DOTALL).group(1).strip())
    for method in ['text']  # dir(instance)
    if callable(getattr(instance, method)) and not method.startswith("_")
]
methods = list(filter(lambda x: len(x.metadata.description) <= 1024, methods))

In [13]:
agent = ReActAgent.from_tools(
    [privacy_query_engine_tool, euro_dgpr_query_tool],
    llm=llm,
    verbose=True,
    # context=context
)

In [18]:
agent.reset()
output = agent.chat(
    "Help me generate a full privacy policy of my project, which is a PDF edit tool. The privacy policy should obey European General Data Protection Regulation. You can use Google's privacy policy as a reference."
)

display(Markdown(f"<b>{output.response}</b>"))

[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: euro_dgpr_query_tool
Action Input: {'input': 'What are the key requirements for a privacy policy under the European General Data Protection Regulation?'}
[0m[1;3;34mObservation: The key requirements for a privacy policy under the European General Data Protection Regulation (GDPR) include:

1. **Lawfulness of Processing**: Processing must be lawful and necessary for the performance of a task carried out by a competent authority, based on Union or Member State law.

2. **Specific Processing Conditions**: Personal data collected for specific purposes must not be processed for other purposes unless authorized by law.

3. **Data Protection by Design and by Default**: Controllers must implement appropriate technical and organizational measures to ensure data protection principles are integrated into processing activities.

4. **Joint Controllers**: When two or m

<b>---

**Privacy Policy for PDF Edit Tool**

**1. Introduction**

Welcome to [Your Company Name]'s PDF Edit Tool. We are committed to protecting your personal data and respecting your privacy. This privacy policy explains how we collect, use, and protect your personal data in compliance with the European General Data Protection Regulation (GDPR).

**2. Lawfulness of Processing**

We process your personal data lawfully, fairly, and transparently. The processing is necessary for the performance of our services and is based on your consent or other legal grounds as required by GDPR.

**3. Data Collection and Use**

We collect personal data that you provide to us directly, such as your name, email address, and any documents you upload for editing. We use this data to provide, maintain, and improve our services, communicate with you, and ensure the security of our platform.

**4. Specific Processing Conditions**

Personal data collected for specific purposes will not be processed for other purposes unless authorized by law or with your explicit consent.

**5. Data Protection by Design and by Default**

We implement appropriate technical and organizational measures to ensure that data protection principles are integrated into our processing activities. This includes data minimization, pseudonymization, and encryption where applicable.

**6. Joint Controllers**

If we jointly determine the purposes and means of processing with other entities, we will transparently determine our respective responsibilities and designate a contact point for data subjects.

**7. Processor Requirements**

We use processors that provide sufficient guarantees to implement appropriate measures. Processing by our processors is governed by binding contracts outlining the nature, purpose, and obligations related to the processing.

**8. Distinction Between Data Subjects**

We distinguish between different categories of data subjects, such as users, customers, and other parties related to our services, and ensure that their data is processed accordingly.

**9. Quality of Personal Data**

We ensure that personal data is accurate, complete, and up-to-date. We take steps to rectify or erase incorrect data without undue delay.

**10. Time-Limits for Storage and Review**

We establish appropriate time limits for the erasure or periodic review of personal data storage needs. Personal data is retained only for as long as necessary to fulfill the purposes for which it was collected.

**11. Your Rights**

You have the right to access, rectify, erase, restrict, or object to the processing of your personal data. You also have the right to data portability and to withdraw your consent at any time.

**12. Contact Us**

If you have any questions or concerns about this privacy policy or our data processing practices, please contact us at [Your Contact Information].

**13. Changes to This Privacy Policy**

We may update this privacy policy from time to time. Any changes will be posted on this page, and we will notify you of significant changes.

---

This privacy policy ensures compliance with the GDPR and provides transparency about how personal data is handled in your PDF edit tool project.</b>

In [16]:
# define prompt viewing function
def display_prompt_dict(prompts_dict):
    for k, p in prompts_dict.items():
        text_md = f"**Prompt Key**: {k}" f"**Text:** "
        display(Markdown(text_md))
        print(p.get_template())
        display(Markdown(""))

In [17]:
display_prompt_dict(agent.get_prompts())

**Prompt Key**: agent_worker:system_prompt**Text:** 

You are designed to help with a variety of tasks, from answering questions to providing summaries to other types of analyses.

## Tools

You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
This may require breaking the task into subtasks and using different tools to complete each subtask.

You have access to the following tools:
{tool_desc}


## Output Format

Please answer in the same language as the question and use the following format:

```
Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
Action: tool name (one of {tool_names}) if using a tool.
Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {{"input": "hello world", "num_beams": 5}})
```

Please ALWAYS start with a Thought.

Please use a valid JSON format for the Action Input. Do NOT do this {{'input': 'hello world', 'num_beams': 

