In [1]:
import os

import regex as re
import pandas as pd
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings
from llama_index.core import SummaryIndex, VectorStoreIndex
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.agent import ReActAgent
from IPython.display import Markdown, display
from duckduckgo_search import DDGS

In [2]:
# Patch asyncio so an event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and the query engines below issue async calls — confirm.
import nest_asyncio

nest_asyncio.apply()

In [3]:
# Create the chat model and register it on llama_index's global Settings so it
# is also used by components that are not handed an explicit `llm`.
# NOTE(review): no API key is passed here; it is presumably read from the
# environment (e.g. OPENAI_API_KEY) — confirm before running on a fresh machine.
llm = OpenAI(model="gpt-4o")
Settings.llm = llm

In [4]:
# Load the table of privacy-policy pages and keep one row per distinct URL.
# The frame is built in a single expression (no `inplace=True`) so that
# `policies` is never observable in its un-deduplicated state.
# `ignore_index=True` renumbers rows 0..n-1; later cells rely on that to look
# rows up by position with `policies.loc[i, ...]`.
policies = (
    pd.read_csv("./privacy_policies.csv")
    .drop_duplicates(subset=["link"], ignore_index=True)
)

In [10]:
# Inspect the available columns; later cells read 'name' and 'link'.
policies.columns

Index(['name', 'model', 'link'], dtype='object')

In [5]:
# Download every policy page listed in the 'link' column; `html_to_text=True`
# asks the reader to hand back text instead of raw HTML.
# NOTE(review): the next cell pairs documents[i] with policies row i, so this
# assumes the reader returns exactly one document per URL, in input order —
# confirm.
documents = SimpleWebPageReader(html_to_text=True).load_data(
    policies['link'].to_list())

In [6]:
def _policy_tool(position):
    """Build the query tool for the policy document at row ``position``.

    The document is indexed on its own in a fresh ``VectorStoreIndex``, and the
    tool's name and description are derived from the matching row's 'name'
    value in ``policies``.
    """
    # Index first, then read the name, mirroring the original evaluation order.
    engine = VectorStoreIndex.from_documents([documents[position]
                                              ]).as_query_engine()
    company = policies.loc[position, 'name']
    return QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(
            name=f"privacy_policy_for_{company}",
            description=f"useful for when you want to know {company}'s privacy policy",
        ),
    )


# One single-document query tool per downloaded policy page.
policies_query_tool = [
    _policy_tool(position) for position in range(len(documents))
]

In [7]:
# Combine the per-company tools into one engine. As the run log further down
# shows ("Generated 2 sub questions."), a query is split into sub-questions
# that are each answered by one of the per-policy tools.
policies_query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=policies_query_tool)

In [8]:
# Wrap the sub-question engine as a single tool so the agent can call it.
# The name and description below are what the agent sees when choosing a tool
# (the name appears as the "Action:" in the run log).
privacy_query_engine_tool = QueryEngineTool(
    query_engine=policies_query_engine,
    metadata=ToolMetadata(
        name="sub_question_query_engine_for_privacy_policies",
        description=(
            "useful for when you want to answer queries that require analyzing"
            " multiple privacy policies from different companies"),
    ),
)

In [9]:
def _as_function_tool(bound_method):
    """Wrap a DDGS method as a ``FunctionTool``.

    The tool description is the method's docstring up to (but excluding) its
    ``Raises:`` section. A method without a docstring gets ``description=None``
    instead of crashing in ``re.match``; ``FunctionTool.from_defaults`` then
    falls back to its own default description.
    """
    doc = bound_method.__doc__
    description = None
    if doc:
        # The `$` alternative guarantees a match, so `.group(1)` is safe.
        description = re.match(r"^(.*?)(?:\bRaises:\n|$)", doc,
                               re.DOTALL).group(1).strip()
    return FunctionTool.from_defaults(fn=bound_method, description=description)


instance = DDGS()
methods = [
    _as_function_tool(getattr(instance, method))
    for method in ['text']  # dir(instance)
    # Check the name first so private attributes are never touched.
    if not method.startswith("_") and callable(getattr(instance, method))
]
# Keep only tools whose description is at most 1024 characters long.
methods = [tool for tool in methods if len(tool.metadata.description) <= 1024]

In [13]:
# Build the ReAct agent with the privacy-policy tool as its only tool;
# `verbose=True` prints the Thought/Action trace seen in the output below.
# NOTE(review): the DuckDuckGo tools collected in `methods` are not passed
# here, so the agent cannot search the web — confirm whether that is intended.
agent = ReActAgent.from_tools(
    [privacy_query_engine_tool],
    llm=llm,
    verbose=True,
    # context=context
)

In [15]:
# Ask the agent to draft a privacy policy, then render its answer as Markdown.
# The prompt text is sent to the model verbatim.
output = agent.chat(
    "Help me generate a full privacy policy of my project, which is a PDF edit tool. Please following Google's privacy policy."
)

# The response is wrapped in a <b> tag before being rendered.
display(Markdown(f"<b>{output.response}</b>"))

[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: sub_question_query_engine_for_privacy_policies
Action Input: {'input': "full privacy policy of a PDF edit tool following Google's privacy policy"}
[0mGenerated 2 sub questions.
[1;3;38;2;237;90;200m[privacy_policy_for_Google Analytics] Q: What is the privacy policy for Google Analytics?
[0m[1;3;38;2;90;149;237m[privacy_policy_for_Google Analytics for Firebase] Q: What is the privacy policy for Google Analytics for Firebase?
[0m[1;3;38;2;237;90;200m[privacy_policy_for_Google Analytics] A: The privacy policy for Google Analytics is not specifically detailed in the provided information. However, you can find additional information about how Google uses information from sites or apps that use their services, including Google Analytics, under the "Technologies" section. For more specific details, you may need to refer to Google's main privacy policy or visi

<b>---

**Privacy Policy for [Your PDF Edit Tool]**

**Effective Date:** [Insert Date]

**1. Introduction**

Welcome to [Your PDF Edit Tool]. We are committed to protecting your privacy and ensuring that your personal information is handled in a safe and responsible manner. This Privacy Policy outlines how we collect, use, and protect your information when you use our services.

**2. Information We Collect**

- **Personal Information:** When you register or use our services, we may collect personal information such as your name, email address, and payment information.
- **Usage Data:** We collect information about how you interact with our tool, including the features you use, the files you upload, and the time and duration of your sessions.
- **Device Information:** We may collect information about the device you use to access our services, including the device type, operating system, and browser type.

**3. How We Use Your Information**

- **To Provide and Improve Services:** We use your information to operate, maintain, and improve our services.
- **To Communicate with You:** We may use your contact information to send you updates, security alerts, and support messages.
- **For Analytics:** We use data analytics to understand how our services are used and to improve user experience.

**4. Information Sharing and Disclosure**

- **Third-Party Service Providers:** We may share your information with third-party service providers who perform services on our behalf, such as payment processing and data analysis.
- **Legal Requirements:** We may disclose your information if required to do so by law or in response to valid requests by public authorities.

**5. Data Security**

We implement appropriate technical and organizational measures to protect your personal information from unauthorized access, use, or disclosure.

**6. Your Rights**

- **Access and Update:** You have the right to access and update your personal information.
- **Delete:** You can request the deletion of your personal information.
- **Opt-Out:** You can opt-out of receiving promotional communications from us.

**7. Changes to This Privacy Policy**

We may update this Privacy Policy from time to time. We will notify you of any changes by posting the new Privacy Policy on our website.

**8. Contact Us**

If you have any questions about this Privacy Policy, please contact us at [Your Contact Information].

---

**Note:** This is a sample privacy policy and should be customized to fit the specific practices and legal requirements of your PDF edit tool. It is advisable to consult with a legal professional to ensure compliance with all applicable laws and regulations.</b>

In [None]:
# Task classification
# Prompt optimization
# Specialization
# Maximize utilization
# Fine-tune as much as possible