In [1]:
import os
import sys
from typing import List, Dict, Any, Optional

import tiktoken
import pandas as pd
from llama_index.llms.openai import OpenAI
from llama_index.core import Settings, StorageContext
from llama_index.core import SummaryIndex, VectorStoreIndex
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core.tools import BaseTool, FunctionTool
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.agent import ReActAgent
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.query_pipeline import QueryPipeline, FnComponent
from llama_index.core import PromptTemplate
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import ResponseMode
from llama_index.core.output_parsers import PydanticOutputParser
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
from llama_index.core.indices import load_index_from_storage
from llama_index.llms.text_generation_inference import TextGenerationInference
from pydantic import BaseModel, Field, conlist, field_validator
from enum import Enum
from IPython.display import Markdown, display
from duckduckgo_search import DDGS
from dotenv import load_dotenv

sys.path.append('../')

from src.pipelines import (
    prepare_regulation_syllabus_pipeline,
    prepare_regulation_query_engine,
    prepare_generate_pipeline,
    prepare_section_judge_pipeline,
    prepare_region_selection_pipeline,
)
from src.generators import PrivacyPolicyGenerator
from src.prompts import (
    ASK_SYLLABUS_TEMPLATE,
    FORMAT_SYLLABUS_TEMPLATE,
    REGENERATE_PROMPT,
    GENERATE_PROMPT,
    JUDGE_SECTION_TEMPLATE,
    FORMAT_JUDGE_TEMPLATE,
    REGION_SELECTION_TEMPLATE,
)
from src.formats import (
    Syllabus,
    SectionNames,
    Judges,
    Judge,
    SectionContent,
    RegulationRegions,
    SelectedRegions,
)
from src.law_model import prepare_law_llm


  _torch_pytree._register_pytree_node(


In [2]:
import nest_asyncio

nest_asyncio.apply()

In [3]:
llm = OpenAI(model="gpt-4o")
Settings.llm = llm

In [4]:
tokenizer_fn = tiktoken.encoding_for_model("gpt-4o").encode
token_counter = TokenCountingHandler(tokenizer=tokenizer_fn, verbose=True)
Settings.callback_manager = CallbackManager([token_counter])

## PrivacyPolicyGenerator

In [15]:
regions = await prepare_region_selection_pipeline(verbose=True).arun(
    user_input="San Diego", )

[1;3;38;2;155;135;227m> Running module 0c4dfc69-1cd9-4fef-9be0-99bf14205101 with input: 
user_input: San Diego

[0m[1;3;38;2;155;135;227m> Running module 9f562e68-65de-4da2-98cb-83b22a5a41bd with input: 
messages: 
Assistant: I can help determine which region a user input belongs to.
The regions are: United States, California, European Union, United Kingdom, Canada, Australia, India, Singapore, Japan, South Kor...

[0mLLM Prompt Token Usage: 294
LLM Completion Token Usage: 17
[1;3;38;2;155;135;227m> Running module 4d0142db-46f9-46b2-ad9b-fc67277f681b with input: 
input: assistant: ```json
{
  "regions": ["California", "United States"]
}
```

[0m[1;3;38;2;155;135;227m> Running module 2cccb6e5-0ed4-4fc9-b119-c73c4d287308 with input: 
input: regions=[<RegulationRegions.USCA: 'California'>, <RegulationRegions.US: 'United States'>]

[0m

In [5]:
regions = prepare_region_selection_pipeline(verbose=True).run(
    user_input="San Diego", )
regions

[1;3;38;2;155;135;227m> Running module 430d31b4-1e4a-4c36-b25d-23c125fc4249 with input: 
user_input: San Diego

[0m[1;3;38;2;155;135;227m> Running module a71558d7-272a-4f99-b63d-0080597dfec3 with input: 
messages: 
Assistant: I can help determine which region a user input belongs to.
The regions are: United States, California, European Union, United Kingdom, Canada, Australia, India, Singapore, Japan, South Kor...

[0mLLM Prompt Token Usage: 294
LLM Completion Token Usage: 13
[1;3;38;2;155;135;227m> Running module b32d2060-f7c7-4e90-8c18-1df6b1cb78e9 with input: 
input: assistant: {
  "regions": ["California", "United States"]
}

[0m[1;3;38;2;155;135;227m> Running module f0be5ef3-f690-4521-9e1a-f6fb36b54eae with input: 
input: regions=[<RegulationRegions.USCA: 'California'>, <RegulationRegions.US: 'United States'>]

[0m

{'California', 'United States'}

In [5]:
links_df = pd.read_csv('../data/regulations/regulations.csv', encoding='utf-8')
# sub_links_df = links_df.loc[links_df['regions'].isin(regions)]

In [6]:
ppg = PrivacyPolicyGenerator.from_defaults(links_df,
                                           model_url='http://localhost:8080',
                                           verbose=True)

In [7]:
ppg.get_syllabus(["CCPA, GDPR"])

[1;3;38;2;155;135;227m> Running module 598b8fa3-6510-4b52-9d63-f56d1e5b3844 with input: 
regulations: CCPA, GDPR

[0m[1;3;38;2;155;135;227m> Running module 66834189-1d01-4833-a487-60554ff3c36e with input: 
input: I want to write a privacy policy, which should be CCPA, GDPR compliant.
What sections should it include? And for each section, what key points should be covered?
List them as bullet points.

[0mLLM Prompt Token Usage: 469
LLM Completion Token Usage: 127
Generated 4 sub questions.
[1;3;38;2;237;90;200m[General Data Protection Regulation (GDPR)] Q: What sections should a privacy policy include to be GDPR compliant?
[0m[1;3;38;2;90;149;237m[General Data Protection Regulation (GDPR)] Q: What key points should be covered in each section of a GDPR compliant privacy policy?
[0m[1;3;38;2;11;159;203m[California Consumer Privacy Act (CCPA)] Q: What sections should a privacy policy include to be CCPA compliant?
[0m[1;3;38;2;155;135;227m[California Consumer Privacy Act (CCPA)] 

{'Introduction': ['Purpose of the privacy policy.',
  "Overview of the organization's commitment to data protection."],
 'Data Controller Information': ['Identity and contact details of the data controller.',
  'Contact details of the data protection officer, if applicable.'],
 'Data Collection and Usage': ['Types of personal data collected (e.g., names, contact details, IP addresses).',
  'Methods of data collection (e.g., directly from users, through cookies).',
  'Identify the sources from which personal information is collected.',
  'Specific purposes for which personal data is processed.',
  'Legal basis for processing (e.g., consent, contractual necessity, legal obligation).',
  'Explain the purposes for collecting, selling, or sharing personal information.'],
 'Data Subject Rights': ['Rights of data subjects under GDPR (e.g., access, rectification, erasure, restriction of processing, data portability, objection).',
  'Rights of consumers under CCPA (e.g., right to know, delete, 

In [8]:
information = """\

Name and contact data. Your first and last name, email address, postal address, phone number, and other similar contact data.

Credentials. Passwords, password hints, and similar security information used for authentication and account access.

Demographic data. Data about you such as your age, gender, country, and preferred language.

Payment data. Data to process payments, such as your payment instrument number (such as a credit card number) and the security code associated with your payment instrument.
"""

In [9]:
information

'\nName and contact data. Your first and last name, email address, postal address, phone number, and other similar contact data.\n\nCredentials. Passwords, password hints, and similar security information used for authentication and account access.\n\nDemographic data. Data about you such as your age, gender, country, and preferred language.\n\nPayment data. Data to process payments, such as your payment instrument number (such as a credit card number) and the security code associated with your payment instrument.\n'

In [9]:
content = ppg.generate("Data Collection and Usage", information, 1)
print(content['success'])
display(Markdown(f"<b>{content['content']}</b>"))

[1;3;38;2;155;135;227m> Running module 35ab1dcd-f872-4279-aea5-d600a9679c1c with input: 
section_name: Data Collection and Usage
information: 
Name and contact data. Your first and last name, email address, postal address, phone number, and other similar contact data.

Credentials. Passwords, password hints, and similar security information...
key_points: Types of personal data collected (e.g., names, contact details, IP addresses).
Methods of data collection (e.g., directly from users, through cookies).
Identify the sources from which personal informa...

[0m[1;3;38;2;155;135;227m> Running module 71391f57-4cfa-4793-8a5e-329ec70a8c27 with input: 
messages: Please generate a Data Collection and Usage section of a privacy policy based on these information provided below:


Name and contact data. Your first and last name, email address, postal address, pho...

[0mLLM Prompt Token Usage: 396
LLM Completion Token Usage: 593
[1;3;38;2;155;135;227m> Running module 567bfc7c-8b3b-4118-92ee

Failed to pass law model after threshold 1.


[1;3;38;2;155;135;227m> Running module judge_parser with input: 
input: assistant: ```json
{
  "judges": [
    {
      "name": "Data Collection and Usage",
      "suggestions": "It would be helpful to have the entire privacy policy to ensure that all aspects are compliant...

[0m[1;3;38;2;155;135;227m> Running module determine_judge with input: 
section_name: Data Collection and Usage
judges: judges=[Judge(name=<SectionNames.SECTION3: 'Data Collection and Usage'>, suggestions='It would be helpful to have the entire privacy policy to ensure that all aspects are compliant.')]

[0mFalse


<b>We collect and use various types of personal data to provide and improve our services. The types of personal data we collect include:

1. **Name and Contact Data**: This includes your first and last name, email address, postal address, phone number, and other similar contact data.

2. **Credentials**: This includes passwords, password hints, and similar security information used for authentication and account access.

3. **Demographic Data**: This includes data about you such as your age, gender, country, and preferred language.

4. **Payment Data**: This includes data to process payments, such as your payment instrument number (e.g., credit card number) and the security code associated with your payment instrument.

5. **IP Addresses**: This includes data about your device and internet connection, such as IP addresses.

**Methods of Data Collection**:

- **Directly from Users**: We collect data directly from you when you provide it to us, such as when you create an account, make a purchase, or contact us for support.
- **Through Cookies and Tracking Technologies**: We use cookies and similar technologies to collect data about your interactions with our services.

**Sources of Personal Information**:

- **You**: Most of the personal data we collect comes directly from you.
- **Third Parties**: We may also receive data from third parties, such as payment processors and analytics providers.

**Specific Purposes for Processing Personal Data**:

- **To Provide Services**: We use your data to create and manage your account, process transactions, and provide customer support.
- **To Improve Services**: We analyze data to understand how our services are used and to improve them.
- **To Communicate with You**: We use your contact data to send you updates, security alerts, and other information related to your account.
- **To Ensure Security**: We use credentials and other security information to protect your account and our services.
- **Targeted Advertising and Profiling**: We do not use your personal data for targeted advertising or profiling.

**Legal Basis for Processing**:

- **Consent**: We process your data based on your consent, which you can withdraw at any time.
- **Contractual Necessity**: We process your data to fulfill our contract with you, such as providing the services you have requested.
- **Legal Obligation**: We process your data to comply with legal obligations, such as tax and accounting requirements.

**Purposes for Collecting, Selling, or Sharing Personal Information**:

- **Collecting**: We collect personal data to provide and improve our services, communicate with you, and ensure security.
- **Selling**: We do not sell your personal data.
- **Sharing**: We may share your data with third parties for purposes such as payment processing, analytics, and compliance with legal obligations.</b>

## Syllabus Generator

In [5]:
links_df = pd.read_csv('../data/regulations/links.csv', encoding='utf-8')
regulation_query_engine = prepare_regulation_query_engine(links_df)
syllabus_generator = prepare_regulation_syllabus_pipeline(
    regulation_query_engine, verbose=True)

In [6]:
regulations = key_points = syllabus_generator.run("GDPR, PIPEDA")
key_points

[1;3;38;2;155;135;227m> Running module 2890ca84-5a1b-4981-9551-b8e431887204 with input: 
regulations: GDPR, PIPEDA

[0m[1;3;38;2;155;135;227m> Running module a39746d5-ef39-4260-a5cd-ea21e4e77251 with input: 
input: I want to write a privacy policy, which should be GDPR, PIPEDA compliant.
What sections should it include? And for each section, what key points should be covered?
List them as bullet points.

[0mLLM Prompt Token Usage: 470
LLM Completion Token Usage: 139
Generated 4 sub questions.
[1;3;38;2;237;90;200m[General Data Protection Regulation (GDPR)] Q: What sections should a privacy policy include to be GDPR compliant?
[0m[1;3;38;2;90;149;237m[General Data Protection Regulation (GDPR)] Q: For each section of a GDPR compliant privacy policy, what key points should be covered?
[0m[1;3;38;2;11;159;203m[Personal Information Protection and Electronic Documents Act (PIPEDA)] Q: What sections should a privacy policy include to be PIPEDA compliant?
[0m[1;3;38;2;155;135;227m[P

{'Introduction': ['Purpose of the privacy policy.',
  "Overview of the organization's commitment to data protection."],
 'Data Controller Information': ['Identity and contact details of the data controller.',
  'Contact details of the Data Protection Officer (if applicable).'],
 'Data Collection and Usage': ['Categories of personal data collected (e.g., contact details, financial information, IP addresses).',
  'Methods of data collection (e.g., directly from users, through cookies).',
  'Specific purposes for which personal data is processed (e.g., service provision, marketing, compliance with legal obligations).',
  'Legal basis for processing (e.g., consent, performance of a contract, legitimate interests).',
  "Obtain the individual's knowledge and consent for the collection, use, or disclosure of personal information.",
  'Ensure consent is meaningful by explaining the purposes in a manner that individuals can understand.',
  'Allow individuals to withdraw consent, subject to lega

## Judge Generator

In [17]:
generated_section = """\
The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applicable law.

The data we collect can include the following:

Name and contact data. Your first and last name, email address, postal address, phone number, and other similar contact data.

Credentials. Passwords, password hints, and similar security information used for authentication and account access.

Demographic data. Data about you such as your age, gender, country, and preferred language.

Payment data. Data to process payments, such as your payment instrument number (such as a credit card number) and the security code associated with your payment instrument.

Subscription and licensing data. Information about your subscriptions, licenses, and other entitlements.

Microsoft uses the data we collect to provide you rich, interactive experiences. In particular, we use data to:

Provide our products, which includes updating, securing, and troubleshooting, as well as providing support. It also includes sharing data, when it is required to provide the service or carry out the transactions you request.
Improve and develop our products.
Personalize our products and make recommendations.
Advertise and market to you, which includes sending promotional communications, targeting advertising, and presenting you relevant offers.
We also use the data to operate our business, which includes analyzing our performance, meeting our legal obligations, developing our workforce, and doing research."""

In [18]:
# law_llm = prepare_law_llm("Equall/Saul-7B-Instruct-v1")
law_llm = TextGenerationInference(model_url='http://localhost:8080',
                                  model_name="Equall/Saul-7B-Instruct-v1",
                                  token=False)

In [19]:
judge_pipeline = prepare_section_judge_pipeline(law_llm, verbose=True)

In [20]:
judge = await judge_pipeline.arun(
    section_name="Data Collection and Usage",
    section_text=generated_section,
    regulations="GDPR, CCPA",
)
judge

[1;3;38;2;155;135;227m> Running module input with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR, CCPA

[0m[1;3;38;2;155;135;227m> Running module judge_section_template with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR, CCPA

[0m[1;3;38;2;155;135;227m> Running module law_llm with input: 
messages: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0mLLM Prompt 

{'pass': False,
 'suggestions': "The section does not explicitly mention consent for data collection and usage. It should be made clear that the data is collected with the user's consent and that they have the right to withdraw their consent at any time."}

## Section Modifier

In [11]:
regenerate_pipeline = prepare_generate_pipeline(REGENERATE_PROMPT,
                                                verbose=True)

In [12]:
section_name = "Data Collection and Usage"
regenerate = regenerate_pipeline.run(section_name=section_name,
                                     section_text=generated_section,
                                     suggestions=judge["suggestions"],
                                     key_points=key_points[section_name])
regenerate

[1;3;38;2;155;135;227m> Running module 5dfa4926-a6c5-4bca-9d13-dd87d74f1458 with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
suggestions: Clarify the legal basis for processing each type of data collected, such as consent, contract, or legitimate interest.
key_points: ['Categories of personal data collected (e.g., contact details, financial information, IP addresses).', 'Methods of data collection (e.g., directly from users, through cookies).', 'Specific purposes f...

[0m[1;3;38;2;155;135;227m> Running module 079358c9-f022-43c5-a97c-6d036216e6ba with input: 
messages: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy setti

SectionContent(name=<SectionNames.SECTION3: 'Data Collection and Usage'>, content='The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applicable law.\n\nThe data we collect can include the following:\n\nName and contact data. Your first and last name, email address, postal address, phone number, and other similar contact data. Legal basis: Performance of a contract, legitimate interests.\n\nCredentials. Passwords, password hints, and similar security information used for authentication and account access. Legal basis: Performance of a contract.\n\nDemographic data. Data about you such as your age, gender, country, and preferred language. Legal basis: Consent.\n\nPayment data. Data to process payments, such as your payment instrument number (such as a credit card number) and the security code associated with your payment instrument. Legal basis: P

In [13]:
def regenerate(
    section_name: str,
    section_text: str,
    suggestions: str,
    key_points: Dict[str, list],
    regulations: List[str],
    threshold: int = 5,
) -> str:

    for i in range(threshold):

        regenerate = regenerate_pipeline.run(
            section_name=section_name,
            section_text=section_text,
            suggestions=suggestions,
            key_points='\n'.join(key_points[section_name]),
        )

        judge = judge_pipeline.run(
            section_name=section_name,
            section_text=regenerate.content,
            regulations='\n'.join(regulations),
        )

        if judge['pass']:
            break

        else:
            suggestions = judge['suggestions']
            section_text = regenerate.content
    else:
        print(f"Failed to pass law model after threshold {threshold}.")

    return regenerate.content

In [15]:
content = regenerate(section_name=section_name,
                     section_text=generated_section,
                     suggestions=judge["suggestions"],
                     key_points=key_points,
                     regulations=["GDPR", "CCPA"])

[1;3;38;2;155;135;227m> Running module 5dfa4926-a6c5-4bca-9d13-dd87d74f1458 with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
suggestions: Clarify the legal basis for processing each type of data collected, such as consent, contract, or legitimate interest.
key_points: Categories of personal data collected (e.g., contact details, financial information, IP addresses).
Methods of data collection (e.g., directly from users, through cookies).
Specific purposes for which...

[0m[1;3;38;2;155;135;227m> Running module 079358c9-f022-43c5-a97c-6d036216e6ba with input: 
messages: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy setti

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module 169c30c6-6d95-43ab-b622-be23d2a70051 with input: 
input: assistant: ```json
{
  "name": "Data Collection and Usage",
  "content": "The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privac...

[0m[1;3;38;2;155;135;227m> Running module judge_section_template with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR
CCPA

[0m[1;3;38;2;155;135;227m> Running module law_llm with input: 
prompt: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0mLLM Prompt Token Usage: 522
LLM Completion Token Usage

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module 169c30c6-6d95-43ab-b622-be23d2a70051 with input: 
input: assistant: ```json
{
  "name": "Data Collection and Usage",
  "content": "The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privac...

[0m[1;3;38;2;155;135;227m> Running module judge_section_template with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR
CCPA

[0m[1;3;38;2;155;135;227m> Running module law_llm with input: 
prompt: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0mLLM Prompt Token Usage: 522
LLM Completion Token Usage

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module 169c30c6-6d95-43ab-b622-be23d2a70051 with input: 
input: assistant: ```json
{
  "name": "Data Collection and Usage",
  "content": "The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privac...

[0m[1;3;38;2;155;135;227m> Running module judge_section_template with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR
CCPA

[0m[1;3;38;2;155;135;227m> Running module law_llm with input: 
prompt: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0mLLM Prompt Token Usage: 522
LLM Completion Token Usage

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module 169c30c6-6d95-43ab-b622-be23d2a70051 with input: 
input: assistant: ```json
{
  "name": "Data Collection and Usage",
  "content": "The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privac...

[0m[1;3;38;2;155;135;227m> Running module judge_section_template with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR
CCPA

[0m[1;3;38;2;155;135;227m> Running module law_llm with input: 
prompt: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0mLLM Prompt Token Usage: 522
LLM Completion Token Usage

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module 169c30c6-6d95-43ab-b622-be23d2a70051 with input: 
input: assistant: ```json
{
  "name": "Data Collection and Usage",
  "content": "The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privac...

[0m[1;3;38;2;155;135;227m> Running module judge_section_template with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR
CCPA

[0m[1;3;38;2;155;135;227m> Running module law_llm with input: 
prompt: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0mLLM Prompt Token Usage: 522
LLM Completion Token Usage

In [16]:
display(Markdown(f"<b>{content}</b>"))

<b>The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applicable law.

The data we collect can include the following:

Name and contact data: Your first and last name, email address, postal address, phone number, and other similar contact data. Legal basis: Performance of a contract, legitimate interests.

Credentials: Passwords, password hints, and similar security information used for authentication and account access. Legal basis: Performance of a contract.

Demographic data: Data about you such as your age, gender, country, and preferred language. Legal basis: Consent.

Payment data: Data to process payments, such as your payment instrument number (such as a credit card number) and the security code associated with your payment instrument. Legal basis: Performance of a contract.

Subscription and licensing data: Information about your subscriptions, licenses, and other entitlements. Legal basis: Performance of a contract, legitimate interests.

Microsoft uses the data we collect to provide you rich, interactive experiences. In particular, we use data to:

Provide our products, which includes updating, securing, and troubleshooting, as well as providing support. It also includes sharing data, when it is required to provide the service or carry out the transactions you request. Legal basis: Performance of a contract, legitimate interests.

Improve and develop our products. Legal basis: Legitimate interests.

Personalize our products and make recommendations. Legal basis: Consent.

Advertise and market to you, which includes sending promotional communications, targeting advertising, and presenting you relevant offers. Legal basis: Consent.

We also use the data to operate our business, which includes analyzing our performance, meeting our legal obligations, developing our workforce, and doing research. Legal basis: Legitimate interests, compliance with legal obligations.

Methods of data collection include directly from users and through cookies. We obtain your knowledge and consent for the collection, use, or disclosure of personal information. Consent is meaningful and explained in a manner that individuals can understand. You may withdraw consent at any time, subject to legal or contractual restrictions. We limit the collection of personal information to what is necessary for the identified purposes and collect information by fair and lawful means.</b>

## Save links

In [11]:
law_links = {
    'regulations': [
        'General Data Protection Regulation (GDPR)',
        'California Consumer Privacy Act (CCPA)',
        'Personal Information Protection and Electronic Documents Act (PIPEDA)'
    ],
    'links': [
        'https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016L0680',
        'https://leginfo.legislature.ca.gov/faces/codes_displayText.xhtml?division=3.&part=4.&lawCode=CIV&title=1.81.5',
        'https://laws-lois.justice.gc.ca/ENG/ACTS/P-8.6/FullText.html'
    ]
}

In [12]:
pd.DataFrame(law_links).to_csv('../data/regulations/links.csv',
                               index=False,
                               encoding='utf-8')

## To QueryEngines

In [5]:
links_df = pd.read_csv('../data/regulations/Region Data Regulation.csv',
                       encoding='utf-8')
links_df

Unnamed: 0,regulations,regions,links
0,General Data Protection Regulation (GDPR),European Union,https://eur-lex.europa.eu/legal-content/EN/TXT...
1,California Consumer Privacy Act (CCPA),California,https://leginfo.legislature.ca.gov/faces/codes...
2,Personal Information Protection and Electronic...,Canada,https://laws-lois.justice.gc.ca/ENG/ACTS/P-8.6...
3,Privacy Act 1988,Australia,https://www.legislation.gov.au/C2024A00024/lat...
4,Data Protection Act 2018,United Kingdom,https://www.legislation.gov.uk/ukpga/2018/12/c...
5,APPI (Act on the Protection of Personal Inform...,Japan,https://www.japaneselawtranslation.go.jp/en/la...
6,PIPA (Personal Information Protection Act),South Korea,https://www.law.go.kr/LSW/lsInfoP.do?lsiSeq=21...
7,LGPD (Lei Geral de Proteção de Dados),Brazil,https://www.planalto.gov.br/ccivil_03/_ato2015...
8,DPDP Act (Digital Personal Data Protection Act),India,https://meity.gov.in/writereaddata/files/Digit...
9,POPIA (Protection of Personal Information Act),South Africa,https://popia.co.za/


In [6]:
regulation_text = SimpleWebPageReader(html_to_text=True).load_data(
    links_df['links'].to_list()[:3])

In [7]:
regulation_text

 Document(id_='https://leginfo.legislature.ca.gov/faces/codes_displayText.xhtml?division=3.&part=4.&lawCode=CIV&title=1.81.5', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text="  * skip to content\n  * home\n  * accessibility\n  * FAQ\n  * feedback\n  * sitemap\n  * login\n  * x\n\n![Legislative Information header image: click to go to the home\npage](/resources/images/header_img.png)\n\n![Legislative Information header\nimage](/resources/images/header_img_mobile_01.png) ![home image: click to go\nto the home page](/resources/images/header_img_mobile_02.png) ![Bill Search\nimage: click to go to the bill search\npage](/resources/images/header_img_mobile_025.png) [ ![back button image:\nclick to go to previous page](/resources/images/header_img_mobile_03.png)]()  \n![Legislative Information text\nimage](/resources/images/header_img_mobile_04.png)\n\n![magnifying glass image](/resources/images/mag_glass.png) Quick Search:\

In [9]:
summary_index = SummaryIndex.from_documents([regulation_text[0]])

In [None]:
summary_index.as_query_engine()

In [6]:
# regulation_query_tools = [
#     QueryEngineTool(
#         query_engine=VectorStoreIndex.from_documents(
#             [regulation_text[i]]).as_query_engine(llm=llm),
#         metadata=ToolMetadata(
#             name=links_df.loc[i, 'regulations'],
#             description=("refer the text when you want to make something be "
#                          f"{links_df.loc[i, 'regulations']} compliant"),
#         ),
#     ) for i in range(len(regulation_text))
# ]

In [6]:
regulation_query_tools = [
    QueryEngineTool(
        query_engine=SummaryIndex.from_documents([
            regulation_text[i]
        ]).as_query_engine(response_mode="tree_summarize", llm=llm),
        metadata=ToolMetadata(
            name=links_df.loc[i, 'regulations'],
            description=("refer the text when you want to make something be "
                         f"{links_df.loc[i, 'regulations']} compliant"),
        ),
    ) for i in range(len(regulation_text))
]

In [7]:
regulation_query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=regulation_query_tools, llm=llm, verbose=True)

## Syllabus Generator (RAW)

In [6]:
ask_syllabus_template = PromptTemplate(ASK_SYLLABUS_TEMPLATE)

In [6]:
class SectionNames(str, Enum):
    SECTION1 = "Introduction"
    SECTION2 = "Data Controller Information"
    SECTION3 = "Data Collection and Usage"
    SECTION4 = "Data Subject Rights"
    SECTION5 = "Data Sharing and Transfers"
    SECTION6 = "Data Retention"
    SECTION7 = "Disclosure of Personal Information"
    SECTION8 = "Security Measures"
    SECTION9 = "Automated Decision-Making and Profiling"
    SECTION10 = "Cookies and Tracking Technologies"
    SECTION11 = "Changes to the Privacy Policy"
    SECTION12 = "Contact Information"


class Section(BaseModel):
    name: SectionNames = Field(..., title="Section Name")
    key_points: List[str] = Field(..., title="List of Key Points")


SectionList = conlist(Section, min_length=12, max_length=12)


class Syllabus(BaseModel):
    sections: SectionList = Field(..., title="List of Sections")

In [7]:
output_parser = PydanticOutputParser(SyllabusFormat)
split_section_template = PromptTemplate(
    output_parser.format(FORMAT_SYLLABUS_TEMPLATE))


In [14]:
print(split_section_template.template)

Given the privacy policy sections and key points:

{query_str}

DO NOT modify the key points!!!
Please rearrange the sections and key points and output with the following JSON format:


Here's a JSON schema to follow:
{{"$defs": {{"Section": {{"properties": {{"name": {{"allOf": [{{"$ref": "#/$defs/SectionNames"}}], "title": "Section Name"}}, "key_points": {{"items": {{"type": "string"}}, "title": "List of Key Points", "type": "array"}}}}, "required": ["name", "key_points"], "title": "Section", "type": "object"}}, "SectionNames": {{"enum": ["Introduction", "Data Controller Information", "Data Collection and Usage", "Data Subject Rights", "Data Sharing and Transfers", "Data Retention", "Disclosure of Personal Information", "Security Measures", "Automated Decision-Making and Profiling", "Cookies and Tracking Technologies", "Changes to the Privacy Policy", "Contact Information"], "title": "SectionNames", "type": "string"}}}}, "properties": {{"sections": {{"items": {{"$ref": "#/$defs/Sectio

In [8]:
syllabus_generator.module_dict[list(syllabus_generator.module_dict.keys(
))[2]].prompt.template == split_section_template.template

True

In [9]:
p = QueryPipeline(chain=[
    ask_syllabus_template,
    regulation_query_engine,
    split_section_template,
    llm,
    output_parser,
],
                  verbose=True)

In [13]:
output = p.run(regulations="GDPR, PIPEDA")
# display(Markdown(f"<b>{output.response}</b>"))

[1;3;38;2;155;135;227m> Running module b637576c-0721-486a-9359-7f481394a8d9 with input: 
regulations: GDPR, PIPEDA

[0m[1;3;38;2;155;135;227m> Running module c6be2272-cd88-43cf-be35-48299779b46a with input: 
input: I want to write a privacy policy, which should be GDPR, PIPEDA compliant. 
    What sections should it include? And for each section, what key points should be covered?
    List them as bullet points....

[0mGenerated 4 sub questions.
[1;3;38;2;237;90;200m[General Data Protection Regulation (GDPR)] Q: What sections should a GDPR compliant privacy policy include?
[0m[1;3;38;2;90;149;237m[General Data Protection Regulation (GDPR)] Q: For each section of a GDPR compliant privacy policy, what key points should be covered?
[0m[1;3;38;2;11;159;203m[Personal Information Protection and Electronic Documents Act (PIPEDA)] Q: What sections should a PIPEDA compliant privacy policy include?
[0m[1;3;38;2;155;135;227m[Personal Information Protection and Electronic Documents Act 

In [60]:
output.sections

[Section(name=<SectionNames.SECTION1: 'Introduction'>, key_points=['Purpose of the privacy policy.', "Overview of the organization's commitment to data protection."]),
 Section(name=<SectionNames.SECTION2: 'Data Controller Information'>, key_points=['Identity and contact details of the data controller.', 'Contact details of the Data Protection Officer (if applicable).']),
 Section(name=<SectionNames.SECTION3: 'Data Collection and Usage'>, key_points=['Categories of personal data collected (e.g., contact details, financial information).', 'Methods of data collection (e.g., directly from users, through cookies).', 'Specific purposes for which personal data is processed.', 'Legal basis for each processing activity (e.g., consent, performance of a contract, legal obligation).', "Obtain the individual's knowledge and consent for the collection, use, or disclosure of personal information, except where inappropriate.", 'Ensure consent is meaningful by explaining the purposes in a manner that 

In [61]:
for i, section in enumerate(output.sections):
    display(
        Markdown(f"<b>**{i+1}. {section.name.value}**\n\n&ensp;- " +
                 '\n\n&ensp;- '.join(section.key_points) + "</b>"))

<b>**1. Introduction**

&ensp;- Purpose of the privacy policy.

&ensp;- Overview of the organization's commitment to data protection.</b>

<b>**2. Data Controller Information**

&ensp;- Identity and contact details of the data controller.

&ensp;- Contact details of the Data Protection Officer (if applicable).</b>

<b>**3. Data Collection and Usage**

&ensp;- Categories of personal data collected (e.g., contact details, financial information).

&ensp;- Methods of data collection (e.g., directly from users, through cookies).

&ensp;- Specific purposes for which personal data is processed.

&ensp;- Legal basis for each processing activity (e.g., consent, performance of a contract, legal obligation).

&ensp;- Obtain the individual's knowledge and consent for the collection, use, or disclosure of personal information, except where inappropriate.

&ensp;- Ensure consent is meaningful by explaining the purposes in a manner that individuals can understand.

&ensp;- Limit the collection of personal information to what is necessary for the identified purposes.

&ensp;- Collect information by fair and lawful means.</b>

<b>**4. Data Subject Rights**

&ensp;- Right to access personal data.

&ensp;- Right to rectification, erasure, and restriction of processing.

&ensp;- Right to data portability.

&ensp;- Right to object to processing.

&ensp;- Right to withdraw consent at any time.</b>

<b>**5. Data Sharing and Transfers**

&ensp;- Categories of recipients with whom personal data is shared.

&ensp;- Information on international data transfers and safeguards in place (e.g., adequacy decisions, standard contractual clauses).</b>

<b>**6. Data Retention**

&ensp;- Criteria used to determine retention periods for personal data.

&ensp;- Specific retention periods where applicable.

&ensp;- Retain personal information only as long as necessary for the fulfillment of those purposes.</b>

<b>**7. Disclosure of Personal Information**

&ensp;- Use or disclose personal information only for the purposes for which it was collected, unless consent is obtained or as required by law.

&ensp;- Inform individuals of the existence, use, and disclosure of their personal information upon request.

&ensp;- Provide access to personal information and allow individuals to challenge the accuracy and completeness of the information and have it amended as appropriate.</b>

<b>**8. Security Measures**

&ensp;- Description of technical and organizational measures to protect personal data.

&ensp;- Procedures in place to handle data breaches.</b>

<b>**9. Automated Decision-Making and Profiling**

&ensp;- Information on any automated decision-making, including profiling.

&ensp;- Significance and consequences of such processing for data subjects.</b>

<b>**10. Cookies and Tracking Technologies**

&ensp;- Types of cookies and tracking technologies used.

&ensp;- Purpose of using cookies.

&ensp;- How users can manage cookie preferences.</b>

<b>**11. Changes to the Privacy Policy**

&ensp;- How and when changes to the privacy policy will be communicated to users.

&ensp;- Effective date of the current privacy policy.</b>

<b>**12. Contact Information**

&ensp;- Information on how to lodge a complaint with the organization.

&ensp;- Contact details of the relevant supervisory authority for data protection issues.

&ensp;- Provide mechanisms for individuals to challenge the organization's compliance with the above principles.

&ensp;- Investigate and respond to complaints regarding the handling of personal information.</b>

In [27]:
output.sections[0].key_points

['Purpose of the privacy policy.',
 "Overview of the organization's commitment to data protection.",
 'Scope of the privacy policy.']

In [11]:
output = regulation_query_engine.query(
    """I want to write a privacy policy, which should be GDPR and CCPA compliant.
    What sections should it include? And for each section, what key points should be covered?
    List them as bullet points.
    """)
display(Markdown(f"<b>{output.response}</b>"))

Generated 4 sub questions.
[1;3;38;2;237;90;200m[General Data Protection Regulation (GDPR)] Q: What sections should a GDPR compliant privacy policy include?
[0m[1;3;38;2;90;149;237m[General Data Protection Regulation (GDPR)] Q: For each section of a GDPR compliant privacy policy, what key points should be covered?
[0m[1;3;38;2;11;159;203m[California Consumer Privacy Act (CCPA)] Q: What sections should a CCPA compliant privacy policy include?
[0m[1;3;38;2;155;135;227m[California Consumer Privacy Act (CCPA)] Q: For each section of a CCPA compliant privacy policy, what key points should be covered?
[0m[1;3;38;2;237;90;200m[General Data Protection Regulation (GDPR)] A: A GDPR compliant privacy policy should include the following sections:

1. **Introduction**: An overview of the policy and its purpose.
2. **Data Controller Information**: Details about the entity responsible for data processing.
3. **Data Protection Officer**: Contact information if a Data Protection Officer is app

<b>To write a privacy policy that is both GDPR and CCPA compliant, you should include the following sections and cover the key points for each:

1. **Introduction**:
   - Purpose of the privacy policy.
   - Overview of the organization's commitment to data protection.
   - Overview of GDPR and CCPA compliance.

2. **Data Controller Information**:
   - Name and contact details of the data controller.
   - Contact details of the Data Protection Officer (DPO), if applicable.

3. **Categories of Personal Information Collected**:
   - Types of personal information collected (e.g., identifiers, contact details, financial information, biometric information).
   - Methods of data collection (e.g., directly from users, through website forms).

4. **Sources of Personal Information**:
   - Categories of sources from which personal information is collected.

5. **Purpose of Data Processing**:
   - Specific purposes for which personal data is processed (e.g., service provision, marketing, legal compliance).
   - Legal basis for processing (e.g., consent, contractual necessity, legal obligation).
   - Whether the information is sold or shared.

6. **Legal Basis for Processing**:
   - The legal grounds for processing personal data under GDPR.

7. **Data Subject and Consumer Rights**:
   - Rights of data subjects under GDPR (e.g., access, rectification, erasure, restriction of processing, data portability, objection).
   - Rights of consumers under CCPA (e.g., right to know, access, delete, correct inaccurate personal information, opt-out of sale or sharing, limit use of sensitive personal information, no retaliation).
   - How data subjects and consumers can exercise their rights.

8. **Data Retention**:
   - Criteria for determining data retention periods.
   - Specific retention periods for different types of data, if applicable.

9. **Data Sharing and Recipients**:
   - Categories of recipients with whom data may be shared (e.g., service providers, business partners).
   - Information on international data transfers and safeguards in place.

10. **Security Measures**:
    - Description of technical and organizational measures in place to protect personal data.
    - Procedures for handling data breaches.

11. **Automated Decision-Making**:
    - Information on any automated decision-making processes, including profiling.

12. **Cookies and Tracking Technologies**:
    - Types of cookies and tracking technologies used.
    - Purpose of using cookies and how users can manage their cookie preferences.

13. **Financial Incentives**:
    - Description of any financial incentives offered in exchange for personal information.
    - Terms of the financial incentive program and how consumers can opt-in or opt-out.

14. **Non-Discrimination**:
    - Statement that the business will not discriminate against consumers for exercising their CCPA rights.

15. **Complaints**:
    - How to lodge a complaint with a supervisory authority.

16. **Changes to the Privacy Policy**:
    - How and when changes to the privacy policy will be communicated to users.
    - Effective date of the current privacy policy.

17. **Contact Information**:
    - How users can contact the organization for privacy-related inquiries or complaints.
    - Methods for consumers to submit requests regarding their personal information, including at least one designated method such as a toll-free telephone number or an email address.

Each section should be written in clear and plain language to ensure that it is easily understandable by the data subjects and consumers.</b>

In [9]:
output = regulation_query_engine.query(
    """I want to write a privacy policy, which should be GDPR and CCPA compliant.
    
    It includes the following sections:
    1. Introduction
    2. Identity and Contact Details of the Controller
    3. Data Collection and Use
    4. Data Subject Rights
    5. Data Sharing and Transfers
    6. Data Retention
    7. Security Measures
    8. Automated Decision-Making and Profiling
    9. Cookies and Tracking Technologies
    10. Changes to the Privacy Policy
    11. Contact Information

    Now given the sections, I want to know what should be included in each section to be compliant with GDPR and CCPA.
    List them as bullet points.
    """)
display(Markdown(f"<b>{output.response}</b>"))

Generated 22 sub questions.
[1;3;38;2;237;90;200m[General Data Protection Regulation (GDPR)] Q: What should be included in the Introduction section to be GDPR compliant?
[0m[1;3;38;2;90;149;237m[General Data Protection Regulation (GDPR)] Q: What should be included in the Identity and Contact Details of the Controller section to be GDPR compliant?
[0m[1;3;38;2;11;159;203m[General Data Protection Regulation (GDPR)] Q: What should be included in the Data Collection and Use section to be GDPR compliant?
[0m[1;3;38;2;155;135;227m[General Data Protection Regulation (GDPR)] Q: What should be included in the Data Subject Rights section to be GDPR compliant?
[0m[1;3;38;2;237;90;200m[General Data Protection Regulation (GDPR)] Q: What should be included in the Data Sharing and Transfers section to be GDPR compliant?
[0m[1;3;38;2;90;149;237m[General Data Protection Regulation (GDPR)] Q: What should be included in the Data Retention section to be GDPR compliant?
[0m[1;3;38;2;11;159;203

<b>To ensure your privacy policy is compliant with both GDPR and CCPA, each section should include the following elements:

### 1. Introduction
- **GDPR**:
  - Identity of the data controller.
  - Existence of the data processing operation.
  - Purposes of the data processing.
  - Right of the data subject to lodge a complaint.
  - Right of the data subject to request access to, rectification or erasure of personal data, or restriction of processing.
- **CCPA**:
  - Categories of personal information collected.
  - Purposes for collection.
  - Disclosure of sale or sharing of personal information.
  - Categories and purposes of sensitive personal information collected.
  - Retention period for each category of personal information.
  - Opt-out information.
  - Accessibility for consumers with disabilities and in the primary language used to interact with the consumer.

### 2. Identity and Contact Details of the Controller
- **GDPR**:
  - Identity of the controller.
  - Existence of the processing operation.
  - Purposes of the processing.
  - Right to lodge a complaint.
  - Right to request access to and rectification or erasure of personal data or restriction of processing.
  - Legal basis for the processing.
  - Data storage duration.
- **CCPA**:
  - Name or names of the owner, director, officer, or management employee.
  - Contact information, including business title.

### 3. Data Collection and Use
- **GDPR**:
  - Identity of the controller.
  - Existence of the processing operation.
  - Purposes of the processing.
  - Legal basis for processing.
  - Categories of personal data.
  - Data subject rights.
  - Safeguards for sensitive data.
  - Time limits for data storage.
  - Quality and accuracy of data.
  - Automated decision-making information.
  - Right to lodge a complaint.
  - Additional information for identity verification.
- **CCPA**:
  - Categories of personal information collected.
  - Categories of sensitive personal information collected.
  - Retention period for each category of personal information.
  - Purpose of collection and use.
  - Third-party agreements.
  - Security measures.
  - Consumer rights to request deletion.
  - Opt-out options.

### 4. Data Subject Rights
- **GDPR**:
  - Identity of the controller.
  - Existence of the processing operation.
  - Purposes of the processing.
  - Right to lodge a complaint.
  - Right to request access to and rectification or erasure of personal data or restriction of processing.
  - Legal basis for the processing.
  - Data storage duration.
  - How to exercise these rights.
- **CCPA**:
  - Right to request correction of inaccurate personal information.
  - Right to know what personal information is being collected.
  - Right to opt-out of the sale or sharing of personal information.
  - Right to limit the use or disclosure of sensitive personal information.
  - How to exercise these rights.
  - Disclosure of financial incentives.

### 5. Data Sharing and Transfers
- **GDPR**:
  - Conditions for transfers to third countries or international organizations.
  - Protections for sensitive personal data.
  - Automated decision-making safeguards.
  - Information accessibility.
  - Mechanisms for data subject rights.
  - Identity confirmation procedures.
  - Mandatory information.
  - Adequacy decisions.
  - Periodic review and monitoring.
  - Consultation and remediation procedures.
  - Appropriate safeguards for non-adequacy decision transfers.
- **CCPA**:
  - Identification of the consumer and association of information.
  - Categories of personal information sold or shared.
  - Categories of personal information disclosed for a business purpose.
  - Description of consumer rights and methods for submitting requests.
  - Lists of categories of personal information collected, sold, shared, and disclosed.
  - Opt-out information.
  - Terms of financial incentives.

### 6. Data Retention
- **GDPR**:
  - Purpose limitation.
  - Time limits for data retention.
  - Review procedures.
  - Legal basis for processing and storage duration.
  - Data subject rights.
  - Compliance responsibility.
- **CCPA**:
  - Retention period for each category of personal information.
  - Criteria used to determine retention period.
  - Statement on retention duration relative to the disclosed purpose.

### 7. Security Measures
- **GDPR**:
  - Evaluation of risks inherent in data processing.
  - Implementation of measures to mitigate risks.
  - Appropriate level of security, including confidentiality.
  - Technical and organizational measures (e.g., encryption).
- **CCPA**:
  - Reasonable security procedures and practices.
  - Protection against unauthorized or illegal access, destruction, use, modification, or disclosure.

### 8. Automated Decision-Making and Profiling
- **GDPR**:
  - Transparency and information about automated decision-making.
  - Legal basis for automated decision-making.
  - Rights of data subjects (human intervention, express point of view, contest decision).
  - Safeguards for fairness, transparency, and non-discrimination.
  - Data minimization and accuracy.
  - Data Protection Impact Assessments (DPIAs).
  - Security measures.
  - Regular reviews.
- **CCPA**:
  - Opt-out links for sale or sharing of personal information.
  - Description of consumer rights.
  - Mechanism for exercising rights without creating an account.
  - Use of personal information solely for compliance with requests.

### 9. Cookies and Tracking Technologies
- **GDPR**:
  - Types of cookies and tracking technologies used.
  - Purpose of each cookie and tracking technology.
  - Data collected.
  - Legal basis for processing.
  - Duration of data storage.
  - Third-party access.
  - User rights.
  - Instructions for managing cookies.
  - Impact of disabling cookies.
  - Contact information.
- **CCPA**:
  - Description of consumer rights regarding cookies and tracking technologies.
  - Opt-out links for sale or sharing of personal information.
  - Information on financial incentives related to personal information.

### 10. Changes to the Privacy Policy
- **GDPR**:
  - Notification of changes.
  - Effective date of changes.
  - Description of changes.
  - User rights regarding changes.
  - Access to previous versions.
  - Contact information.
- **CCPA**:
  - Disclosure of consumer rights.
  - Procedures for requests.
  - Business obligations.
  - Frequency and circumstances for requests.
  - Opt-out information.
  - Updates and notifications.
  - Accessibility.

### 11. Contact Information
- **GDPR**:
  - Identity of the controller.
  - Existence of the processing operation.
  - Purposes of the processing.
  - Right to lodge a complaint.
  - Right to request access to and rectification or erasure of personal data or restriction of processing.
  - Legal basis for processing.
  - Data storage duration.
- **CCPA**:
  - Contact information for handling consumer inquiries.
  - Opt-out links for sale or sharing of personal information.
  - Links to limit the use or disclosure of sensitive personal information.</b>

In [13]:
# from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.query_pipeline import QueryPipeline
from llama_index.core import PromptTemplate

# define modules
prompt_str = "Please generate a question about Paul Graham's life regarding the following topic {topic}"
prompt_tmpl = PromptTemplate(prompt_str)
llm = OpenAI(model="gpt-3.5-turbo")
retriever = SummaryIndex.from_documents([regulation_text[0]
                                         ]).as_retriever(similarity_top_k=3)
# reranker = CohereRerank()
summarizer = TreeSummarize(llm=llm)

In [None]:
index = SummaryIndex.from_documents([regulation_text[0]])

In [9]:
synthesizer = get_response_synthesizer(llm=llm,
                                       response_mode=ResponseMode.COMPACT)

In [None]:
index.as_retriever().retrieve(
    "What sections should a GDPR compliant privacy policy include? And for each section, what key points should be covered?"
)

[NodeWithScore(node=TextNode(id_='8c8e6d71-525e-431a-9e1f-5265fba77978', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32016L0680', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='caf142112d0f6a347f255b21c17bed3b471f6c85a139a8ffc943ead6774c8819'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='738348da-0799-4937-9547-8bee98317c52', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='ca82bc8311cc6b2e31c37be9b4f045d8b8594ac366ae78e8f72c551c3520a820')}, text='4.5.2016\n\n|\n\nEN\n\n|\n\nOfficial Journal of the European Union\n\n|\n\nL 119/89  \n  \n---|---|---|---  \n  \n* * *\n\nDIRECTIVE (EU) 2016/680 OF THE EUROPEAN PARLIAMENT AND OF THE COUNCIL\n\nof 27 April 2016\n\non the protection of natural persons with regard to the processing of personal\ndata by competent authorities for the purpo

In [19]:
synthesizer.get_response?

[1;31mSignature:[0m
[0msynthesizer[0m[1;33m.[0m[0mget_response[0m[1;33m([0m[1;33m
[0m    [0mquery_str[0m[1;33m:[0m [0mstr[0m[1;33m,[0m[1;33m
[0m    [0mtext_chunks[0m[1;33m:[0m [0mSequence[0m[1;33m[[0m[0mstr[0m[1;33m][0m[1;33m,[0m[1;33m
[0m    [0mprev_response[0m[1;33m:[0m [0mUnion[0m[1;33m[[0m[0mpydantic[0m[1;33m.[0m[0mv1[0m[1;33m.[0m[0mmain[0m[1;33m.[0m[0mBaseModel[0m[1;33m,[0m [0mstr[0m[1;33m,[0m [0mGenerator[0m[1;33m[[0m[0mstr[0m[1;33m,[0m [0mNoneType[0m[1;33m,[0m [0mNoneType[0m[1;33m][0m[1;33m,[0m [0mAsyncGenerator[0m[1;33m[[0m[0mstr[0m[1;33m,[0m [0mNoneType[0m[1;33m][0m[1;33m,[0m [0mNoneType[0m[1;33m][0m [1;33m=[0m [1;32mNone[0m[1;33m,[0m[1;33m
[0m    [1;33m**[0m[0mresponse_kwargs[0m[1;33m:[0m [0mAny[0m[1;33m,[0m[1;33m
[0m[1;33m)[0m [1;33m->[0m [0mUnion[0m[1;33m[[0m[0mpydantic[0m[1;33m.[0m[0mv1[0m[1;33m.[0m[0mmain[0m[1;33m.[0m[0mBaseModel[

## Section Modifier (RAW)

In [None]:
REGENERATE_PROMPT = """\
This {section_name} is a section of a privacy policy:

{section_text}

Now, please revise the section based on the following suggestions:

{suggestions}

NOTE THAT: The revised section should comply with the following key points:

{key_points}
"""

In [None]:
class SectionContent(BaseModel):
    name: SectionNames = Field(..., title="Section Name")
    content: str = Field(..., title="Section Content")

In [None]:
regenerate_parser = PydanticOutputParser(SectionContent)
regenerate_template = PromptTemplate(
    regenerate_parser.format(REGENERATE_PROMPT))

In [None]:
regenerate_pipeline = QueryPipeline(
    chain=[regenerate_template, Settings.llm, regenerate_parser],
    verbose=True,
)

In [None]:
section_name = "Data Collection and Usage"
regenerate = regenerate_pipeline.run(section_name=section_name,
                                     section_text=generated_section,
                                     suggestions=judge["suggestions"],
                                     key_points=key_points[section_name])
regenerate

[1;3;38;2;155;135;227m> Running module 73deb173-6f71-40d6-a4bb-d3aa383412d5 with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
suggestions: Clarify the legal basis for processing each type of data collected, such as consent, contract, or legitimate interest.
key_points: ['Types of personal data collected.', 'Methods of data collection (e.g., directly from users, through cookies).', 'Specific purposes for which personal data is processed.', 'Legal basis for processing...

[0m[1;3;38;2;155;135;227m> Running module 58789490-a132-4afc-be1a-02d2fad983fd with input: 
messages: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy setti

SectionContent(name=<SectionNames.SECTION3: 'Data Collection and Usage'>, content='The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applicable law.\n\nThe data we collect can include the following:\n\nName and contact data. Your first and last name, email address, postal address, phone number, and other similar contact data. Legal basis: Contractual necessity and legitimate interest.\n\nCredentials. Passwords, password hints, and similar security information used for authentication and account access. Legal basis: Contractual necessity.\n\nDemographic data. Data about you such as your age, gender, country, and preferred language. Legal basis: Consent and legitimate interest.\n\nPayment data. Data to process payments, such as your payment instrument number (such as a credit card number) and the security code associated with your payment instrume

In [None]:
judge = judge_pipeline.run(
    section_name=section_name,
    section_text=regenerate.content,
    regulations="GDPR, CCPA",
)
judge

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module judge_section_template with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR, CCPA

[0m[1;3;38;2;155;135;227m> Running module law_llm with input: 
prompt: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0mLLM Prompt Token Usage: 497
LLM Completion Token Usage: 234
[1;3;38;2;155;135;227m> Running module format_judge_template with input: 
query_str: This section appears to be compliant with the GDPR and CCPA regulations, as it outlines the data collected by Microsoft, the purposes for which the data is collected, and the legal bases for processin...

[0m[1;

{'pass': False,
 'suggestions': "Clarify the legal bases for each type of data collected. While the section mentions 'Contractual necessity and legitimate interest' as the legal bases for most data, it would be helpful to provide more specific information for each type of data collected."}

## Judge Generator (RAW)

In [None]:
JUDGE_SECTION_TEMPLATE = """\
This {section_name} is a section of a privacy policy:

{section_text}

Please judge whether this section is compliant with the {regulations} regulations. If not, please provide suggestions on how to improve it.
NOTE THAT this is just one section, not the whole privacy policy. Only judge this section in isolation.
"""
FORMAT_JUDGE_TEMPLATE = """\
Given the comments from a legal expert:

{query_str}

Please extract the legal expert's suggestions on how to improve the sections in a privacy policy.
"""

In [None]:
class Judge(BaseModel):

    name: SectionNames = Field(..., title="Section Name")

    suggestions: str = Field(
        ...,
        title="Suggestions",
        description="Suggestions for improvement.",
    )


class Judges(BaseModel):

    judges: List[Judge] = Field(
        ...,
        title="Judges",
        description="Judges for the sections.",
    )
    # suggestions: Dict[SectionNames, str] = Field(
    #     ...,
    #     title="Suggestions",
    #     description="Suggestions for sections improvement.",
    # )


# class Judges2(BaseModel):
#     improvements: Dict[SectionNames, str] = Field(
#         ...,
#         title="improvements",
#         description="Suggestions for each section's improvement.",
#     )

In [7]:
judge_section_template = PromptTemplate(JUDGE_SECTION_TEMPLATE)
judge_parser = PydanticOutputParser(Judges)
format_judge_template = PromptTemplate(
    judge_parser.format(FORMAT_JUDGE_TEMPLATE))

In [54]:
test_func = FnComponent(
    fn=lambda input, section_name: f"{len(input.judges)}, {len(section_name)}")
# {
#     "section_name": section_name,
#     "section_text": section_text,
#     "regulations": regulations,
# },

In [8]:
def determine_judge(input: Judges, section_name: str):

    for judge in input.judges:
        if judge.name.value == section_name:
            return {"pass": False, "suggestions": judge.suggestions}

        return {"pass": True, "suggestions": ""}


In [10]:
judge_pipeline = QueryPipeline(verbose=True)
judge_pipeline.add_modules({
    "judge_section_template": judge_section_template,
    "law_llm": law_llm,
    "format_judge_template": format_judge_template,
    "llm": Settings.llm,
    "judge_parser": judge_parser,
    "determine_judge": FnComponent(fn=determine_judge),
})
judge_pipeline.add_link("judge_section_template", "law_llm")
judge_pipeline.add_link("law_llm", "format_judge_template")
judge_pipeline.add_link("format_judge_template", "llm")
judge_pipeline.add_link("llm", "judge_parser")
judge_pipeline.add_link("judge_parser", "determine_judge")

In [11]:
result = judge_pipeline.run_multi({
    "judge_section_template": {
        'section_name': "Data Collection and Usage",
        'section_text': generated_section,
        'regulations': "GDPR, CCPA",
    },
    "determine_judge": {
        'section_name': "Data Collection and Usage"
    }
})

ValueError: Expected root keys do not match up with input keys.
Expected root keys: ['judge_section_template']
Input keys: dict_keys(['judge_section_template', 'determine_judge'])


In [66]:
q = QueryPipeline(verbose=True)
q.add_modules({
    "judge_section_template": judge_section_template,
    "law_llm": law_llm,
    "format_judge_template": format_judge_template,
    "llm": Settings.llm,
    "judge_parser": judge_parser,
    "determine_judge": FnComponent(fn=determine_judge),
})
q.add_link("judge_section_template", "law_llm")
q.add_link("law_llm", "format_judge_template")
q.add_link("format_judge_template", "llm")
q.add_link("llm", "judge_parser")
q.add_link("judge_parser", "determine_judge", dest_key="judges")
q.add_link("judge_section_template", "determine_judge", dest_key="prompt")

In [56]:
q.module_dict

{'judge_section_template': PromptComponent(partial_dict={}, prompt=PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['section_name', 'section_text', 'regulations'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='This {section_name} is a section of a privacy policy:\n\n{section_text}\n\nPlease judge whether this section is compliant with the {regulations} regulations. If not, please provide suggestions on how to improve it.\nNOTE THAT this is just one section, not the whole privacy policy. Only judge this section in isolation.\n'), llm=None, format_messages=False),
 'law_llm': LLMCompleteComponent(partial_dict={}, llm=HuggingFaceLLMModified(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x0000011D5391D420>, system_prompt='', messages_to_prompt=<function prepare_law_llm.<locals>.messages_to_prompt at 0x0000011BCAE2BAC0>, completion_to_prompt=<function default_completion_to_prom

In [67]:
output = q.run(
    section_name="Data Collection and Usage",
    section_text=generated_section,
    regulations="GDPR, CCPA",
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module judge_section_template with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR, CCPA

[0m[1;3;38;2;155;135;227m> Running module law_llm with input: 
prompt: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0m[1;3;38;2;155;135;227m> Running module format_judge_template with input: 
query_str: This section appears to be compliant with the GDPR and CCPA regulations. It provides clear information about the data collected by Microsoft, the purposes for which the data is used, and the legal bas...

[0m[1;3;38;2;155;135;227m> Running module llm with input: 
message

In [68]:
output

{'pass': False,
 'suggestions': 'Clarify the legal basis for processing each type of data collected, such as consent, contract, or legitimate interest.'}

In [50]:
output

'4, 1911'

In [44]:
module_names = list(judge_pipeline.module_dict.keys())

In [45]:
judge_pipeline.add_link(module_names[0], module_names[-1], dest_key="prompt")

In [46]:
output = judge_pipeline.run(
    section_name="Data Collection and Usage",
    section_text=generated_section,
    regulations="GDPR, CCPA",
)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


[1;3;38;2;155;135;227m> Running module c950a9cc-1007-47c8-81be-975e7d5a584c with input: 
section_name: Data Collection and Usage
section_text: The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settings), the products and features you use, your location, and applica...
regulations: GDPR, CCPA

[0m[1;3;38;2;155;135;227m> Running module 17259d05-5c9b-49e8-a997-b6ef220dd202 with input: 
prompt: This Data Collection and Usage is a section of a privacy policy:

The data we collect depends on the context of your interactions with Microsoft and the choices you make (including your privacy settin...

[0m[1;3;38;2;155;135;227m> Running module 26206de1-b0cc-4aac-b394-06e0d33ed296 with input: 
query_str: This section appears to be compliant with the GDPR and CCPA regulations. It provides clear information about the data collected by Microsoft, the purposes for which the data is used, and the legal bas...

[0m[1;3;

ValueError: Module input keys must have exactly one key if dest_key is not specified. Remaining keys: in module: {'input', 'prompt'}

In [None]:
output.judges

[Judge(name=<SectionNames.SECTION3: 'Data Collection and Usage'>, suggestions='Clarify the legal basis for processing each type of data collected, such as consent, contract, or legitimate interest.'),
 Judge(name=<SectionNames.SECTION6: 'Data Retention'>, suggestions='Include information about how long Microsoft retains the data and the criteria used to determine the retention period.'),
 Judge(name=<SectionNames.SECTION4: 'Data Subject Rights'>, suggestions='Provide information about how data subjects can exercise their rights to access, rectify, erase, restrict, and object to processing.'),
 Judge(name=<SectionNames.SECTION5: 'Data Sharing and Transfers'>, suggestions='Ensure that data is shared only on a need-to-know basis and provide details about the entities with whom data is shared.')]

## Regulation Selector (RAW)

In [7]:
REGION_TEMPLATE = """
Assistant: I can help determine which region a user input belongs to.
The regions are: {regions}. Make sure to include all options.

Examples:
User: los angeles
Assistant: California, United States
User: hong kong
Assistant: Other
User: berlin
Assistant: European Union
User: toronto
Assistant: Canada
Now, let's try with your input.
User: {user_input}
Assistant: The regions that the user input belongs to are:
"""

In [8]:
regions_parser = PydanticOutputParser(SelectedRegions)
region_selection_template = PromptTemplate(
    regions_parser.format(REGION_TEMPLATE))

In [11]:
region_selection_pipeline = QueryPipeline(chain=[
    region_selection_template,
    llm,
    regions_parser,
    FnComponent(fn=lambda input: [each.value for each in input.regions]),
],
                                          verbose=True)

In [12]:
region_selection_pipeline.run(regions=', '.join(
    [each.value for each in RegulationRegions]),
                              user_input="San Diego")

[1;3;38;2;155;135;227m> Running module 2bdcd37d-5565-47c1-86bd-7c8ee315d2a6 with input: 
regions: United States, California, European Union, United Kingdom, Canada, Australia, India, Singapore, Japan, South Korea, Brazil, South Africa, Other
user_input: San Diego

[0m[1;3;38;2;155;135;227m> Running module 618e45c0-25f9-4a56-bee2-0c38c980eccd with input: 
messages: 
Assistant: I can help determine which region a user input belongs to.
The regions are: United States, California, European Union, United Kingdom, Canada, Australia, India, Singapore, Japan, South Kor...

[0mLLM Prompt Token Usage: 294
LLM Completion Token Usage: 13
[1;3;38;2;155;135;227m> Running module 9099210b-ef12-414f-b986-f26900e010f4 with input: 
input: assistant: {
  "regions": ["California", "United States"]
}

[0m[1;3;38;2;155;135;227m> Running module e318e750-742f-48e0-b5b7-cae45c9f0451 with input: 
input: regions=[<RegulationRegions.USCA: 'California'>, <RegulationRegions.US: 'United States'>]

[0m

['California', 'United States']

In [8]:
[each.value for each in RegulationRegions]

['United States',
 'California',
 'European Union',
 'United Kingdom',
 'Canada',
 'Australia',
 'India',
 'Singapore',
 'Japan',
 'South Korea',
 'Brazil',
 'South Africa',
 'Other']