In [1]:
import os
import uuid
from typing import Sequence, List, Any, Dict, Tuple

import torch
import regex as re
import tiktoken
import json
from llama_index.agent.openai import OpenAIAgent, OpenAIAgentWorker
from llama_index.llms.openai import OpenAI
from llama_index.core.agent import FunctionCallingAgentWorker, CustomSimpleAgentWorker, ReActAgentWorker
from llama_index.agent.introspective import SelfReflectionAgentWorker, IntrospectiveAgentWorker
from llama_index.agent.openai.step import OpenAIAgentWorker
from llama_index.core import Settings, StorageContext
from llama_index.core.indices import load_index_from_storage
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.tools import BaseTool, FunctionTool, ToolOutput, QueryEngineTool, ToolMetadata
from llama_index.core.memory import (
    VectorMemory,
    SimpleComposableMemory,
    ChatSummaryMemoryBuffer,
    ChatMemoryBuffer,
)
from llama_index.core.bridge.pydantic import Field
from llama_index.core.agent.types import Task, TaskStep
from llama_index.core.chat_engine.types import AgentChatResponse
from llama_index.core.indices.vector_store import VectorStoreIndex
from llama_index.core.vector_stores.types import VectorStore
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.callbacks import CallbackManager, TokenCountingHandler
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.agent.utils import add_user_step_to_memory
from duckduckgo_search import DDGS
from IPython.display import Markdown, display
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig


  _torch_pytree._register_pytree_node(


In [2]:
model = 'gpt-4o'
llm = OpenAI(model=model)
tokenizer_fn = tiktoken.get_encoding("cl100k_base").encode
token_counter = TokenCountingHandler(tokenizer=tokenizer_fn)

Settings.llm = llm
# Settings.callback_manager = CallbackManager([token_counter])

In [3]:
model_name = "Equall/Saul-7B-Instruct-v1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map='auto',
    trust_remote_code=True,
    use_safetensors=True,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype="bfloat16",  # bfloat16 is recommended
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type='nf4',
        llm_int8_skip_modules=['lm_head'],
    ),
    # dtype=torch.bfloat16,
)

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]



In [4]:
def messages_to_prompt(messages: Sequence[ChatMessage]):

    messages = [{
        'role': msg.role.value,
        'content': msg.content.strip()
    } for msg in messages]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    return prompt

In [14]:
law_llm = HuggingFaceLLM(
    model_name=model_name,
    model=model,
    tokenizer=tokenizer,
    messages_to_prompt=messages_to_prompt,
)

In [6]:
class HuggingFaceAgentWorker(CustomSimpleAgentWorker):

    prefix_message: List[ChatMessage] = Field(default_factory=list,
                                              description="Prefix messages")

    def get_all_messages(self, task: Task) -> List[ChatMessage]:
        return (self.prefix_message + task.memory.get(input=task.input) +
                task.extra_state["new_memory"].get_all())

    def _initialize_state(self, task: Task, **kwargs: Any) -> Dict[str, Any]:
        pass

    def initialize_step(self, task: Task, **kwargs: Any) -> TaskStep:
        """Initialize step from task."""
        sources: List[ToolOutput] = []
        # temporary memory for new messages
        new_memory = ChatMemoryBuffer.from_defaults()
        # initialize task state
        task_state = {
            "sources": sources,
            "n_function_calls": 0,
            "new_memory": new_memory,
        }
        task.extra_state.update(task_state)

        return TaskStep(
            task_id=task.task_id,
            step_id=str(uuid.uuid4()),
            input=task.input,
        )

    def _run_step(self,
                  state: Dict[str, Any],
                  task: Task,
                  input: str | None = None) -> Tuple[AgentChatResponse, bool]:

        if input is not None:
            user_message = ChatMessage(content=input, role=MessageRole.USER)
            task.extra_state["new_memory"].put(user_message)
            if self.verbose:
                print(f"Added user message to memory: {input}")

        response = self.llm.chat(self.get_all_messages(task))
        # TODO: whether enable function call

        if self.verbose and response.message.content:
            print("=== LLM Response ===")
            print(str(response.message.content))

        task.extra_state["new_memory"].put(response.message)

        agent_response = AgentChatResponse(
            response=str(response.message.content),
            sources=task.extra_state["sources"],
        )

        return agent_response, True

    def _finalize_task(self, state: Dict[str, Any], **kwargs: Any) -> None:
        pass

    def finalize_task(self, task: Task, **kwargs: Any) -> None:
        # add new messages to memory
        task.memory.set(task.memory.get_all() +
                        task.extra_state["new_memory"].get_all())
        # reset new memory
        task.extra_state["new_memory"].reset()

In [15]:
vector_memory = VectorMemory.from_defaults(
    vector_store=None,
    embed_model=OpenAIEmbedding(),
    retriever_kwargs={"similarity_top_k": 3},
)
context_memory = ChatSummaryMemoryBuffer.from_defaults(
    chat_history=None,
    llm=llm,
    token_limit=4000,
    tokenizer_fn=tokenizer_fn,
)
composable_memory = SimpleComposableMemory.from_defaults(
    primary_memory=context_memory,
    secondary_memory_sources=[vector_memory],
)

In [29]:
law_agent_worker = HuggingFaceAgentWorker(
    tools=[],
    llm=law_llm,
    callback_manager=CallbackManager([TokenCountingHandler(tokenizer)]),
    verbose=True,
)
law_agent = law_agent_worker.as_agent(memory=composable_memory)

In [30]:
law_agent.reset()

In [31]:
output = law_agent.chat(
    """Judge this Introduction of the privacy policy for GDPR compliance. 
    Note that we know this part is incomplete for a real privacy policy, so only provide suggestions for optimizing this part.
    DO NOT provide suggestions for the entire privacy policy.

Privacy Policy Introduction

Purpose of the Privacy Policy

Welcome to [Your Organization]'s Privacy Policy. This document outlines how we collect, use, and protect your personal data when you interact with our services. Our goal is to provide you with clear and transparent information about our data practices, ensuring you have the knowledge and control over your personal information.

Our Commitment to Data Protection

At [Your Organization], we are committed to safeguarding your privacy and ensuring the security of your personal data. We adhere to the principles of the European General Data Protection Regulation (GDPR), which mandates strict guidelines for data protection and privacy. Our dedication to these principles reflects our commitment to maintaining your trust and confidence in our services. We strive to balance simplicity and comprehensiveness in our privacy practices, empowering you with the understanding and control necessary to manage your personal information effectively."""
)
display(Markdown(f"<b>{output.response}</b>"))

Added user message to memory: Judge this Introduction of the privacy policy for GDPR compliance. 
    Note that we know this part is incomplete for a real privacy policy, so only provide suggestions for optimizing this part.
    DO NOT provide suggestions for the entire privacy policy.

Privacy Policy Introduction

Purpose of the Privacy Policy

Welcome to [Your Organization]'s Privacy Policy. This document outlines how we collect, use, and protect your personal data when you interact with our services. Our goal is to provide you with clear and transparent information about our data practices, ensuring you have the knowledge and control over your personal information.

Our Commitment to Data Protection

At [Your Organization], we are committed to safeguarding your privacy and ensuring the security of your personal data. We adhere to the principles of the European General Data Protection Regulation (GDPR), which mandates strict guidelines for data protection and privacy. Our dedication 

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


=== LLM Response ===
The introduction to the privacy policy is a good start, but it can be improved to provide more clarity and specificity. Here are some suggestions:

1. Clarify the scope of the policy: The introduction should clearly state the types of data covered by the policy, such as personal data, sensitive data, and non-personal data. This will help users understand the extent of the policy's coverage.

2. Mention the purpose of the policy: The introduction should explicitly state the purpose of the policy, which is to inform users about how their data is collected, used, and protected. This will help users understand the importance of the policy and its relevance to their interactions with the organization.

3. Highlight the organization's commitment to data protection: The introduction should emphasize the organization's commitment to data protection and privacy, and how it adheres to the principles of the GDPR. This will help users understand the organization's dedication t

<b>The introduction to the privacy policy is a good start, but it can be improved to provide more clarity and specificity. Here are some suggestions:

1. Clarify the scope of the policy: The introduction should clearly state the types of data covered by the policy, such as personal data, sensitive data, and non-personal data. This will help users understand the extent of the policy's coverage.

2. Mention the purpose of the policy: The introduction should explicitly state the purpose of the policy, which is to inform users about how their data is collected, used, and protected. This will help users understand the importance of the policy and its relevance to their interactions with the organization.

3. Highlight the organization's commitment to data protection: The introduction should emphasize the organization's commitment to data protection and privacy, and how it adheres to the principles of the GDPR. This will help users understand the organization's dedication to protecting their data and how it aligns with the legal requirements.

4. Provide a brief overview of the policy: The introduction should provide a brief overview of the policy, including the key topics covered, such as data collection, data usage, data sharing, data</b>

In [36]:
law_agent.callback_manager.handlers[0].total_llm_token_count

0

In [8]:
chat_history = [
    ChatMessage(
        content=
        "You are an assistant that judges whether the part of a privacy policy is compliant with the GDPR.",
        role=MessageRole.USER,  # SYSTEM
    )
]

In [26]:
# self_reflection_agent_worker = SelfReflectionAgentWorker.from_defaults(
#     llm=law_llm,
#     verbose=True,
# )
main_agent_worker = OpenAIAgentWorker.from_tools(
    tools=[],
    llm=llm,
    verbose=True,
)
introspective_worker_agent = IntrospectiveAgentWorker.from_defaults(
    reflective_agent_worker=law_agent_worker,
    main_agent_worker=main_agent_worker,
    verbose=True,
)
introspective_agent = introspective_worker_agent.as_agent(
    # chat_history=chat_history,
    verbose=True, )

In [27]:
output = introspective_agent.chat(
    """Judge this Introduction of the privacy policy for GDPR compliance. 
    Note that we know this part is incomplete for a real privacy policy, so only provide suggestions for optimizing this part.
    DO NOT provide suggestions for the entire privacy policy.
    Only provide suggestions, do not try to revise it.

Privacy Policy Introduction

Purpose of the Privacy Policy

Welcome to [Your Organization]'s Privacy Policy. This document outlines how we collect, use, and protect your personal data when you interact with our services. Our goal is to provide you with clear and transparent information about our data practices, ensuring you have the knowledge and control over your personal information.

Our Commitment to Data Protection

At [Your Organization], we are committed to safeguarding your privacy and ensuring the security of your personal data. We adhere to the principles of the European General Data Protection Regulation (GDPR), which mandates strict guidelines for data protection and privacy. Our dedication to these principles reflects our commitment to maintaining your trust and confidence in our services. We strive to balance simplicity and comprehensiveness in our privacy practices, empowering you with the understanding and control necessary to manage your personal information effectively."""
)
display(Markdown(f"<b>{output.response}</b>"))

> Running step 41304bf5-e7bf-4df9-a87d-482fcf800bfe. Step input: Judge this Introduction of the privacy policy for GDPR compliance. 
    Note that we know this part is incomplete for a real privacy policy, so only provide suggestions for optimizing this part.
    DO NOT provide suggestions for the entire privacy policy.

Privacy Policy Introduction

Purpose of the Privacy Policy

Welcome to [Your Organization]'s Privacy Policy. This document outlines how we collect, use, and protect your personal data when you interact with our services. Our goal is to provide you with clear and transparent information about our data practices, ensuring you have the knowledge and control over your personal information.

Our Commitment to Data Protection

At [Your Organization], we are committed to safeguarding your privacy and ensuring the security of your personal data. We adhere to the principles of the European General Data Protection Regulation (GDPR), which mandates strict guidelines for data prot

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Added user message to memory: The introduction of your privacy policy is a good start, but there are a few areas where it can be optimized for GDPR compliance and clarity:

1. **Specify the Data Controller**: Clearly identify who the data controller is. This is a requirement under GDPR.

2. **Contact Information**: Provide contact details for data protection inquiries, including the Data Protection Officer (DPO) if applicable.

3. **Legal Basis for Processing**: Mention the legal basis for processing personal data as required by GDPR.

4. **User Rights**: Briefly mention the rights of the data subjects under GDPR.

5. **Scope of the Policy**: Clarify the scope of the policy, including who it applies to and what services it covers.

Here is a revised version incorporating these suggestions:

---

**Privacy Policy Introduction**

**Purpose of the Privacy Policy**

Welcome to [Your Organization]'s Privacy Policy. This document outlines how we collect, use, and protect your personal data w

<b>The introduction of your privacy policy is a good start, but there are a few areas where it can be optimized for GDPR compliance and clarity:

1. **Specify the Data Controller**: Clearly identify who the data controller is. This is a requirement under GDPR.

2. **Contact Information**: Provide contact details for data protection inquiries, including the Data Protection Officer (DPO) if applicable.

3. **Legal Basis for Processing**: Mention the legal basis for processing personal data as required by GDPR.

4. **User Rights**: Briefly mention the rights of the data subjects under GDPR.

5. **Scope of the Policy**: Clarify the scope of the policy, including who it applies to and what services it covers.

Here is a revised version incorporating these suggestions:

---

**Privacy Policy Introduction**

**Purpose of the Privacy Policy**

Welcome to [Your Organization]'s Privacy Policy. This document outlines how we collect, use, and protect your personal data when you interact with our services. Our goal is to provide you with clear and transparent information about our data practices, ensuring you</b>

In [2]:
pipe = pipeline("text-generation",
                model="Equall/Saul-7B-Instruct-v1",
                torch_dtype=torch.bfloat16,
                device_map="auto")

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [3]:
messages = [
    {
        "role":
        "user",
        "content":
        """Judge the following section from a privacy policy, to see whether it's GDPR compliant:
        
        **Privacy Policy Introduction**

        **Purpose of the Privacy Policy**

        Welcome to [Your Organization]'s Privacy Policy. This document outlines how we collect, use, and protect your personal data when you interact with our services. Our goal is to provide you with clear and transparent information about our data practices, ensuring you have the knowledge and control over your personal information.

        **Our Commitment to Data Protection**

        At [Your Organization], we are committed to safeguarding your privacy and ensuring the security of your personal data. We adhere to the principles of the European General Data Protection Regulation (GDPR), which mandates strict guidelines for data protection and privacy. Our dedication to these principles reflects our commitment to maintaining your trust and confidence in our services. We strive to balance simplicity and comprehensiveness in our privacy practices, empowering you with the understanding and control necessary to manage your personal information effectively."""
    },
]
prompt = pipe.tokenizer.apply_chat_template(messages,
                                            tokenize=False,
                                            add_generation_prompt=True)

In [4]:
outputs = pipe(
    prompt,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    max_length=2048,
    truncation=True,
    #   eos_token_id=tokenizer.eos_token_id,
    return_full_text=False)
print(outputs[0]["generated_text"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


 The provided text is from the introduction and commitment to data protection in the GDPR compliance context, which is GDPR compliant. The text clearly states the purpose of the privacy policy, which is to inform users about the collection, use, and protection of their personal data. It also mentions adherence to the GDPR principles, which shows commitment to data protection and privacy. The text balances simplicity and comprehensiveness in its privacy practices, which is a GDPR recommendation. Overall, it seems that the text is GDPR compliant.
