In [None]:
from datetime import date
from typing import List

import dotenv
import marvin
import spacy
from langchain.chains import create_extraction_chain
from langchain.chat_models import ChatAnthropic
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage, SystemMessage
from langchain.vectorstores import Chroma
from langchain_experimental.llms.anthropic_functions import AnthropicFunctions
from pydantic import BaseModel, Field
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

from redbox.llm.llm_base import LLMHandler
from redbox.models.file import get_entity_metadata

# Read the key/value pairs from ../.env into a dict (the cells below look up
# ENV["ANTHROPIC_API_KEY"]). dotenv_values() only returns the values; it does not
# export them to os.environ.
ENV = dotenv.dotenv_values("../.env")

In [None]:
# Chat model shared by the handler and by later cells
llm = ChatAnthropic(anthropic_api_key=ENV["ANTHROPIC_API_KEY"])
sys_prompt = SystemMessage(content="Help answer questions from user")

# Chroma store persisted under ../data/db/, embedded with sentence-transformers
chroma_store = Chroma(
    embedding_function=SentenceTransformerEmbeddings(),
    persist_directory="../data/db/",
)

handler = LLMHandler(llm=llm, user_uuid="foo", vector_store=chroma_store)

In [None]:
# llm = ChatAnthropic(anthropic_api_key=ENV["ANTHROPIC_API_KEY"])

raw_text_1 = """
 June 10, 2023 FROM - Dr. Aiden Maxwell, Chief Advisor on Artificial Intelligence
ACTION
Enhancement of Digital Ethics Framework
 SUMMARY
1. To ensure ethical and responsible usage of digital technologies, it is necessary to revise our current Digital Ethics Framework. The enhancements will address new challenges and complexities, including those introduced by recent advancements in Artificial Intelligence (AI). The revised framework aims to provide comprehensive guidelines on data privacy, algorithmic fairness, user consent, and transparency, ensuring our digital ethics are up to date with the evolving digital landscape.
RECOMMENDATION
2. We recommend the enhancement of the current Digital Ethics Framework. Does the Minister approve the revision process to accommodate advancements in digital technologies?
TIMING
3. A decision by July 1, 2023, would be optimal. Delay in updating the framework might lead to ethical concerns due to the increasing use and influence of AI and other digital technologies.
BACKGROUND
4. In an era of rapid digital transformation, our ethical guidelines must keep pace with the evolution of technology.
5. ADDRESSING NEW CHALLENGES: The current Digital Ethics Framework needs enhancement to address new challenges brought by the advancements in digital technologies, including AI.
6. DATA PRIVACY: With AI systems heavily reliant on data, the updated framework must provide clear guidelines on data privacy and protection.
7. ALGORITHMIC FAIRNESS: The framework will include guidelines for ensuring fairness and eliminating bias in automated decisions made by AI systems.
8. USER CONSENT AND TRANSPARENCY: The new framework will focus on obtaining informed user consent and promoting transparency in the use of digital technologies.
9. The enhancement of the Digital Ethics Framework is a significant step towards ensuring ethical and responsible use of evolving digital technologies.
CLEARANCE LIST

      Team
Clearance necessary?
Named lead who has cleared
SCS
Yes
Sir Winston Radcliffe
Perm Sec
No
Second Perm Sec
No
Communications (inc named Press Officer)
Yes
Lara Chambers
Finance/Fiscal Events
No
HR
No
Legal
No
Parliamentary Unit
No
Devolution
No
Analysis
No
HMTC
Yes
Isabella Sinclair
Other
Yes
Department for Digital, Culture, Media, and Sport, Office for Artificial Intelligence, Home Office
             
             
"""

In [None]:
raw_text_2 = """
 SENSITIVITY MARKING: OFFICIAL SENSITIVE
Ministerial AI Meeting
Tuesday 5 June 2023 1030-1130 Conference Room D
Attendees:
- Sarah Friar (DSIT Director of AI)
- Marie Clark (DSIT Head of AI Policy) - Hamza Khan (DSIT AI Researcher) - Alex Burghart (Cabinet Office)
Meeting Notes:
1. Sarah Friar outlined recent studies showing biases encoded in some recruitment AI tools, leading to discrimination in hiring.
2. Hamza Khan presented technical options for auditing algorithms and datasets to identify unfair biases. He stressed that biases can be difficult to detect without proactive audits.
3. Marie Clark suggested policy options such as mandatory bias auditing for vendors, restrictions on use of certain dataset types, and enhanced transparency rules.
4. Alex Burghart emphasized the need to balance innovation and regulation, proposing sandboxes for controlled testing before full deployment in recruitment.
5. All agreed developer training and diverse teams will be critical to avoiding biases when designing recruitment AI.
Actions:
- DSIT to consult further with academics on auditing methodologies.
- Cabinet Office to meet with recruitment AI vendors to discuss a code of ethics.

"""

In [None]:
raw_text_3 = """
 From: Tom Stewart <Tom.stewart@cabinetoffice.gov.uk>
Sent: Tuesday, 6 June 2023 11:02 AM
To: Joel Edmonds <Joel.edmonds@cabinetoffice.gov.uk>; Roseline Hawks <Roseline.hawks@cabinetoffice.gov.uk>; Alex Burghart <alex.burghart@cabinetoffice.gov.uk>
Subject: AI Ethics Framework
Hi all,
As we advance our AI strategy, I think it's crucial we develop a clear ethical framework to guide both public and private sector usage. This will be essential for maintaining public trust.
I suggest we put together a set of core principles around transparency, accountability, bias mitigation, privacy etc. We should also propose specific oversight mechanisms and compliance requirements.
Keen to get your thoughts ahead of discussing with ministers next week. What do you see as the priorities?
Best, Tom
From: Joel Edmonds <Joel.edmonds@cabinetoffice.gov.uk>
Sent: Tuesday, 6 June 2023 12:34 PM
To: Tom Stewart <Tom.stewart@cabinetoffice.gov.uk>
Cc: Roseline Hawks <Roseline.hawks@cabinetoffice.gov.uk>; Alex Burghart <alex.burghart@cabinetoffice.gov.uk>
Subject: Re: AI Ethics Framework Hi Tom,
Fully agree on the need for ethical guidelines. My view is we need to strike a balance between encouraging innovation and protecting rights. Areas I'd highlight:
- Transparency of AI systems, how they operate and impact decisions. - Mechanisms for human oversight and control.
- Rules to prevent encoded bias and discrimination.
- Strict data protection standards.
I think independent auditing will be needed to provide assurance. But open to other perspectives on oversight.
Joel

 From: Roseline Hawks <Roseline.hawks@cabinetoffice.gov.uk>
Sent: Tuesday, 6 June 2023 3:01 PM
To: Tom Stewart <Tom.stewart@cabinetoffice.gov.uk>
Cc: Joel Edmonds <Joel.edmonds@cabinetoffice.gov.uk>; Alex Burghart <alex.burghart@cabinetoffice.gov.uk>
Subject: Re: AI Ethics Framework
Hi both,
In addition to what Joel outlined, I would suggest:
- Requiring fairness assessments before AI deployment, especially in public sector. - Giving citizens clear rights to contest AI-informed decisions.
- Mandating evaluations of societal impact - not just technical performance.
This will help embed ethical thinking from the outset. Let me know your thoughts.
Best, Roseline

"""

# Marvin

* Does the job
* Slowly
* Needs the text split into small chunks when extracting lists; otherwise it reports that there has been "no function call"

In [None]:
# marvin.settings.llm_model = "anthropic/claude-2"
# marvin.settings.llm_model
# marvin.settings.llm_temperature	= 0
# marvin.settings.llm_temperature

In [None]:
# Run the project's get_entity_metadata helper on the first sample document and display what it returns.
get_entity_metadata(raw_text_1)

In [None]:
# Marvin AI model with two fields: a date and the text of the item to be actioned.
# The decorator pins it to anthropic/claude-2 at temperature 0.
# (Documented with comments rather than a docstring so nothing extra can reach the model.)
# NOTE(review): the field name `date` shadows the imported `datetime.date` type
# inside the class body — confirm this is intended.
@marvin.ai_model(llm_temperature=0, llm_model="anthropic/claude-2")
class Action(BaseModel):
    date: date
    action: str = Field(description="The item to be actioned")


# Marvin AI model that pulls three lists out of a chunk of text: people's names,
# organisation names, and actions formatted as "<date>, <action>" strings.
# NOTE(review): unlike Action, this decorator passes no llm_model / llm_temperature,
# so it presumably uses Marvin's configured defaults — confirm that is intended.
@marvin.ai_model()
class ChunkMetadata(BaseModel):
    names: List[str] = Field(
        description="A complete list of all people: the first name, last name and titles of important people. No job titles."
    )
    organisation_names: List[str] = Field(
        description="A complete list of all important organisations, institutions, and government departments"
    )
    actions: List[str] = Field(
        description="A complete list of all important actions with deadlines, in the format <date>, <action>"
    )


# Extract metadata from the first sample document, then re-parse every
# "<date>, <action>" string into a structured Action.
raw_1 = ChunkMetadata(raw_text_1)
raw_1.actions = list(map(Action, raw_1.actions))
raw_1

In [None]:
# Check how Action parses a single "<date>, <action>" string on its own.
Action(
    "2023-06-05, Cabinet Office to meet with recruitment AI vendors to discuss a code of ethics."
)

In [None]:
# Extract metadata from the whole meeting-notes document in one call.
# `DocumentMetadata` is neither defined nor imported in this notebook; the
# extraction model declared above is `ChunkMetadata`.
ChunkMetadata(raw_text_2).model_dump()

In [None]:
# Split one document into fixed-size character chunks and extract metadata per chunk.
# The original sized the loop from raw_text_2 while slicing raw_text_3 (so part of
# the text was never processed) and called the undefined name `DocumentMetadata`.
CHUNK_SIZE = 1000
chunk_source = raw_text_3

out = [
    ChunkMetadata(chunk_source[start : start + CHUNK_SIZE])
    for start in range(0, len(chunk_source), CHUNK_SIZE)
]

In [None]:
# Dump every per-chunk result as a plain dict for inspection
chunk_dicts = [chunk.model_dump() for chunk in out]
print(chunk_dicts)

In [None]:
# Merge the per-chunk metadata into one dict of lists, keyed by field name.
final = {}
for doc in out:
    for k, v in doc.model_dump().items():
        # setdefault removes the need to special-case the first chunk
        final.setdefault(k, []).extend(v)

# De-duplicate each list; sort so the displayed result is stable between runs
# (iteration order of a set of strings is not).
final = {k: sorted(set(v)) for k, v in final.items()}

final

In [None]:
# Merge the first two chunk results, with later values overriding earlier ones.
# The original called .update() on the temporary dict returned by model_dump(),
# so the merged result was discarded and nothing about it was displayed.
merged_first_two = {**out[0].model_dump(), **out[1].model_dump()}
merged_first_two

## Summary to Marvin

Combine a plain LLM call (to list the key people) with Marvin (to parse each item) to speed things up

In [None]:
# Marvin AI model with a date and the text of the item to be actioned (claude-2, temperature 0).
# NOTE(review): this repeats the Action class declared in the earlier Marvin section
# with the same fields and decorator arguments; the later definition shadows the earlier one.
@marvin.ai_model(llm_temperature=0, llm_model="anthropic/claude-2")
class Action(BaseModel):
    date: date
    action: str = Field(description="The item to be actioned")


# Marvin AI model with a single field: the person's full name, without job titles
# (claude-2, temperature 0).
@marvin.ai_model(llm_temperature=0, llm_model="anthropic/claude-2")
class Person(BaseModel):
    person_name: str = Field(description="The full name of the person. No job titles.")

In [None]:
# Ask the Anthropic chat model directly (no Marvin) who the key people are
# in the first sample document.
_key_people_template = """
Here is some text, containted in <text> tags:

<text> {raw_text} </text>

Who are the important people in this document?

List of people:
"""
KEY_PEOPLE_PROMPT = PromptTemplate.from_template(_key_people_template)

llm = ChatAnthropic(anthropic_api_key=ENV["ANTHROPIC_API_KEY"])

key_people_question = KEY_PEOPLE_PROMPT.format_prompt(raw_text=raw_text_1).to_string()
to_send = HumanMessage(content=key_people_question)
result = llm([to_send])

result.content

In [None]:
# Display the text of the most recent model reply again.
result.content

In [None]:
# Turn each "-" bullet line of the model's reply into a Person via Marvin.
people = []
for line in result.content.splitlines():
    # startswith() is safe on blank lines; indexing line[0] raised IndexError on them
    if not line.startswith("-"):
        continue
    try:
        people.append(Person(line))
    except ValueError as err:
        # Best-effort: keep going, but report which lines could not be parsed
        print(f"Skipped {line!r}: {err}")

In [None]:
# Display the Person objects collected from the model's reply.
people

In [None]:
# Check how Person handles a single bullet line that also carries non-name text.
Person("- Lara Chambers, Communications clearance")

# Anthropic functions

* Unreliable and inaccurate
* Often raises errors

In [None]:
# Function-calling wrapper around the Anthropic chat model.
# dotenv_values() does not export the key to os.environ, so pass it explicitly,
# as the ChatAnthropic cells in this notebook do.
# NOTE(review): assumes AnthropicFunctions forwards extra keyword arguments to the
# underlying ChatAnthropic, as it does for `model` — confirm for the installed version.
model = AnthropicFunctions(
    model="claude-2", anthropic_api_key=ENV["ANTHROPIC_API_KEY"]
)

In [None]:
# Extraction schema for create_extraction_chain, written as JSON Schema.
# "list" is not a JSON Schema type; a list of strings is an "array" whose
# items are "string".
schema = {
    "properties": {
        "names": {
            "type": "array",
            "items": {"type": "string"},
            "description": "A complete list of all people: the first name, last name and titles of important people. No job titles.",
        },
        "institutions": {
            "type": "array",
            "items": {"type": "string"},
            "description": "A complete list of all important institutions, organsations and government departments",
        },
    }
}

chain = create_extraction_chain(schema, model)

# Only the first 500 characters of the document are sent
chain.run(raw_text_1[:500])

# BERT

In [None]:
# Build a token-classification (NER) pipeline from one Hugging Face checkpoint
ner_checkpoint = "dslim/bert-large-NER"
tokenizer = AutoTokenizer.from_pretrained(ner_checkpoint)
model = AutoModelForTokenClassification.from_pretrained(ner_checkpoint)
nlp = pipeline("ner", model=model, tokenizer=tokenizer)

# Try it on a short sentence first
example = "My name is Wolfgang and I live in Berlin"
ner_results = nlp(example)
print(ner_results)

In [None]:
# Run `nlp` over the first sample document, replacing the example results.
# When the notebook is run top to bottom, `nlp` here is the transformers NER pipeline.
ner_results = nlp(raw_text_1)

In [None]:
# Keep only the tokens whose entity tag ends in "PER"
[token for token in ner_results if token["entity"].endswith("PER")]

In [None]:
# Inspect a four-character slice of the document by character offset.
# NOTE(review): 2108/2112 look like start/end offsets copied from the NER output — confirm.
raw_text_1[2108:2112]

# Spacy

In [None]:
# Parse the first sample document with spaCy's large English model and list
# the PERSON entities, then the ORG entities, with their character offsets.
nlp = spacy.load("en_core_web_lg")

doc1 = nlp(raw_text_1)

# ("PERSON") is just the string "PERSON", so `in` was a substring test;
# compare the label exactly instead.
for ent in doc1.ents:
    if ent.label_ == "PERSON":
        print(ent.text, ent.start_char, ent.end_char, ent.label_)

print("---")

for ent in doc1.ents:
    if ent.label_ == "ORG":
        print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [None]:
# Parse the second sample document and list its PERSON entities, then its
# ORG entities, with their character offsets.
doc2 = nlp(raw_text_2)

# ("PERSON") is just the string "PERSON", so `in` was a substring test;
# compare the label exactly instead.
for ent in doc2.ents:
    if ent.label_ == "PERSON":
        print(ent.text, ent.start_char, ent.end_char, ent.label_)

print("---")

for ent in doc2.ents:
    if ent.label_ == "ORG":
        print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [None]:
# Parse the third sample document and list its PERSON entities, then its
# ORG entities, with their character offsets.
doc3 = nlp(raw_text_3)

# ("PERSON") is just the string "PERSON", so `in` was a substring test;
# compare the label exactly instead.
for ent in doc3.ents:
    if ent.label_ == "PERSON":
        print(ent.text, ent.start_char, ent.end_char, ent.label_)

print("---")

for ent in doc3.ents:
    if ent.label_ == "ORG":
        print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [None]:
# Ask the Anthropic chat model for the important dates in a short piece of text.
llm = ChatAnthropic(anthropic_api_key=ENV["ANTHROPIC_API_KEY"])

raw_text = "Put text here -- my birthday is on 25/1/89"

_key_date_template = """
Here is some text, containted in <text> tags:

<text> {raw_text} </text>

What are the important dates in this document?

List of dates:
"""

# Named for what it holds: the original bound this to KEY_PEOPLE_PROMPT,
# silently replacing the people prompt defined in an earlier cell.
KEY_DATE_PROMPT = PromptTemplate.from_template(_key_date_template)

to_send = HumanMessage(
    content=KEY_DATE_PROMPT.format_prompt(raw_text=raw_text).to_string()
)

result = llm([to_send])

result.content

In [None]:
# Parser used by later cells to turn a comma-separated model reply into a list.
# The last line displays the format-instruction text the parser generates.
output_parser = CommaSeparatedListOutputParser()
format_instructions = output_parser.get_format_instructions()
format_instructions

In [None]:
raw_text = '"Lockdown point" -- when the "principle" leave the building\n\nWhat goes into your Red Box content decision making process? (emphasis on information density for a briefing, deadline oriented, etc.)\n\nRAG\n\nJudgement call based on time\n\nIs it IMPORTANT\nWas it important before\nWas the prime minister incolved, for example\nPEOPLE\nCOPY lists!! "Cast list" -- office vs private\n\n\nCOMMITMENTS from before\nMEDIA\n\nIs it URGENT\nTIME\nWHEN was it last talked about\n\nWhat do you want to show ministers in their red box?\n\nx\n\nWhat kind of tone and persona do you expect to best fit your Minister? (idiosyncrasies)'
# to_send = HumanMessage(content=raw_text)

In [None]:
# Generic "list every <entity> in this text" prompt; here it is asked for nouns.
entity_list_template = "Here is some text, containted in <text> tags: \n\n <text> {raw_text} </text> \n\n Using only the information within the <text> tag generate a comma separated list of all the {entity} you find. If you cannot then return the word None."
prompt = PromptTemplate(
    template=entity_list_template,
    input_variables=["entity", "raw_text"],
)

nouns_question = prompt.format_prompt(entity="nouns", raw_text=raw_text).to_string()
to_send = HumanMessage(content=nouns_question)

# Send through the handler's model and split the reply on commas
_output = handler.llm([to_send])
output_parser.parse(_output.content)

In [None]:
# Parse the first briefing with spaCy's large English model.
# This cell used to carry an inline copy of the raw_text_1 briefing; reuse the
# variable so the two copies cannot drift apart.
nlp = spacy.load("en_core_web_lg")
doc = nlp(raw_text_1)

In [None]:
# List the people, organisations and dates spaCy found in `doc`
wanted_labels = ("PERSON", "ORG", "DATE")
for ent in doc.ents:
    if ent.label_ not in wanted_labels:
        continue
    print(ent.text, ent.label_)

In [None]:
# List the people, organisations and dates found in `doc_sm`.
# NOTE(review): `doc_sm` is only assigned in a later cell of this notebook, so on a
# fresh kernel this cell raises NameError unless that cell has been run first.
for ent in doc_sm.ents:
    if ent.label_ in ("PERSON", "ORG", "DATE"):
        print(ent.text, ent.label_)

In [None]:
# Parse a second briefing with spaCy's small English model.
# The original loaded `nlp_sm` but then called `nlp` (the large model), so the
# small model was never actually used.
nlp_sm = spacy.load("en_core_web_sm")
doc_sm = nlp_sm(
    """
 July 27, 2023 FROM - Dr. Aiden Maxwell, Chief Advisor on Artificial Intelligence
ACTION
Consideration and Approval of the Proposed Artificial Intelligence Regulatory Framework
SUMMARY
1. The proposed Artificial Intelligence (AI) Regulatory Framework intends to provide clear guidelines for AI development and use within our nation. The framework places emphasis on safety, transparency, fairness, and accountability, while supporting innovation. The pressing need for regulation arises from the rapid development and ubiquitous application of AI, from healthcare to finance, with concomitant ethical and security concerns. A structured legal and ethical approach will instil confidence in AI technologies among the public, and also ensure that AI advancements do not inadvertently harm societal structure and individuals’ rights.
RECOMMENDATION
2. We propose the adoption of the AI Regulatory Framework. Is the Minister prepared to approve this regulatory framework?
TIMING
3. A decision by August 30, 2023, is advisable. Delay in establishing this regulation could increase the risk of misuse of AI, perpetuate existing ethical concerns, and erode public trust in AI technologies.
BACKGROUND
4. The remarkable speed at which AI technology has advanced has brought about transformative changes across various sectors, offering untold benefits. However, it also poses new and unique challenges relating to privacy, security, bias, and accountability.
5. NEED FOR REGULATION: AI technologies, left unchecked, can be intrusive, biased, and opaque, leading to societal and individual harm. The proposed framework aims to address these issues proactively.
6. SAFETY AND TRANSPARENCY: The framework includes guidelines on safety standards for AI and requirements for transparency, with AI systems needing to provide an understandable explanation for their decisions.
7. FAIRNESS AND ACCOUNTABILITY: The proposed regulation also stipulates measures to prevent biased decision-making and provides guidelines for accountability in cases where harm is caused by AI systems.
 
 8. SUPPORTING INNOVATION: While regulating AI, it is essential to ensure that we do not stifle innovation. The proposed regulation takes a balanced approach, offering clarity and predictability for developers, and promoting innovative AI technologies.
9. INTERNATIONAL COOPERATION: The regulation suggests cooperation with international partners to establish common standards and to handle cross-border AI issues.
10.Therefore, the proposed framework is of paramount importance in creating a trustworthy environment for AI use while ensuring that the technology continues to thrive and provide benefits.
CLEARANCE LIST
     Team
Clearance necessary?
Named lead who has cleared
SCS
Mandatory
Sir Winston Radcliffe
Perm Sec
No
Second Perm Sec
No
Communications (inc named Press Officer)
Yes
Lara Chambers
Finance/Fiscal Events
No
HR
No
Legal
Yes
Maxwell Hawthorne
Parliamentary Unit
No
Devolution
No
Analysis
No
HMTC
Yes
Isabella Sinclair
Other
No
             
             
"""
)

In [None]:
# List the people, organisations and dates found in the document parsed in the
# previous cell. The original looped over `doc` again, repeating an earlier
# cell's output verbatim.
for ent in doc_sm.ents:
    if ent.label_ in ("PERSON", "ORG", "DATE"):
        print(ent.text, ent.label_)

In [None]:
# Same "list every <entity>" prompt as before, this time asking for people.
prompt = PromptTemplate(
    input_variables=["entity", "raw_text"],
    template="Here is some text, containted in <text> tags: \n\n <text> {raw_text} </text> \n\n Using only the information within the <text> tag generate a comma separated list of all the {entity} you find. If you cannot then return the word None.",
)

people_prompt_value = prompt.format_prompt(entity="people", raw_text=raw_text)
to_send = HumanMessage(content=people_prompt_value.to_string())

_output = handler.llm([to_send])
output_parser.parse(_output.content)

In [None]:
# Display the unparsed text of the last reply from handler.llm.
_output.content

In [None]:
# Render the "list every <entity>" prompt for verbs.
# The template's variables are `entity` and `raw_text`; the original passed
# `entity_prompt`, which makes format() raise KeyError.
_input = prompt.format(
    entity="verbs",
    raw_text='"Lockdown point" -- when the "principle" leave the building\n\nWhat goes into your Red Box content decision making process? (emphasis on information density for a briefing, deadline oriented, etc.)\n\nRAG\n\nJudgement call based on time\n\nIs it IMPORTANT\nWas it important before\nWas the prime minister incolved, for example\nPEOPLE\nCOPY lists!! "Cast list" -- office vs private\n\n\nCOMMITMENTS from before\nMEDIA\n\nIs it URGENT\nTIME\nWHEN was it last talked about\n\nWhat do you want to show ministers in their red box?\n\nx\n\nWhat kind of tone and persona do you expect to best fit your Minister? (idiosyncrasies)',
)

In [None]:
# Display the rendered prompt text.
_input

In [None]:
# Send the rendered prompt to the chat model. Calling llm() with no messages
# raises TypeError; the model expects a list of messages, as in the cells above.
result = llm([HumanMessage(content=_input)])

In [None]:
# Call the handler's entity_extraction with a free-form description of what to
# extract and the same notes text used in the cells above; the return value is displayed.
handler.entity_extraction(
    user_entity="list of verbs in this text",
    raw_text='"Lockdown point" -- when the "principle" leave the building\n\nWhat goes into your Red Box content decision making process? (emphasis on information density for a briefing, deadline oriented, etc.)\n\nRAG\n\nJudgement call based on time\n\nIs it IMPORTANT\nWas it important before\nWas the prime minister incolved, for example\nPEOPLE\nCOPY lists!! "Cast list" -- office vs private\n\n\nCOMMITMENTS from before\nMEDIA\n\nIs it URGENT\nTIME\nWHEN was it last talked about\n\nWhat do you want to show ministers in their red box?\n\nx\n\nWhat kind of tone and persona do you expect to best fit your Minister? (idiosyncrasies)',
)

In [None]:
# Call handler.chat_with_rag with a sample question; the next cell reads
# resp["chat_history"] from the result.
resp = handler.chat_with_rag(user_question="What is the cabinet office?")

In [None]:
# Print every turn of the conversation as "<type>  :  <content>"
chat_history = resp["chat_history"]
for turn in chat_history:
    print(turn.type, turn.content, sep="  :  ")