<a href="https://colab.research.google.com/github/LLMsLab/chat-gpt-api-lab/blob/exploration%2Fchatgpt-api-understanding/tutorial_03_chatgpt_api_with_presidio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using Presidio and ChatGPT API to anonymize PII data

## Setup



In [None]:
# Requirements
!pip install -qU python-dotenv openai gradio presidio-analyzer presidio-anonymizer
!python -m spacy download en_core_web_lg -qU

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.9/19.9 MB[0m [31m89.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.0/76.0 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m55.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.0/57.0 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m288.1/288.1 kB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.4/75.4 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m24.8 MB/s[0m 

In [None]:
# Importing necessary modules
import os
from dotenv import load_dotenv
import openai
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine
from IPython.display import HTML, display

To wrap long lines in the notebook's output, register the following function so that it runs before every cell.

Reference: [Line Wrapping in Collaboratory Google results](https://stackoverflow.com/questions/58890109/line-wrapping-in-collaboratory-google-results)

In [None]:
def set_css(info=None):
    """
    Wraps the lines in the notebook's output.

    Parameters
    ----------
    info : object, optional
        Execution info that IPython passes to ``pre_run_cell`` callbacks.
        It is accepted only so the function matches that callback
        signature; the value itself is ignored.
    """
    display(HTML('''
    <style>
      pre {
        white-space: pre-wrap;
      }
    </style>
    '''))

# Run the function before each cell executes so the style is emitted
# alongside every cell's output.
get_ipython().events.register('pre_run_cell', set_css)

In [None]:
# Mount Google Drive and ensure Google CoLab is running the correct
# version of TensorFlow
try:
  from google.colab import drive
  drive.mount('/content/drive', force_remount=True)
  COLAB = True
  print("Note: using Google Colab")
  %tensorflow_version 2.x
except:
  print("Note: not using Google Colab")
  COLAB = False

Mounted at /content/drive
Note: using Google Colab
Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [None]:
# Load the environment variables from the .env file stored on the mounted
# Google Drive. The path only exists after the Drive mount cell has run.
# The call's return value is shown as the cell output (True in the
# recorded run).
load_dotenv('/content/drive/MyDrive/Projects/.env')

True

In [None]:
# Retrieve the OpenAI API key from the environment and hand it to the
# openai module. os.getenv returns None when OPENAI_API_KEY is not set,
# in which case the key is left unset here.
openai.api_key = os.getenv("OPENAI_API_KEY")

## Presidio and ChatGPT API working together 

In [None]:
# Entity types the analyzer is asked to look for.
PII_ENTITIES = [
    "CREDIT_CARD",
    "CRYPTO",
    "DATE_TIME",
    "EMAIL_ADDRESS",
    "IBAN_CODE",
    "IP_ADDRESS",
    "NRP",
    "LOCATION",
    "PERSON",
    "PHONE_NUMBER",
    "MEDICAL_LICENSE",
    "URL",
    "US_BANK_NUMBER",
    "US_DRIVER_LICENSE",
    "US_ITIN",
    "US_PASSPORT",
    "US_SSN",
]

# The engines are created on first use and then reused. Setting up the
# analyzer loads the NLP module (spaCy model by default) and the PII
# recognizers, so it is done once rather than on every call.
_analyzer = None
_anonymizer = None


def _get_engines():
    """Return the shared (analyzer, anonymizer) pair, building it on first use."""
    global _analyzer, _anonymizer
    if _analyzer is None:
        _analyzer = AnalyzerEngine()
    if _anonymizer is None:
        _anonymizer = AnonymizerEngine()
    return _analyzer, _anonymizer


def analyze_and_anonymize(text: str) -> str:
    """
    Analyze and anonymize PII data from a given text string.

    The text is scanned for the entity types listed in ``PII_ENTITIES``
    (English language) and the analyzer results are passed to the
    anonymizer, which returns the rewritten text.

    Parameters
    ----------
    text : str
        The text to be analyzed and anonymized.

    Returns
    -------
    str
        The anonymized text. An empty input is returned as an empty
        string without loading the engines.

    Example
    -------
    >>> text = ("Mr. Smith's phone number is 212-555-5555, his SSN is "
    ...         "432-56-5654, and his credit card number is 344078656339539")
    >>> print(analyze_and_anonymize(text))
    Mr. <PERSON>'s phone number is <PHONE_NUMBER>, his SSN is <US_SSN>, and his credit card number is <CREDIT_CARD>
    """
    # Nothing to detect in an empty string; skip the engine set-up.
    if text == "":
        return ""

    analyzer, anonymizer = _get_engines()

    # Call analyzer to get results
    results = analyzer.analyze(
        text=text, entities=PII_ENTITIES, language="en"
    )

    # Analyzer results are passed to the anonymizer for anonymization
    anonymized = anonymizer.anonymize(text=text, analyzer_results=results)

    return anonymized.text

In [None]:
# Running transcript of the conversation, in the list-of-dicts format
# passed as `messages` to the chat completion call.
message_history = []


def chat(inp, role="user"):
    """
    Send a message to the chat model and return its reply.

    The new message is sent together with the accumulated
    ``message_history``. The history is updated only after the API call
    has succeeded, so a failed request does not leave an unanswered
    message behind (which would be sent again on a retry).

    Parameters
    ----------
    inp : object
        Message content; it is converted to a string before sending.
    role : str, optional
        Role attached to the message. Defaults to ``"user"``.

    Returns
    -------
    str
        Content of the first choice in the model's response.
    """
    new_message = {"role": role, "content": f"{inp}"}
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[*message_history, new_message],
    )
    reply_content = completion.choices[0].message.content

    # Commit both turns together now that the exchange is complete.
    message_history.append(new_message)
    message_history.append({"role": "assistant", "content": f"{reply_content}"})
    return reply_content

In [None]:
# Two-turn interactive demo: each prompt is read from stdin, anonymized,
# and only the anonymized text is passed to chat(). The raw input is
# echoed locally so the two versions can be compared.
for i in range(2):
    # Sample prompts to try:
    #   What is the moon's circumference in km?
    #   Which moon is that in reference to?
    #   Mr. Smith's phone number is 212-555-5555, his SSN is 432-56-5654,
    #   and his credit card number is 344078656339539. Write an email to
    #   Mr. Smith about new Life insurance product "star"
    user_input = input("> ")
    print("User's input was: ", user_input)
    user_input_anonymized = analyze_and_anonymize(user_input)
    print("Anonymized user's input: ", user_input_anonymized)
    # chat() also records the exchange in message_history, so the second
    # turn is answered with the first turn as context.
    print(chat(user_input_anonymized))
    print()

> Mr. Smith's phone number is 212-555-5555, his SSN is 432-56-5654, and his credit card number is 344078656339539. Write an email to Mr. Smith about new Life insurance product "star"




User's input was:  Mr. Smith's phone number is 212-555-5555, his SSN is 432-56-5654, and his credit card number is 344078656339539. Write an email to Mr. Smith about new Life insurance product "star"
Anonymized user's input:  Mr. <PERSON>'s phone number is <PHONE_NUMBER>, his SSN is <US_SSN>, and his credit card number is <CREDIT_CARD>. Write an email to Mr. <PERSON> about new Life insurance product "star"
Dear Mr. <PERSON>,

I hope this email finds you in good health and high spirits.

At ABC Insurance, we are proud to introduce our new life insurance product "Star". This innovative offering is designed to provide comprehensive and affordable protection for you and your loved ones.

We understand that securing your family's wellbeing is of utmost importance to you. With "Star", you can enjoy peace of mind knowing that your loved ones will be taken care of in the unlikely event of any unforeseen circumstances.

As a valued client of ABC Insurance, we believe that "Star" would be an exc



User's input was:  Include the date on the email
Anonymized user's input:  Include the date on the email
Sure, here's the updated email with the date included:


Dear Mr. <PERSON>,

I hope this email finds you in good health and high spirits.

As of [TODAY'S DATE], we are proud to introduce our new life insurance product "Star" at ABC Insurance. This innovative offering is designed to provide comprehensive and affordable protection for you and your loved ones.

We understand that securing your family's wellbeing is of utmost importance to you. With "Star", you can enjoy peace of mind knowing that your loved ones will be taken care of in the unlikely event of any unforeseen circumstances.

As a valued client of ABC Insurance, we believe that "Star" would be an excellent addition to your existing coverage. The benefits of the policy include:

- Coverage for natural death and accidental death
- A lump sum payout to beneficiaries in the event of death
- Fixed premiums that remain the same 