In [2]:
import spacy

# Zero-shot NER via an "llm" pipeline component on a blank English pipeline.
# The component is configured with the spacy.NER.v2 task (restricted to the
# labels below) and the spacy.GPT-3-5.v1 model.
ner_labels = ["PERSON", "ORGANISATION", "LOCATION", "PHONE NUMBER"]
llm_config = {
    "task": {"@llm_tasks": "spacy.NER.v2", "labels": ner_labels},
    "model": {"@llm_models": "spacy.GPT-3-5.v1"},
}

nlp = spacy.blank("en")
nlp.add_pipe("llm", config=llm_config)
nlp.initialize()

# Run one sentence through the pipeline and show each entity as (text, label).
doc = nlp("Jack and Jill rode up the hill in Les Deux Alpes")
entity_pairs = [(span.text, span.label_) for span in doc.ents]
print(entity_pairs)


[('Jack', 'PERSON'), ('Jill', 'PERSON'), ('Les Deux Alpes', 'LOCATION')]


In [9]:
# https://sourajit16-02-93.medium.com/zero-shot-named-entity-recognition-using-openai-chatgpt-api-46738191f375
import os 

# Prompt pieces for zero-shot NER over a chat model.
# SYSTEM_PROMPT, USER_PROMPT_1 and ASSISTANT_PROMPT_1 form a short priming
# exchange; GUIDELINES_PROMPT is a str.format template with a single "{}" slot
# for the sentence to analyse (literal braces are escaped as "{{" / "}}").
SYSTEM_PROMPT = (
    "You are a smart and intelligent Named Entity Recognition (NER) system. "
    "I will provide you the definition of the entities you need to extract, "
    "the sentence from where your extract the entities and the output format "
    "with examples."
)

USER_PROMPT_1 = "Are you clear about your role?"

ASSISTANT_PROMPT_1 = (
    "Sure, I'm ready to help you with your NER task. "
    "Please provide me with the necessary information to get started."
)

# One list entry per line of the prompt; empty entries are blank lines. The
# final entry keeps its trailing space and gets no newline after it.
GUIDELINES_PROMPT = "\n".join([
    "Entity Definition:",
    "1. PERSON: Short name or full name of a person from any geographic regions.",
    "2. DATE: Any format of dates. Dates can also be in natural language.",
    "3. LOC: Name of any geographic location, like cities, countries, continents, districts etc.",
    "",
    "Output Format:",
    "{{'PERSON': [list of entities present], 'DATE': [list of entities present], 'LOC': [list of entities present]}}",
    "If no entities are presented in any categories keep it None",
    "",
    "Examples:",
    "",
    "1. Sentence: Mr. Jacob lives in Madrid since 12th January 2015.",
    "Output: {{'PERSON': ['Mr. Jacob'], 'DATE': ['12th January 2015'], 'LOC': ['Madrid']}}",
    "",
    "2. Sentence: Mr. Rajeev Mishra and Sunita Roy are friends and they meet each other on 24/03/1998.",
    "Output: {{'PERSON': ['Mr. Rajeev Mishra', 'Sunita Roy'], 'DATE': ['24/03/1998'], 'LOC': ['None']}}",
    "",
    "3. Sentence: {}",
    "Output: ",
])

import openai

# Take the API key from the environment instead of hardcoding it; the value is
# None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

def openai_chat_completion_response(final_prompt):
    """Send the NER prompt to gpt-3.5-turbo and return the reply text.

    The request replays the module-level priming exchange (SYSTEM_PROMPT,
    USER_PROMPT_1, ASSISTANT_PROMPT_1) and appends ``final_prompt`` as the
    last user message.

    Args:
        final_prompt: Text sent as the final user message.

    Returns:
        The message content of the first returned choice, with leading and
        trailing spaces and newlines removed.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": USER_PROMPT_1},
        {"role": "assistant", "content": ASSISTANT_PROMPT_1},
        {"role": "user", "content": final_prompt},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip(" \n")

my_sentence = "Hanyu Pinyin worked under Christopher in the same project for 2 years and the project started on 24-06-2018."
# Format into a separate name so the GUIDELINES_PROMPT template keeps its "{}"
# slot and escaped braces. Overwriting the template made this cell fail on a
# second run: the already-formatted text contains single braces, which
# str.format then tries to parse as replacement fields.
final_prompt = GUIDELINES_PROMPT.format(my_sentence)
ners = openai_chat_completion_response(final_prompt)
print(ners)

Entity Definition:
1. PERSON: Short name or full name of a person from any geographic regions.
2. DATE: Any format of dates. Dates can also be in natural language.
3. LOC: Name of any geographic location, like cities, countries, continents, districts etc.

Output Format:
{'PERSON': [list of entities present], 'DATE': [list of entities present], 'LOC': [list of entities present]}
If no entities are presented in any categories keep it None

Examples:

1. Sentence: Mr. Jacob lives in Madrid since 12th January 2015.
Output: {'PERSON': ['Mr. Jacob'], 'DATE': ['12th January 2015'], 'LOC': ['Madrid']}

2. Sentence: Mr. Rajeev Mishra and Sunita Roy are friends and they meet each other on 24/03/1998.
Output: {'PERSON': ['Mr. Rajeev Mishra', 'Sunita Roy'], 'DATE': ['24/03/1998'], 'LOC': ['None']}

3. Sentence: Hanyu Pinyin worked under Christopher in the same project for 2 years and the project started on 24-06-2018.
Output: 
{
  "id": "chatcmpl-7r7L7uDQdUpoMpbuTkFe5KgoDtoYh",
  "object": "chat.

In [35]:
import openai
openai.api_key = os.getenv("OPENAI_API_KEY")

# NOTE(review): the request helper defined in this cell sends a single user
# message only, so this system prompt is not part of the request as written.
SYSTEM_PROMPT = "You are a smart and intelligent regEx generator system. I will provide you examples of text."

examplesInput = []

# Read the examples inside a context manager so the file handle is closed even
# if reading fails (the handle was previously left open).
with open("phone_numbers_examples.txt", "r") as fileOpen:
    raw_lines = fileOpen.read().split("\n")

# Skip the first two lines (presumably a header — confirm against the file),
# drop empty / whitespace-only lines, and join the rest with commas.
data = ",".join(line for line in raw_lines[2:] if line.strip())

# NOTE(review): "bracketsthe phrases are:" is missing a separator between two
# sentences; left byte-identical here because changing it changes the prompt
# sent to the model.
prompt = "provide a regex expression for each of the following phrases separated by a comma. give me the regex with no other information and separate each regex expression with a comma. do not create a regex expression that is a copy of the original text. encapsulate each of the expressions in bracketsthe phrases are:"+ data
# Earlier prompt wording, kept for reference:
# write me a regex expression  separated by a comma without any other information for each text separated by a comma for the following:" + data

def openai_chat_completion_response(prompt):
    """Send ``prompt`` as a single user message to gpt-3.5-turbo.

    Args:
        prompt: Text sent as the only message of the request.

    Returns:
        The message content of the first returned choice, with surrounding
        whitespace stripped.
    """
    user_message = {"role": "user", "content": prompt}
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[user_message],
    )
    reply_text = completion['choices'][0]['message']['content']
    return reply_text.strip()

# Request the regexes for the example phrases and show the raw reply.
regex_reply = openai_chat_completion_response(prompt)
print(regex_reply)



[0-9]{3}-[0-9]{3}-[0-9]{4},\([0-9]{3}\) [0-9]{3}-[0-9]{4},[0-9]{4} [0-9]{5},[0-9]{3} [0-9]{3} [0-9]{4},[0-9]{10},[0-9]{3}-[0-9]{4}
