In [1]:
import instructor
from openai import OpenAI
from typing import Iterable, Literal, List, Optional, Tuple, Union
from pydantic import BaseModel, Field
import pandas as pd

In [31]:
"""
Apollo.io API People Search API: 

person_titles (optional)	An array of the person's title. Apollo will return results matching ANY of the titles passed in	["sales director", "engineer manager"]
q_keywords (optional)	A string of words over which we want to filter the results	"Tim"
prospected_by_current_team (optional)	An array of string booleans defining whether we want models prospected by current team or not. "no" means to look in net new database only, "yes" means to see saved contacts only	["no"]
person_locations (optional)	An array of strings denoting allowed locations of the person	["California, US", "Minnesota, US"]
person_seniorities (optional)	An array of strings denoting the seniorities or levels	["senior", "manager"]
contact_email_status (optional)	An array of strings to look for people having a set of email statuses	["verified", "guessed", "unavailable", "bounced", "pending_manual_fulfillment"]
q_organization_domains (optional)	An array of the company domains to search for, joined by the new line character.	"google.com\nfacebook.com"
organization_locations (optional)	An array of strings denoting allowed locations of organization headquarters of the person	["California, US", "Minnesota, US"]
organization_ids (optional)	An array of organization ids obtained from companies-search	["63ff0bc1ff57ba0001e7eXXX"]
organization_num_employees_ranges (optional)	An array of intervals to include people belonging in an organization having number of employees in a range	["1,10", "101,200" ]
page (optional)	An integer that allows you to paginate through the results	1
per_page (optional)	An integer to load per_page results on a page. Should be in inclusive range [1, 100]	10
"""

# Equivalent Pydantic model for the Apollo.io API People Search API
class ApolloPeopleSearch(BaseModel):
    person_titles: Optional[List[str]] = Field(..., description="An array of the person's title. Apollo will return results matching ANY of the titles passed in e.g. ['sales director', 'engineer manager']")
    q_keywords: Optional[str] = Field(..., description="A string of words over which we want to filter the results e.g. 'Tim'")
    prospected_by_current_team: Optional[List[str]] = Field(..., description="An array of string booleans defining whether we want models prospected by current team or not. 'no' means to look in net new database only, 'yes' means to see saved contacts only e.g. ['no']")
    person_locations: Optional[List[str]] = Field(..., description="An array of strings denoting allowed locations of the person e.g. ['California, US', 'Minnesota, US']")
    person_seniorities: Optional[List[str]] = Field(..., description="An array of strings denoting the seniorities or levels e.g. ['senior', 'manager']")
    contact_email_status: Optional[Literal["verified", "guessed", "unavailable", "bounced", "pending_manual_fulfillment"]] = Field(None, description="An array of strings to look for people having a set of email statuses")
    q_organization_domains: Optional[str] = Field(..., description="An array of the company domains to search for, joined by the new line character e.g. 'google.com\nfacebook.com'")

"""
Apollo.io API Companies Search API: 
Parameter	Description	Example
organization_ids (optional)	An array of organization ids obtained from companies-search	["63ff0bc1ff57ba0001e7eXXX"]
organization_num_employees_ranges (optional)	An array of intervals to include organizations having number of employees in a range	["1,10", "101,200" ]
organization_locations (optional)	An array of strings denoting allowed locations of organization headquarters	["California, US", "Minnesota, US"]
organization_not_locations (optional)	An array of strings denoting un-allowed locations of organization headquarters	["Chicago, US"]
q_organization_keyword_tags (optional)	An array of strings denoting the keywords an organization should be associated with	["sales strategy", "lead"]
prospected_by_current_team (optional)	An array of string booleans defining whether we want models prospected by current team or not. "no" means to look in net new database only, "yes" means to see saved organizations only	["no"]
q_organization_name (optional)	A string representing the name of the organization we want to filter	"Apollo"
page (optional)	An integer that allows you to paginate through the results	1
per_page (optional)	An integer to load per_page results on a page. Should be in inclusive range [1, 100]	10
"""

# Equivalent Pydantic model for the Apollo.io API Companies Search API
class ApolloCompaniesSearch(BaseModel):
    organization_ids: Optional[List[str]] = Field(..., description="An array of organization ids obtained from companies-search")
    organization_num_employees_ranges: Optional[str] = Field(..., description="An array of intervals to include organizations having number of employees in a range e.g. '1,10' or '101,200'")
    organization_locations: Optional[List[str]] = Field(..., description="An array of strings denoting allowed locations of organization headquarters")
    organization_not_locations: Optional[List[str]] = Field(..., description="An array of strings denoting un-allowed locations of organization headquarters")
    q_organization_keyword_tags: Optional[List[str]] = Field(..., description="An array of strings denoting the keywords an organization should be associated with")
    prospected_by_current_team: Optional[List[str]] = Field(..., description="An array of string booleans defining whether we want models prospected by current team or not. 'no' means to look in net new database only, 'yes' means to see saved organizations only")
    q_organization_name: Optional[str] = Field(..., description="A string representing the name of the organization we want to filter")
    page: Optional[int] = Field(..., description="An integer that allows you to paginate through the results")
    per_page: Optional[int] = Field(..., description="An integer to load per_page results on a page. Should be in inclusive range [1, 100]")

In [32]:
client = instructor.from_openai(OpenAI())

class QueryAnnotation(BaseModel):
    id: str = Field(..., description="Unique Query ID") 
    expanded_query_variants: List[str] = Field(..., description="Rephrased queries with specific intent e.g. South India -> Bangalore, Bengaluru, Chennai, Hyderabad, Kochi")
    segments: List[str] = Field(..., description="Segments of the query to perform search on e.g. 'South India'")
    search_type: Literal["people", "companies"] = Field(..., description="Type of search to perform")

In [33]:
def get_query_annotation(data: str) -> Iterable[QueryAnnotation]:
    return client.chat.completions.create(
        model="gpt-4o-mini",
        response_model=Iterable[QueryAnnotation],
        messages=[
            {
                "role": "user",
                "content": f"Consider the user query below: '\n{data}' and annotate the query",
            },
        ],
        max_tokens=1000,
        temperature=0.0,
    seed=42,
)

In [34]:
def query_annotation(data: str) -> Iterable[Union[ApolloPeopleSearch, ApolloCompaniesSearch]]:
    query_annotation = get_query_annotation(data)
    for annotation in query_annotation:
        if annotation.search_type == "people":
            apollo_query = client.chat.completions.create(
                model="gpt-4o-mini",
                response_model=ApolloPeopleSearch,
                messages=[
                    {
                        "role": "user",
                        "content": f"Consider the user query below: '\n{data}' and annotate the query",
                    },
                ],
            )
        elif annotation.search_type == "companies":
            apollo_query = client.chat.completions.create(
                model="gpt-4o-mini",
                response_model=ApolloCompaniesSearch,
                messages=[
                    {
                        "role": "user",
                        "content": f"Consider the user query below: '\n{data}' and annotate the query",
                    },
                ],
            )
            print(f"Annotating companies search query: {annotation.id}")
        else:
            raise ValueError(f"Invalid search type: {annotation.search_type}")
        yield apollo_query

# for annotated_query in query_annotation("I am looking for a sales director in California with 10+ years of experience in B2B SaaS"):
#     print(annotated_query.model_dump_json(indent=2))

queries = ["I am looking for a sales director in California with 10+ years of experience in B2B SaaS", "Which B2B SaaS companies have more than 100 employees in California?"]
for query in queries:
    for annotated_query in query_annotation(query):
        print(f"Query: {query}")
        print(f"Annotated Query: {annotated_query.model_dump_json(indent=2)}")
        print("\n")

Annotating people search query: 1
Query: I am looking for a sales director in California with 10+ years of experience in B2B SaaS
Annotated Query: {
  "person_titles": [
    "sales director"
  ],
  "q_keywords": "B2B SaaS",
  "prospected_by_current_team": [
    "no"
  ],
  "person_locations": [
    "California, US"
  ],
  "person_seniorities": [
    "senior"
  ],
  "contact_email_status": null,
  "q_organization_domains": null
}


Annotating companies search query: 1
Query: Which B2B SaaS companies have more than 100 employees in California?
Annotated Query: {
  "organization_ids": null,
  "organization_num_employees_ranges": "101,",
  "organization_locations": [
    "California"
  ],
  "organization_not_locations": null,
  "q_organization_keyword_tags": [
    "B2B",
    "SaaS"
  ],
  "prospected_by_current_team": null,
  "q_organization_name": null,
  "page": 1,
  "per_page": 10
}


