This notebook is for testing new ideas to improve the results shown on the persons page.

In [7]:
import json
from ast import literal_eval
from concurrent.futures import ThreadPoolExecutor
from typing import List, Dict, Any, Optional, Tuple

import openai, time, os, requests
import pandas as pd
from UnleashClient import UnleashClient
%matplotlib inline

# Configuration read from environment variables.
# os.environ.get returns None (it does not raise) when a variable is not set.
google_api_key = os.environ.get('GOOGLE_API_KEY')
search_engine_id = os.environ.get('SEARCH_ENGINE_ID')
# The OpenAI key is stored twice: on the openai module and as a constant that the
# request helpers further down put into the Authorization header.
openai.api_key = os.environ.get('OPENAI_API_KEY')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
# Passed as `timeout=` to the HTTP calls made by the helpers below.
REQUESTS_TIMEOUT = 15
# Current working directory; not referenced by any cell visible in this part of the notebook.
directory = os.getcwd()
# Model name sent in the "model" field of the chat completion payloads.
MODEL = "gpt-3.5-turbo"

In [5]:

def refine_search_result(search_result: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Reduce a Google search result to its title, link and snippet.

    Args:
        search_result (dict): A single search result as returned by the search API.

    Returns:
        dict: A new dictionary holding only the 'title', 'link' and 'snippet' entries,
            or None when any of those three keys is absent from the input.
    """
    wanted_fields = ("title", "link", "snippet")

    # Reject the result as soon as one required field is missing.
    for field in wanted_fields:
        if field not in search_result:
            return None

    return {field: search_result[field] for field in wanted_fields}

In [6]:
def perform_google_search(search_query: str) -> Optional[List[Dict[str, Any]]]:
    """
    Perform a Google Custom Search and return the results reduced to title, link and snippet.

    The query is passed through ``params`` so that requests URL-encodes it; quotes,
    spaces, ``&`` or ``#`` inside the query can no longer corrupt the request URL.

    Args:
        search_query (str): The search query to be used.

    Returns:
        list: Refined search results (dicts with 'title', 'link' and 'snippet'). Results
            that lack any of those fields are dropped instead of appearing as None.
            None is returned when the request fails, the response is not valid JSON,
            or the response has no 'items' entry.
    """
    try:
        response = requests.get(
            'https://www.googleapis.com/customsearch/v1',
            params={'key': google_api_key, 'cx': search_engine_id, 'q': search_query},
            timeout=REQUESTS_TIMEOUT,  # without a timeout a stalled connection blocks the notebook forever
        )
        items = response.json()['items']
        refined_results = [refine_search_result(item) for item in items]
    except Exception as exc:
        # Best effort: callers expect None on failure. Only the exception type is printed
        # because requests error messages can contain the full URL, including the API key.
        print(f"Google search failed: {type(exc).__name__}")
        return None

    return [result for result in refined_results if result is not None]

In [13]:
# Ad-hoc experiment: a hand-built query made of a quoted job title, a quoted company name
# and the quoted term "Present", restricted to linkedin.com/in/ pages.
# NOTE(review): the concatenation produces `"Accenture""Present"` with no space between
# the two quoted terms — confirm whether that is intended.
query = '"data engineer"' + " at " + '"Accenture"' + '"Present"' + " site:linkedin.com/in/"
results = perform_google_search(query)

In [19]:
def format_google_search_query(job_titles: List[str], company: str) -> str:
    """
    Build a Google search query for LinkedIn profiles holding any of the given job titles at a company.

    Each job title is wrapped in double quotes, the quoted titles are joined with " OR "
    inside parentheses, and the quoted company name plus a linkedin.com/in/ site
    restriction are appended.

    Args:
        job_titles (list): A list of job titles.
        company (str): The company name.
    Returns:
        str: A formatted google search query.
    """
    quoted_titles = [f'"{title}"' for title in job_titles]
    titles_clause = " OR ".join(quoted_titles)
    return f'({titles_clause}) at "{company}" site:linkedin.com/in/'

In [21]:
# Show the query generated for two job titles at one company.
print(format_google_search_query(["data engineer","data scientist"], "Accenture"))

("data engineer" OR "data scientist") at "Accenture" site:linkedin.com/in/


In [8]:
def get_synonyms_from_open_ai(job_title: str) -> Optional[Dict[str, List[str]]]:
    """
    Ask the OpenAI chat completions endpoint for synonyms of a job title.

    The HTTP response body is JSON and is parsed as JSON. The model's answer is parsed
    as JSON first and, if that fails, as a Python literal, so answers written with
    single quotes are still accepted.

    :param job_title: job title; sent to the model as ``str(job_title)``.
        NOTE(review): the system prompt describes the input as a JSON object whose keys
        are job titles with empty lists as values — confirm what callers actually pass.
    :return: the parsed model answer, or None when the request fails, times out or the
        answer cannot be parsed
    """

    prompt_system = """
    You are a sales person trying to find as many people as possible that perform a certain role at a company. 
    You will receive a json where each key is a single job title. Respond with as many job titles that are 
    synonyms (but no more than 5) to the one received, meaning that people having those job positions perform exactly the same job or 
    their scope of tasks overlap greatly. Include acronyms too even if they duplicate Your existing suggestions.
    Each key has empty list assigned. Fill the list with synonyms.
    Respond in format that is json parsable. Do not explain or comment on your answer.
    """

    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}",
               "Content-Type": "application/json"}
    data = {
        "model": MODEL,
        "messages": [
            {
                "role": "system",
                "content": prompt_system},
            {
                "role": "user",
                "content": str(job_title)
            }
        ],
        "temperature": 0
    }

    try:
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers,
                                 json=data, timeout=REQUESTS_TIMEOUT)
        response.raise_for_status()
        # The body is JSON: literal_eval cannot read JSON's null/true/false, so use the JSON parser.
        content = response.json()['choices'][0]['message']['content']
        try:
            return json.loads(content)
        except ValueError:
            # The model sometimes answers with a Python-style literal (e.g. single quotes).
            return literal_eval(content)
    except requests.exceptions.Timeout:
        print("Timeout occurred")
        return None
    except Exception as exc:
        # Best effort: report what actually went wrong instead of labelling everything a timeout.
        print(f"OpenAI request failed: {type(exc).__name__}: {exc}")
        return None


def verify_last_experience(experiences, company_id):
    """
    Check whether the first listed experience of a LinkedIn profile refers to a given company.

    Only ``experiences[0]`` is inspected, and the check is a plain substring test of
    ``company_id`` against that entry's "company_linkedin_profile_url" value.

    Args:
        experiences (list): A list of experiences from a LinkedIn profile.
        company_id (str): The linkedin company ID to check.

    Returns:
        bool: True if the company ID occurs in the first experience's company URL;
            False if there are no experiences, the URL is empty/None, or the ID is absent.
    """
    # No experiences at all: nothing to verify.
    if not experiences:
        return False

    latest_company_url = experiences[0]["company_linkedin_profile_url"]
    if not latest_company_url:
        return False

    return company_id in latest_company_url

In [9]:
def get_answer_from_open_ai(job_title: str, company: str,
                            search_results: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Ask the OpenAI chat completions endpoint to score search results for a job title at a company.

    At most the first 10 usable search results are sent. None entries in
    ``search_results`` are skipped, and when nothing usable is left (including
    ``search_results`` being None or empty) no request is made.

    Args:
        job_title (str): The job title to be used in the prompt.
        company (str): The company name to be used in the prompt.
        search_results (list): A list of dictionaries containing refined search results with title, link, and snippet.

    Returns:
        The parsed model answer, or None when there is nothing to score, the request
        fails or times out, or the answer cannot be parsed.
        NOTE(review): the prompt asks for a JSON list of
        'Score++Full Name++Job title++LinkedIn URL' strings, so the parsed value is
        expected to be a list rather than the dict the return annotation suggests.
    """
    # Guard first: slicing None raised TypeError before the request was ever attempted.
    usable_results = [result for result in (search_results or []) if result is not None]
    if not usable_results:
        return None

    prompt_system = f"""
    You are going to receive a list of jsons of real google search results, each with an url to a LinkedIn 
    profile, title and snippet. Your task is assign scores 1-10 to each of the profiles following mechanism:
    1 - very unlikely to be {job_title} at {company}, 10 - very likely to be {job_title} at {company}. You can
    assign the same score to multiple candidates.
    Sort the profiles descending by score and return top 10.
    Do not comment on Your answer or explain it. Your answer must be a json parsable list and each entry
    in the format: 'Score++Full Name++Job title++LinkedIn URL'
    """

    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}",
               "Content-Type": "application/json"}
    data = {
        "model": MODEL,
        "messages": [
            {
                "role": "system",
                "content": prompt_system},
            {
                "role": "user",
                "content": f"Search results: {usable_results[:10]}"
            }
        ],
        "temperature": 0
    }

    try:
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers,
                                 json=data, timeout=REQUESTS_TIMEOUT)
        response.raise_for_status()
        # The body is JSON: literal_eval cannot read JSON's null/true/false, so use the JSON parser.
        content = response.json()['choices'][0]['message']['content']
        try:
            return json.loads(content)
        except ValueError:
            # The model sometimes answers with a Python-style literal (e.g. single quotes).
            return literal_eval(content)
    except requests.exceptions.Timeout:
        print("Timeout occurred")
        return None
    except Exception as exc:
        # Best effort: report what actually went wrong instead of labelling everything a timeout.
        print(f"OpenAI request failed: {type(exc).__name__}: {exc}")
        return None