In [None]:
#  Make LinkedIn Introduction with OpenAI Assistant

import json
import os
import time
from typing import Optional

from google.api_core.exceptions import DeadlineExceeded
from google.api_core.retry import Retry
from google.cloud import firestore
from openai import OpenAI

# OpenAI client; the API key is read from the OPENAI_API_KEY environment variable
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


def show_json(obj):
    """Display an object in the notebook as parsed JSON.

    The object is serialized with its ``model_dump_json()`` method, the
    resulting string is parsed back with ``json.loads`` and the parsed value
    is handed to the notebook's ``display``.

    Args:
        obj: Any object exposing a ``model_dump_json()`` method that returns
            a JSON string.
    """
    serialized = obj.model_dump_json()
    parsed = json.loads(serialized)
    display(parsed)


# Assume the google-cloud-firestore import is available in the development environment


# Use the local service-account key for Google credentials, but only when the
# key file is actually present in the working directory
service_account_file = "vk-linkedin-master-service-account.json"
if os.path.isfile(service_account_file):
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_file

# Firestore client for project "vk-linkedin", database "linkedin"
db = firestore.Client(project="vk-linkedin", database="linkedin")

# Collection handles: 'extracted' is read by the processing loop,
# 'analysis' receives the generated intros
extracted_ref, analysis_ref = db.collection("extracted"), db.collection("analysis")

In [None]:
def wait_on_run(run, thread, poll_interval=1.0, timeout=None):
    """
    Poll a run until it is no longer queued or in progress.

    While the run's status is "queued" or "in_progress", the function sleeps
    for ``poll_interval`` seconds and then re-fetches the run through the
    module-level ``client``. Sleeping happens *before* each fetch, so the
    function returns as soon as a fetched run has left those two states
    instead of sleeping one more interval first.

    Args:
        run: Run object exposing ``id`` and ``status`` (e.g. the value returned
            by ``client.beta.threads.runs.create``).
        thread: Thread object exposing ``id``; the thread the run belongs to.
        poll_interval (float): Seconds to sleep before each status check.
            Defaults to 1 second.
        timeout (float | None): Maximum number of seconds to keep polling.
            ``None`` (the default) polls without an upper bound.

    Returns:
        The most recently fetched run. Its status is anything other than
        "queued" / "in_progress" -- it is NOT guaranteed to be "completed",
        so callers should inspect ``run.status``.

    Raises:
        TimeoutError: If ``timeout`` is given and the run is still queued or
            in progress once that many seconds have elapsed.

    Note:
        This function uses the OpenAI API, which requires an API key and may incur costs.
    """
    pending_statuses = ("queued", "in_progress")
    deadline = None if timeout is None else time.monotonic() + timeout

    while run.status in pending_statuses:
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Run {run.id} still '{run.status}' after {timeout} seconds"
            )
        time.sleep(poll_interval)
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
    return run

In [None]:
# Make intro from summary using OpenAI Assistant
def make_intro(
    summary, assistant_id="asst_32FBtO8k8v17wQMxyhvATLLq"
) -> Optional[str]:
    """
    Generate an introduction from a summary with the OpenAI Assistant.

    The function creates a new thread, posts the summary to it as a user
    message and starts a run with the given assistant. It waits for the run
    to finish and returns the text of the newest assistant message in the
    thread. The text is returned as-is; it is not parsed or cleaned.

    Args:
        summary (str): The summary to send to the assistant.
        assistant_id (str): ID of the assistant that executes the run.
            Defaults to the assistant this notebook was written for.

    Returns:
        Optional[str]: The assistant's reply text, or None (after printing an
        error message) when any API call fails, the run does not end with
        status "completed", or the thread holds no assistant text reply.
    """

    try:
        # create a thread
        thread = client.beta.threads.create()

        # add summary to the thread
        client.beta.threads.messages.create(
            thread_id=thread.id, role="user", content=summary
        )

        # create run
        run = client.beta.threads.runs.create(
            thread_id=thread.id,
            assistant_id=assistant_id,
        )
    except Exception as e:
        print(f"Error creating OpenAI thread: {e}")
        return None

    # get the run result
    try:
        run = wait_on_run(run, thread)
    except Exception as e:
        print(f"Error getting results from OpenAI thread: {e}")
        return None

    # wait_on_run also returns for failed / cancelled / expired runs. In that
    # case the newest message in the thread is the user's own summary, which
    # must not be mistaken for a generated intro.
    if run.status != "completed":
        print(f"OpenAI run ended with status '{run.status}'; no intro generated")
        return None

    # get messages
    try:
        messages = client.beta.threads.messages.list(thread_id=thread.id)
    except Exception as e:
        print(f"Error reading messages from OpenAI thread: {e}")
        return None

    # data[0] is the last message, so the first assistant entry is the newest reply
    reply = next((m for m in messages.data if m.role == "assistant"), None)
    if reply is None:
        print("OpenAI thread contains no assistant reply")
        return None

    # return the first content part that carries text
    for part in reply.content:
        text = getattr(part, "text", None)
        if text is not None:
            return text.value

    print("OpenAI assistant reply contains no text content")
    return None



In [None]:
# helper function to analyze the summary
def get_inro(summary):
    """Call ``make_intro`` on the summary, shielding the caller from errors.

    Args:
        summary: The summary text passed straight through to ``make_intro``.

    Returns:
        Whatever ``make_intro`` returns, or None when it raises; in that case
        the exception is printed rather than propagated.
    """
    result = None
    try:
        result = make_intro(summary)
    except Exception as e:
        print(f"Error analyzing summary: {e}")
    return result


In [None]:
# process all documents in collection `extracted` and store a generated intro
# for each of them in collection `analysis`

# retry policy for DeadlineExceeded in case of timeout in Firestore;
# shared by every Firestore call in this cell
retry = Retry(predicate=lambda e: isinstance(e, DeadlineExceeded))

# summaries with this many characters or fewer are not sent to the assistant
MIN_SUMMARY_LENGTH = 50

# pause (seconds) after a document fails before moving on to the next one
ERROR_BACKOFF_SECONDS = 10

# get docs
docs = extracted_ref.stream(retry=retry)

# process docs
for doc in docs:
    doc_id = doc.id
    doc_dict = doc.to_dict() or {}

    # skip if intro already exists in the extracted document
    if "intro" in doc_dict:
        continue

    # a missing or empty summary is treated like a too-short one instead of
    # raising and aborting the whole loop
    summary = (doc_dict.get("summary") or "").replace("\n", " ")
    if len(summary) <= MIN_SUMMARY_LENGTH:
        continue

    doc_ref = analysis_ref.document(doc_id)

    try:
        # skip if a previous run already stored an intro for this document,
        # so re-running the cell does not repeat the OpenAI call
        existing = doc_ref.get(retry=retry)
        if existing.exists and "intro" in (existing.to_dict() or {}):
            continue

        # get intro for the summary via OpenAI Assistant
        intro = get_inro(summary)

        # nothing to store when the assistant produced no intro
        if intro is None:
            continue

        update_doc = {
            "profileUrl": doc_dict["profileUrl"],
            "lh_id": doc_dict["lhId"],
            "summary": summary,
            "intro": intro,
        }

        doc_ref.set(update_doc, retry=retry)
    except Exception as e:
        # report which document failed, back off, then carry on with the next one
        print(f"Error processing document {doc_id}: {e}")
        time.sleep(ERROR_BACKOFF_SECONDS)