In [1]:
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core import download_loader
from llama_index.core import VectorStoreIndex, ServiceContext, Document, StorageContext
from llama_index.llms.openai import OpenAI
from llama_index.llms.ollama import Ollama

In [2]:
import pandas as pd
from typing import Any, Dict
import logging
import json

In [3]:
import sys
# Extend the module search path with the relative ../libraries/ directory
# (presumably where the neo4j_lib and claude_prompts modules imported afterwards live — confirm).
sys.path.append('../libraries/')

In [4]:
import neo4j_lib as nl
import claude_prompts as cp

In [5]:
# Configure root logging at INFO level and create the logger used by the helper functions in this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [7]:
# Chat-history buffer (token_limit=8192) passed to the chat engine as its memory.
MEMORY = ChatMemoryBuffer.from_defaults(token_limit=8192)
# NOTE(review): `llm` is bound twice in a row. The OpenAI instance created on the next line is
# discarded immediately, so the Ollama model is what the rest of the notebook actually uses.
# Keep only the backend that is meant to run.
llm = OpenAI(temperature=0, model="gpt-4o", max_tokens=4096)
# NOTE(review): verify that the Ollama wrapper honours `max_tokens` — TODO confirm against its constructor parameters.
llm = Ollama(model="llama3.1:latest", temperature=0, max_tokens=8192, request_timeout=120.0)

In [8]:
def create_chat_engine(advert):
    """
    Build a context-mode chat engine over a single advert.

    The advert text is wrapped in one ``Document``, indexed with
    ``VectorStoreIndex.from_documents`` and exposed through
    ``index.as_chat_engine`` using the notebook-level ``llm`` and ``MEMORY``.

    Args:
        advert: The advert text to index. Falsy values (``None``, ``""``)
            are rejected.

    Returns:
        The object returned by ``index.as_chat_engine``, or ``None`` when no
        advert text was supplied.
    """
    if not advert:
        # The previous implementation called ``st.error`` here, but ``st`` is not
        # defined anywhere in the visible notebook, so this branch raised
        # NameError instead of returning None. Report through logging instead.
        logging.getLogger(__name__).error(
            f"No advert text supplied (got {advert!r}); cannot create chat engine"
        )
        return None

    documents = [Document(text=advert)]
    index = VectorStoreIndex.from_documents(documents)

    # NOTE(review): MEMORY is a single module-level buffer, so every engine built
    # here is handed the same memory object — confirm that sharing is intended.
    return index.as_chat_engine(
        chat_mode="context",
        llm=llm,
        memory=MEMORY,
        system_prompt=(
            "A a career forensic analyst you have deep insight into crime and criminal activity especially human trafficking.  "
            "Your express goal is to investigate online recruitment advert and extract pertinent factual detail."
        ),
    )

In [19]:

def analyse_advert(chat_engine: Any, advert: str, prompt_name: str) -> Dict[str, Any]:
    """
    Analyse an advertisement with a chat engine and a named prompt.

    The prompt text is looked up in ``cp.CLAUDE_PROMPTS`` by ``prompt_name``,
    ``cp.ANALYSIS_STR`` is appended exactly once, and the combined text is
    sent to ``chat_engine.chat``. The reply is expected to be JSON, optionally
    wrapped in backticks with a leading ``json`` tag.

    Args:
        chat_engine: The chat engine to use for analysis; must expose ``chat``.
        advert: The advertisement text. Not used by this function (the chat
            engine is already built over the advert); kept for interface
            compatibility.
        prompt_name: Key of the prompt in ``cp.CLAUDE_PROMPTS``.

    Returns:
        On success, the ``nl.AnalysisResponse`` built from the parsed JSON.
        On any failure (missing engine, unknown prompt name, chat error,
        unparsable reply), a dict with keys ``result`` (``"error"``),
        ``evidence``, ``explanation`` and ``confidence``.
    """
    if not chat_engine:
        logger.error("Chat engine is not initialized")
        return {
            "result": "error",
            "evidence": [],
            "explanation": "Chat engine not initialized",
            "confidence": 0.0,
        }

    try:
        # Validate BEFORE concatenating: ``None + str`` would raise TypeError and
        # hide the real problem (an unknown prompt name).
        base_prompt = cp.CLAUDE_PROMPTS.get(prompt_name)
        if not base_prompt:
            raise ValueError(f"Invalid prompt name: {prompt_name}")

        # Append the output-format instructions exactly once.
        response = chat_engine.chat(base_prompt + cp.ANALYSIS_STR)
        logger.info(f"Response to {prompt_name}: {response.response}")

        # Extract and parse JSON from the response: drop surrounding backticks
        # and an optional leading 'json' language tag.
        json_str = response.response.strip("`").strip()
        if json_str.startswith("json"):
            json_str = json_str[4:]  # Remove 'json' prefix if present
        parsed_response = json.loads(json_str)

        # Standardize the output
        return nl.AnalysisResponse(**parsed_response)

    except Exception as e:
        # Deliberate best-effort: any failure is reported as an error record
        # rather than interrupting the notebook run.
        logger.error(f"Error processing advert, prompt {prompt_name}: {str(e)}")
        return {
            "result": "error",
            "evidence": [],
            "explanation": str(e),
            "confidence": 0.0,
        }


In [25]:

def analyse_advert_with_prompt(chat_engine: Any, advert: str, prompt: str) -> Dict[str, Any]:
    """
    Analyse an advertisement with a chat engine and an explicit prompt text.

    ``cp.ANALYSIS_STR`` is appended to ``prompt`` and the combined text is sent
    to ``chat_engine.chat``. The reply is expected to be JSON, optionally
    wrapped in backticks with a leading ``json`` tag.

    Args:
        chat_engine: The chat engine to use for analysis; must expose ``chat``.
        advert: The advertisement text. Not used by this function (the chat
            engine is already built over the advert); kept for interface
            compatibility.
        prompt: The prompt text itself (not a prompt name). Must be non-empty.

    Returns:
        On success, the ``nl.AnalysisResponse`` built from the parsed JSON.
        On any failure (missing engine, empty prompt, chat error, unparsable
        reply), a dict with keys ``result`` (``"error"``), ``evidence``,
        ``explanation`` and ``confidence``.
    """
    if not chat_engine:
        logger.error("Chat engine is not initialized")
        return {
            "result": "error",
            "evidence": [],
            "explanation": "Chat engine not initialized",
            "confidence": 0.0,
        }

    try:
        # Validate before concatenating so that ``None`` is reported as an empty
        # prompt and not as a TypeError from ``None + str``.
        if not prompt:
            raise ValueError("Prompt text is empty")
        complete_prompt = prompt + cp.ANALYSIS_STR

        response = chat_engine.chat(complete_prompt)
        # This function receives prompt text, not a name: the earlier code logged
        # ``prompt_name``, which only resolved through a leaked notebook global.
        logger.info(f"Response to supplied prompt: {response.response}")

        # Extract and parse JSON from the response: drop surrounding backticks
        # and an optional leading 'json' language tag.
        json_str = response.response.strip("`").strip()
        if json_str.startswith("json"):
            json_str = json_str[4:]  # Remove 'json' prefix if present
        parsed_response = json.loads(json_str)

        # Standardize the output
        return nl.AnalysisResponse(**parsed_response)

    except Exception as e:
        # Deliberate best-effort: any failure is reported as an error record
        # rather than interrupting the notebook run.
        logger.error(f"Error processing advert with supplied prompt: {str(e)}")
        return {
            "result": "error",
            "evidence": [],
            "explanation": str(e),
            "confidence": 0.0,
        }


In [11]:
# Prompt text for the unprofessional-writing check.
# NOTE: the value is a ONE-ELEMENT TUPLE holding the prompt string, not a bare
# string; a later cell reads it with `unprofessional_writing_prompt[0]`.
# The tuple is written explicitly here so the shape is not hidden in a trailing comma.
unprofessional_writing_prompt = (
    (
        "Analyze the following job advertisement for signs of unprofessional writing such as poor grammar or spelling. "
        "Accept that missing spaces or words not separated by a space or spaces in the text, such as 'andcleaners', 'towork', is NOT a sign of unprofessional writing. "
        "Return your analysis in the following JSON format:"
    ),
)

In [12]:
# Select postings whose stored analysis of type `prompt_name` has result 'yes',
# returning group, posting ids/URL/text and the analysis verdict and explanation.
# `prompt_name` is also read by a later cell that looks up the prompt text.
prompt_name = "assure_prompt"
query = """
MATCH (g:Group)-[:HAS_POSTING]-(posting:Posting)-[has_analysis:HAS_ANALYSIS {type: $prompt_name}]-(analysis:Analysis)
WHERE analysis.result = 'yes'
RETURN ID(g) AS groupIDn, g.name AS group_name, ID(posting) AS IDn, posting.post_url as url, posting.post_id AS post_id, posting.text AS advert, analysis.result as verdict, analysis.explanation as explanation
"""
# Binds the $prompt_name placeholder used in the Cypher text above.
parameters = {"prompt_name": prompt_name}
# assumes execute_neo4j_query returns row records that pandas can tabulate — TODO confirm
adverts = pd.DataFrame(nl.execute_neo4j_query(query, parameters))

Neo4j connection established.
Neo4j connection closed.


In [13]:
# Spot-check the first returned row: posting URL, posting node id (IDn) and group name.
adverts.loc[0]['url'], adverts.loc[0]['IDn'], adverts.loc[0]['group_name']

('https://www.facebook.com/groups/1250339572505634/posts/1511065519766370',
 573471,
 'Cleaning & Hospital Jobs is Hiring Part-Time Jobs')

In [None]:
# Show the stored analysis explanation for the first returned row.
adverts.loc[0]['explanation']

In [14]:
# Fetch one advert by Neo4j node id.
# NOTE(review): 573388 is hard-coded and differs from the IDn displayed for row 0 of
# `adverts` (573471) — confirm this is the posting that is meant to be analysed.
IDn = 573388
advert = nl.get_neo4j_advert(IDn)

Neo4j connection established.
Neo4j connection closed.


In [15]:
# Display the value returned by nl.get_neo4j_advert for inspection.
advert

'PICK N PAY is hiring General workers andcleaners, 90 men and 160 women towork at our storeNo certificate wanted .salary R4500per monthClosing date 28 March 2024Age 18 to 35 yearsMust hard workersMust be available urgentlyMust be will to work and learnAPPLY HERE:https://sites.google.com/view/job-updates2024/homeFor more vacancies'

In [27]:
# Build the chat engine over the fetched advert (create_chat_engine indexes it as a single Document).
chat_engine = create_chat_engine(advert)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


"Examine the following job advertisement. Does the job ad convey a tone or use language that subtly assures applicants they are highly likely to get the job, even if it doesn't explicitly say so? Look for phrases or a manner of presenting the opportunity that implies it's a 'sure thing' or guarantees success. Return your analysis in the following JSON format:"

In [20]:
# Run the analysis using the prompt stored under the key 'assure_prompt'.
# On failure analyse_advert returns an error dict instead of raising, so inspect the result.
advert_analysis = analyse_advert(chat_engine, advert, 'assure_prompt')

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
ERROR:__main__:Error processing advert, prompt assure_prompt: [Errno 61] Connection refused


In [23]:
# Look up the prompt text by name; `prompt_name` is set in the Neo4j query cell.
# dict-style .get presumably yields None for an unknown name — verify CLAUDE_PROMPTS is a dict.
prompt =  cp.CLAUDE_PROMPTS.get(prompt_name)

In [28]:
# Re-run the analysis, this time passing the prompt text directly rather than its name.
# This overwrites the `advert_analysis` produced by the earlier analyse_advert call.
advert_analysis =  analyse_advert_with_prompt(chat_engine, advert, prompt)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
INFO:__main__:Response to assure_prompt: {"result": "yes", "evidence": ["No certificate wanted.", "salary R4500 per month"], "confidence": 0.8, "explanation": "The job ad presents a relatively low barrier to entry (no certificate required) and a competitive salary, which may create an impression of ease in securing the position."}


In [None]:
# Display the most recent analysis result.
advert_analysis

In [None]:
# unprofessional_writing_prompt is a one-element tuple (its definition ends with a trailing comma),
# so [0] yields the prompt string itself.
unprofessional_writing_prompt[0]

In [None]:
# NOTE(review): re-binds `llm` after the chat engine was already built, and omits the
# request_timeout used by the earlier Ollama assignment — looks like leftover experimentation; consider removing.
llm = Ollama(model="llama3.1", temperature=0, max_tokens=8192)

In [6]:
# NOTE(review): identical to the Ollama assignment in the configuration cell. The recorded run of
# this cell raised NameError (Ollama not defined), presumably because it ran before the imports — consider deleting it.
llm = Ollama(model="llama3.1:latest", temperature=0, max_tokens=8192, request_timeout=120.0)

NameError: name 'Ollama' is not defined