In [24]:
import pandas as pd
import numpy as np
from typing import List
import os

import faiss
import ollama
from ollama import chat as ollama_chat 

In [3]:
# Read the ANZSCO occupations CSV into a DataFrame (path is relative to the notebook)
df = pd.read_csv(filepath_or_buffer="../data/anzsco_full.csv")

In [4]:
# Build the text that will be embedded for each occupation.
# Four newline-separated parts: occupation name, skill level, tasks and a
# "Path: "-prefixed path. Missing values are replaced by empty strings first.
df["doc_text"] = df["occupation_name"].fillna("").str.cat(
    [
        df["skill_level"].fillna(""),
        df["tasks"].fillna(""),
        "Path: " + df["path"].fillna(""),
    ],
    sep="\n",
)

In [5]:
# Function to get embeddings using Ollama
def get_ollama_embedding(text, model_name="nomic-embed-text"):
    """Return the embedding vector for ``text`` from an Ollama embedding model.

    This is a best-effort helper: it never raises. Any failure is reported on
    stdout and signalled to the caller by a ``None`` return value, so callers
    must check for ``None`` before using the result.

    Args:
        text: The text to embed; passed to Ollama as the ``prompt``.
        model_name: Name of the Ollama embedding model to use.

    Returns:
        The value of the response's ``'embedding'`` field, or ``None`` when the
        call failed or the returned embedding was missing/empty.
    """
    # Log only a short preview: the inputs can be long documents, and one
    # failure per row would otherwise flood the notebook output.
    preview = str(text)
    if len(preview) > 80:
        preview = preview[:80] + "..."
    try:
        # Call the Ollama embeddings API
        response = ollama.embeddings(model=model_name, prompt=text)
        embedding = response['embedding']
        # An empty vector is useless downstream (it has no dimension), so it
        # is treated the same way as a failed request.
        if embedding is None or len(embedding) == 0:
            raise ValueError("empty embedding returned")
        return embedding
    except Exception as e:
        print(f"Error getting embedding for text: '{preview}' - {e}")
        return None

In [10]:
# Embed every document text, one Ollama call per row, keeping row order.
# Entries are None for rows where get_ollama_embedding reported a failure.
embeddings = [get_ollama_embedding(doc) for doc in df["doc_text"]]

In [20]:
# Build the FAISS index from the embeddings that were actually produced.
# get_ollama_embedding returns None on failure, so such rows are skipped here;
# otherwise len(embeddings[0]) or the array conversion below would break.
valid_positions = [
    pos for pos, emb in enumerate(embeddings)
    if emb is not None and len(emb) > 0
]
if not valid_positions:
    raise ValueError("No embeddings were generated; cannot build the FAISS index.")
if len(valid_positions) < len(embeddings):
    print(f"Skipping {len(embeddings) - len(valid_positions)} rows without an embedding.")

embedding_matrix = np.asarray(
    [embeddings[pos] for pos in valid_positions], dtype="float32"
)
dim = embedding_matrix.shape[1]  # embedding dimension
index = faiss.IndexFlatL2(dim)
index.add(embedding_matrix)

# Keep metadata mapping
# FAISS ids are the row positions inside embedding_matrix, so the metadata list
# is filtered with the same positions to stay aligned with the index.
all_records = df.to_dict(orient="records")
id_to_meta = [all_records[pos] for pos in valid_positions]


In [21]:
def retrieve(query: str, k: int = 5) -> List[dict]:
    """Return the metadata records of the occupations closest to ``query``.

    The query is embedded with ``get_ollama_embedding`` and searched against
    the module-level FAISS ``index``; hits are mapped back to records through
    ``id_to_meta``.

    Args:
        query: Free-text question or search phrase.
        k: Maximum number of neighbours to return.

    Returns:
        Up to ``k`` records, in the order FAISS returned them. Fewer than ``k``
        are returned when the index holds fewer vectors.

    Raises:
        ValueError: If the query could not be embedded.
    """
    query_vec = get_ollama_embedding(query)
    if query_vec is None:
        # Fail with a clear message instead of an obscure shape error in FAISS.
        raise ValueError(f"Could not embed query: {query!r}")

    q_emb = np.asarray([query_vec], dtype="float32")
    _, neighbor_ids = index.search(q_emb, k)
    # FAISS pads the id list with -1 when it has fewer than k results; indexing
    # a Python list with -1 would silently return the last record instead.
    return [id_to_meta[i] for i in neighbor_ids[0] if i >= 0]


In [22]:
# Example question, answered from the three closest occupation records
query = "What are the tasks of Human Resource Clerks?"
results = retrieve(query=query, k=3)

# Retrieved documents are separated by a blank line inside the prompt
context = "\n\n".join(record["doc_text"] for record in results)

prompt = f"""
You are an expert on ANZSCO occupations.
Use the following context to answer the question:

{context}

Question: {query}
Answer:
"""
# `prompt` is only built here; pass it to an LLM of your choice (Ollama, Azure OpenAI, etc.)


In [35]:
# import requests

# # method 1: Disable streaming (simplest)
# def ollama_chat(prompt: str, model: str = "llama3") -> str:
#     url = "http://localhost:11434/api/chat"
#     payload = {
#         "model": model,
#         "messages": [
#             {"role": "system", "content": "You are an expert on ANZSCO occupations."},
#             {"role": "user", "content": prompt}
#         ],
#         "stream": False   # 👈 important
#     }
#     response = requests.post(url, json=payload)
#     response.raise_for_status()
#     data = response.json()
#     return data["message"]["content"]

# # Example usage
# answer = ollama_chat("Tell me about Human Resource Clerks")
# print(answer)


In [None]:
import requests, json

# method 2: Handle streaming properly
def ollama_chat_stream(
    prompt: str,
    model: str = "llama3",
    *,
    base_url: str = "http://localhost:11434",
    system_prompt: str = "You are an expert on ANZSCO occupations.",
    timeout=None,
):
    """Stream a chat reply from an Ollama server, yielding text as it arrives.

    This is a generator: the HTTP request is only sent once iteration starts,
    and the connection is closed when the generator finishes or is closed.

    Args:
        prompt: The user message.
        model: Name of the Ollama chat model.
        base_url: Root URL of the Ollama server; ``/api/chat`` is appended.
        system_prompt: Content of the system message sent before ``prompt``.
        timeout: Passed through to ``requests.post``. ``None`` (the default)
            waits indefinitely.

    Yields:
        The ``message.content`` string of each streamed JSON line that has one.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If a streamed JSON line carries an ``error`` field.
    """
    url = base_url.rstrip("/") + "/api/chat"
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt}
        ],
        "stream": True
    }
    with requests.post(url, json=payload, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        for line in r.iter_lines():
            if not line:
                # Blank lines between streamed JSON objects carry no data
                continue
            data = json.loads(line.decode("utf-8"))
            # Surface errors reported inside the stream instead of ending the
            # generator silently with a partial or empty answer.
            if isinstance(data, dict) and data.get("error"):
                raise RuntimeError(f"Ollama returned an error: {data['error']}")
            if "message" in data and "content" in data["message"]:
                yield data["message"]["content"]



In [34]:

# Example usage: print each streamed piece as soon as it arrives
for piece in ollama_chat_stream("Tell me about Human Resource Clerks"):
    print(piece, end="", flush=True)

According to the Australian and New Zealand Standard Classification of Occupations (ANZSCO), Human Resource Clerks are classified under the occupation code 132211.

Here's a summary of the job description, duties, and requirements:

**Job Description:**

Human Resource Clerks perform routine administrative tasks in support of human resources functions. They may work in various settings such as private companies, government agencies, or not-for-profit organizations.

**Key Duties:**

1. Perform general administrative tasks, such as answering phone calls, responding to emails, and maintaining records.
2. Assist with recruitment processes by posting job ads, screening resumes, and scheduling interviews.
3. Maintain accurate and up-to-date personnel records, including employee information, training records, and performance evaluations.
4. Handle benefits administration, such as processing employee insurance claims or managing employee leave.
5. Coordinate internal communications, meetings,

In [None]:
# Write the FAISS index to ./faiss_db, creating the directory on first use
os.makedirs(name="./faiss_db", exist_ok=True)
faiss.write_index(index, "./faiss_db/faiss_index.bin")


In [None]:
# Load the FAISS index from disk
# The result is bound to a separate name, so the in-memory `index` is untouched.
# NOTE(review): only the index file is read here; the `id_to_meta` list used to
# map search hits back to records is not persisted in this notebook, so it has
# to be rebuilt in the same order as the indexed vectors — confirm before reuse.
loaded_index = faiss.read_index("./faiss_db/faiss_index.bin")