Instructions:
1. Connect to a GPU runtime (most likely a T4, because it is free).
2. Drag the file "Clean Georgia Data.csv" into the file panel on the left.
3. Open the Runtime menu in the top left and click "Run all".


Be warned: It's not always 100% accurate

# Setup 5-8 mins

In [None]:
!pip install unsloth
!pip install peft

from unsloth import FastLanguageModel
from peft import PeftModel

# Load the base model with FastLanguageModel.
# Binds the module-level `model` and `tokenizer` that the later cells read;
# `load_in_4bit=True` requests the 4-bit quantized weights.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    # NOTE(review): 1048 looks like a typo for 1024 or 2048 -- confirm the
    # intended context length; process_query puts up to ten full policies
    # into the prompt.
    max_seq_length=1048,
    dtype=None,  # presumably lets unsloth pick the dtype -- confirm against its docs
    load_in_4bit=True
)

In [None]:
!pip install sklearn
!pip install pandas
!pip install nltk
!pip install sentence-transformers

In [None]:
import torch
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, util
import nltk
from nltk.corpus import stopwords

# Ensure NLTK stopwords are downloaded
# NOTE(review): nothing in the visible code reads the NLTK stopword list; the
# TF-IDF search uses scikit-learn's built-in stop_words='english' instead.
nltk.download("stopwords")

# Load CSV data for policies
file_path = 'Clean Georgia Data.csv'  # Adjust path if necessary
# NOTE(review): 'MacRoman' presumably matches how the CSV was exported --
# confirm, since a mismatched codec would garble non-ASCII text.
df = pd.read_csv(file_path, encoding='MacRoman')

# Load the SBERT model for policy ranking
model_sbert = SentenceTransformer('all-MiniLM-L6-v2')

# Load the main language model and tokenizer with LoRA adapter
# NOTE(review): base_model_name is not referenced anywhere in the visible code.
base_model_name = "unsloth/Llama-3.2-3B-Instruct"
adapter_path = "jaspersands/model"  # Path to your LoRA adapter on Hugging Face

# Wrap the `model` loaded in the setup cell with the adapter and rebind the
# same global name.
# NOTE(review): re-running this cell would wrap the already wrapped model
# again -- presumably the runtime should be restarted instead; confirm.
model = PeftModel.from_pretrained(model, adapter_path)

In [20]:
from unsloth.chat_templates import get_chat_template
# print(tokenizer)
def search_relevant_policies(query, df, top_n=10):
    """Retrieve the top N relevant policies based on cosine similarity with TF-IDF.

    A TF-IDF vectorizer (English stop words removed) is fit on the ``Content``
    column on every call, and each row is scored by the cosine similarity
    between its vector and the vector of ``query``.

    Args:
        query: Free-text search string.
        df: DataFrame with a ``Content`` column holding the policy text.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with at most ``top_n`` rows of ``df``, best match first.
        It is empty when ``top_n`` is not positive or ``df`` has no rows.
    """
    # ``argsort()[-0:]`` is the whole array and a negative top_n slices from
    # the wrong end, so non-positive values must be handled before slicing.
    # An empty frame would make the vectorizer raise on an empty vocabulary.
    if top_n <= 0 or len(df) == 0:
        return df.iloc[0:0]

    # A missing cell (NaN) is not a valid document for TfidfVectorizer;
    # treat it as empty text so one blank row cannot break every search.
    documents = df['Content'].fillna('').astype(str)

    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(documents)
    query_vector = tfidf.transform([query])
    cosine_sim = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # argsort is ascending: take the last top_n positions and reverse them.
    top_indices = cosine_sim.argsort()[-top_n:][::-1]
    return df.iloc[top_indices]

def process_query(query):
    """Process a query to return the model's response and the most relevant policy link.

    Reads the module-level ``df``, ``tokenizer``, ``model`` and ``model_sbert``.

    Args:
        query: The user's question.

    Returns:
        A dict with two keys:
            "response": the full decoded model output. The prompt is included,
                so callers strip it with ``get_content_after_query``.
            "most_relevant_link": the ``Link to Content`` value of the
                retrieved policy whose content is most similar to the newly
                generated answer.
    """

    # Step 1: Retrieve relevant policies based on the query
    relevant_policies = search_relevant_policies(query, df)

    # Step 2: Format the relevant policies for input to the model
    formatted_policies = [
        f"Title: {row['Title']}\nTerritory: {row['Territory']}\nType: {row['Type']}\nYear: {row['Year']}\nCategory: {row['Category']}\nFrom: {row['From']}\nTo: {row['To']}\nContent: {row['Content']}\nLink: {row['Link to Content']}\n"
        for _, row in relevant_policies.iterrows()
    ]
    relevant_policy_text = "\n\n".join(formatted_policies)

    # Step 3: Structure messages for the chat template; the retrieved
    # policies are supplied as the system message.
    messages_with_relevant_policies = [
        {"role": "system", "content": relevant_policy_text},
        {"role": "user", "content": query},
    ]

    # Step 4: Tokenize the input for the model
    inputs = tokenizer.apply_chat_template(
        messages_with_relevant_policies,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to("cuda" if torch.cuda.is_available() else "cpu")
    prompt_length = inputs.shape[-1]

    # Step 5: Generate the model's response
    # NOTE(review): temperature/min_p presumably only take effect when
    # sampling is enabled -- confirm whether do_sample=True was intended.
    FastLanguageModel.for_inference(model)
    outputs = model.generate(input_ids=inputs, max_new_tokens=256, use_cache=True, temperature=1.5, min_p=0.1)

    # The returned sequence is prompt + continuation. Keep the full text as
    # the "response" value (unchanged contract) and decode the continuation
    # separately for ranking.
    generated_response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    answer_only = tokenizer.batch_decode(outputs[:, prompt_length:], skip_special_tokens=True)[0]

    # Step 6: Rank the relevant policies using SBERT.
    # Embed only the newly generated answer: the prompt already contains
    # every candidate policy, so embedding the full text would score the
    # policies against the prompt rather than against what the model said.
    response_embedding = model_sbert.encode(answer_only, convert_to_tensor=True)
    policy_embeddings = model_sbert.encode(relevant_policies['Content'].tolist(), convert_to_tensor=True)
    cosine_similarities = util.cos_sim(response_embedding, policy_embeddings).flatten()
    most_relevant_index = cosine_similarities.argmax().item()
    most_relevant_link = relevant_policies.iloc[most_relevant_index]['Link to Content']

    # Step 7: Return the response and the most relevant link
    return {
        "response": generated_response,
        "most_relevant_link": most_relevant_link
    }

def get_content_after_query(response_text, query):
    """Return the part of ``response_text`` that follows the query.

    The query is matched case-insensitively and the first occurrence wins.

    Args:
        response_text: Decoded model output that may contain the query.
        query: The question text to look for.

    Returns:
        The text after the first occurrence of ``query``, stripped of
        surrounding whitespace. If the query does not occur, the whole
        ``response_text`` is returned, stripped, as a fallback.
    """
    import re  # local import so this function does not depend on cell-level imports

    # Search the original string rather than a lower-cased copy: lower() can
    # change a string's length (e.g. "İ" becomes two code points), which would
    # shift the offset when it is applied back to the original text.
    match = re.search(re.escape(query), response_text, flags=re.IGNORECASE)
    if match is None:
        # If the query is not found, return the full response text as a fallback
        return response_text.strip()
    return response_text[match.end():].strip()

# Final run 1min

You can just run this cell and change the query if the setup has already run once

You'll see the original response, which should end with a link that may or may not lead to a real database page, followed by a second link that is copied directly from the database.

In [None]:
# The question to ask; edit this string and re-run the cell to try another query.
query = "How much will the state pay for childrens allowance?"
# Returns a dict with the keys "response" and "most_relevant_link".
result = process_query(query)

# Extract only the content after the query
# (falls back to the whole response text when the query is not found in it)
content_after_query = get_content_after_query(result["response"], query)

# Display the cleaned response and the most relevant link
print("Query: ", query)
print("Response:", content_after_query)
print("Most Relevant Link:", result["most_relevant_link"])