# Set up

In [2]:
from TendersWA.Models import Embedding_Model as em
from TendersWA.Preprocessing import Text as text
import os
import pandas as pd
import numpy as np

In [3]:
# Embedding model wrapper from the project's Embedding_Model module. It is later
# called as ``model(query)`` to turn a search string into a query embedding.
# NOTE(review): presumably this must be the same model that produced the stored
# tender embeddings loaded below — confirm.
model = em.Sentence_transformer()

In [4]:
# Read every array stored in the .npz archive, in the order given by ``files``.
# The ``with`` block closes the underlying zip file as soon as the arrays have
# been read, instead of leaving the handle open for the life of the kernel.
with np.load('../data/embedding_data/sent_transformer_embeddings.npz') as embedding_archive:
    embs = [embedding_archive[f] for f in embedding_archive.files]
# convert into data frame
tender_embedding_df = pd.DataFrame({"Embedding": embs})

In [5]:
import pandas as pd
tenders_structured_path = r"../data/UpdatedAgainTenders.xlsx"

# Load the structured tender sheet and build the lookup table in one chain:
#   * "Reference Number" is read as text so it is not coerced to a number,
#   * only the three columns used for display are kept,
#   * rows without a reference number are dropped,
#   * the first row is kept for each reference number,
#   * the descriptions are cleaned column-wise with ``Series.map`` rather than a
#     row-by-row ``iterrows`` loop with per-cell writes.
tenders_structured = (
    pd.read_excel(tenders_structured_path, dtype={"Reference Number": str})
    .loc[:, ["Reference Number", "Contract Title", "Description"]]
    .dropna(subset=["Reference Number"])
    .drop_duplicates(subset=["Reference Number"])
    # clean the descriptions.
    .assign(Description=lambda frame: frame["Description"].map(text.remove_html_tags))
)

In [6]:
# Only the first line of the references file is used.
# NOTE(review): presumably it holds the printed form of a list of reference
# numbers, e.g. "['REF1', 'REF2']" — confirm against the code that wrote it.
# The ``with`` block closes the file; ``readline`` returns "" for an empty file.
with open("../data/embedding_data/tender_references.txt", "r") as tender_refs:
    content = tender_refs.readline()

# Remove the list punctuation. The closing bracket has to go as well, otherwise
# the last reference keeps a trailing "]" and can never match in a merge.
for unwanted in ("[", "]", "'", ","):
    content = content.replace(unwanted, "")

# Drop the trailing newline / surrounding blanks, then split on single spaces so
# the positions stay aligned with the embeddings they will be paired with.
refs = content.strip().split(" ")

In [7]:
# Label each embedding row with its reference number (assigned by position).
tender_embedding_df["Reference Number"] = refs

# Left join: every embedding row is kept, and title/description columns are
# filled in wherever the reference number is found in the structured sheet.
merged_data = pd.merge(
    tender_embedding_df,
    tenders_structured,
    how="left",
    on="Reference Number",
)

In [24]:
#https://arxiv.org/abs/2010.00117
def mmr(query_emb, embs, d = 0.5, n = 10):
    """Greedily pick embeddings by a Maximal-Marginal-Relevance style score.

    Each round selects the not-yet-selected embedding with the highest

        d * sim(query, emb) - (1 - d) * max(0, max sim(emb, already selected))

    where ``sim`` is ``em.cosine_sim``. Ties go to the lowest index.

    Parameters
    ----------
    query_emb : embedding of the query, passed as-is to ``em.cosine_sim``.
    embs : indexable collection of candidate embeddings (``len`` and integer
        indexing are used).
    d : weight on query similarity; ``1 - d`` weights the redundancy penalty.
    n : maximum number of embeddings to select.

    Returns
    -------
    list of int
        Indices into ``embs`` in selection order. At most
        ``min(n, len(embs))`` indices are returned, so asking for more results
        than there are candidates no longer yields a bogus ``-1`` index.
    """
    candidate_count = len(embs)

    # Similarity to the query does not change between rounds: compute it once.
    query_sims = [em.cosine_sim(query_emb, emb) for emb in embs]

    # Highest similarity seen so far between each candidate and any selected
    # embedding. It starts at 0, so negative similarities never turn the
    # penalty into a bonus (same floor as a fresh max search starting from 0).
    redundancy = [0] * candidate_count

    is_selected = [False] * candidate_count
    selected = []
    newest = None  # embedding chosen in the previous round, if any

    for _ in range(min(n, candidate_count)):
        best_score = float("-inf")
        best_index = -1
        for idx in range(candidate_count):
            if is_selected[idx]:  # skip found embeddings
                continue
            if newest is not None:
                # Only the latest pick can raise the running maximum.
                sim_to_newest = em.cosine_sim(embs[idx], newest)
                if sim_to_newest > redundancy[idx]:
                    redundancy[idx] = sim_to_newest

            score = d * query_sims[idx] - (1 - d) * redundancy[idx]
            if score > best_score:
                best_score = score
                best_index = idx

        if best_index < 0:
            # No candidate produced a comparable score (e.g. all NaN): stop
            # rather than record an invalid index.
            break

        is_selected[best_index] = True
        selected.append(best_index)
        newest = embs[best_index]
    return selected

from sklearn.metrics.pairwise import cosine_similarity
def query_tenders(query, model, merged_data, top_k=10, algorithm = "top", d = 0.5):
    """Return the rows of ``merged_data`` that best match ``query``.

    The query is embedded with ``model`` and compared against the "Embedding"
    column. With ``algorithm == "top"`` the ``top_k`` rows with the highest
    cosine similarity are returned, most similar first. Any other value of
    ``algorithm`` delegates the selection to ``mmr`` with trade-off ``d``.

    Only the "Reference Number", "Contract Title" and "Description" columns
    are returned.
    """
    # Encode the query, model needs to be the same that generated tender embeddings
    query_vector = model(query)

    # Combine the per-row embeddings into a single array.
    embedding_matrix = np.block([[vector] for vector in merged_data["Embedding"]])

    if algorithm != "top":
        chosen_rows = mmr(query_vector, embedding_matrix, d, top_k)
    else:
        scores = cosine_similarity(query_vector.reshape(1, -1), embedding_matrix)[0]
        # argsort is ascending: take the last top_k and flip to best-first.
        chosen_rows = scores.argsort()[-top_k:][::-1]

    output_columns = ["Reference Number", "Contract Title", "Description"]
    return merged_data.iloc[chosen_rows][output_columns]

from IPython.display import HTML
def query_and_print_tenders(query, model, merged_data, top_k=10, algorithm = "top", d = 0.5):
    """Run ``query_tenders`` and render the query heading plus the result table.

    Parameters are forwarded unchanged to ``query_tenders``. Nothing is
    returned; the heading and the results are shown through the notebook's
    display machinery.
    """
    import html  # stdlib; only needed here to escape the heading text

    res_df = query_tenders(query, model, merged_data, top_k=top_k, algorithm = algorithm, d = d)

    # Escape the query so characters such as "<" or "&" are shown literally,
    # and close the heading tag that was previously left open.
    display(HTML(f"<h4>Query: {html.escape(str(query))}</h4>"))

    # ``Panda`` is not imported by any visible cell of this notebook, so on a
    # fresh kernel the name may be missing. Use its pretty printer when it is
    # available; otherwise fall back to pandas' rich display with untruncated
    # text columns.
    panda_helper = globals().get("Panda")
    if panda_helper is not None:
        panda_helper.pretty_print(res_df)
    else:
        with pd.option_context("display.max_colwidth", None):
            display(res_df)

# Querying 

In [20]:
# Baseline retrieval: algorithm "top" returns the 3 tenders whose embeddings
# have the highest cosine similarity to the query, most similar first.
query_and_print_tenders("hospital plumbing", model, merged_data, top_k = 3, algorithm = "top")

Reference Number,Contract Title,Description
SMHS202311395,Fremantle Hospital - Hydraulic Fixture Audit and Floor Plans Update,"South Metropolitan Health Service, Fremantle Hospital require a Hydraulic Water Outlets Audit to be performed, and update Floor Plans for all buildings and floors, excluding the V Block, representing 58 floors."
WACHS20205659,Panel Contract for Plumbing Services to WACHS Goldfields Region,"WACHS Goldfields requires plumbing services across a range of clinical, administrative and residential facilities. The scope of plumbing services required includes, but is not limited to, maintenance, repairs and upgrading of: Hot and cold water services and associated fixtures; Waste water and storm water drainage assets; Reduced Pressure Zone Devices (RPZDs); Sewage pumps; Gas fired water heating equipment; and Gas cooking equipment and reticulated gas outlets."
WACHS202310069,Panel Contract for Plumbing Services to WA Country Health Service - South West Region,"The WA Country Health Service South West (WACHS South West) is seeking offers for the provision of plumbing services at clinical, administrative and residential facilities across the South West Region to ensure continued reliable operation of plumbing services and assets. The requirement ranges from short-term emergency plumbing services (available 24/7) to larger scheduled plumbing projects involving replacement or upgrading of plumbing assets as described in this Request. Suitably qualified, experienced and licenced service providers are to provide the services required and comply with Australian Standard AS3500 Plumbing & Drainage across all WACHS South West facilities."


In [27]:
# Same query through the MMR selection (any algorithm value other than "top"
# takes that path). d = 0.7 puts weight 0.7 on similarity to the query and 0.3
# on the penalty for resembling already-selected tenders; top_k stays at its
# default of 10.
query_and_print_tenders("hospital plumbing", model, merged_data, algorithm = "mmr", d = 0.7)

Reference Number,Contract Title,Description
SMHS202311395,Fremantle Hospital - Hydraulic Fixture Audit and Floor Plans Update,"South Metropolitan Health Service, Fremantle Hospital require a Hydraulic Water Outlets Audit to be performed, and update Floor Plans for all buildings and floors, excluding the V Block, representing 58 floors."
GRA20200103,Provision of Replacement Evaporative Air Conditioning Units at Graylands Hospital,"North Metropolitan Health Service (NMHS) requires the removal, replacement, installation of two (2) evaporative air conditioning units located in the Frankland Gymnasium, contained within a secure facility in Graylands. Hospital."
WACHS20205659,Panel Contract for Plumbing Services to WACHS Goldfields Region,"WACHS Goldfields requires plumbing services across a range of clinical, administrative and residential facilities. The scope of plumbing services required includes, but is not limited to, maintenance, repairs and upgrading of: Hot and cold water services and associated fixtures; Waste water and storm water drainage assets; Reduced Pressure Zone Devices (RPZDs); Sewage pumps; Gas fired water heating equipment; and Gas cooking equipment and reticulated gas outlets."
WACHS20194721,Patient Bathroom Conversion to Universal Access (UA) Amenities Donnybrook Hospital,WACHS-SW Infrastructure requires a contractor to convert the existing patient shower and toilets (x 2) to New Single Universal Access (UA) Shower and Toilet to comply with the latest version AS1428.1-2009 at Donnybrook Hospital
EMHS202311376,SUPPLY OF HEMOFILTRATION FLUIDS,SUPPLY OF HEMOFILTRATION FLUIDS RPH ICU
DOH202210703,Department of Health - Maintenance of Medical Equipment,Department of Health - Maintenance of Medical Equipment
SMHS20182363,Supply of Haemodialysis Machines and Associated Consumables to Fiona Stanley Hospital,"The South Metropolitan Health Service (SMHS) has a requirement for the supply installation, commissioning and maintenance of haemodialysis machines, treatment chairs, renal dialysis data management system, mobile water purification systems and associated accessories, consumables, training and services through a consolidated Price Per Treatment (PPT) arrangement for the Nephrology and Renal Transplantation Service at Fiona Stanley Hospital (FSH). The Contractor will retain ownership of the haemodialysis machines, dialysis treatment chairs and mobile water purification systems at the end of the Term of the Customer Contract."
20023951016,Armadale Health Service_Negative Pressure Room Creation,To carry out works in existing patient rooms to create 2 negative pressure isolation rooms that comply with WAHFG type 5 isolation room requirements. One negative pressure room being part of the ICU area and the other in Canning Ward.
CAHS19029,Patient Meals Kitchen High Level Cleaning & Cool Rooms Cleaning and Sanitising,Patient Meals Kitchen High Level Cleaning & Cool Rooms Cleaning and Sanitizing at Perth Children's Hospital
NMHS202210645,"Supply, Delivery, Installation, Commissioning and Maintenance of two Steelco TW 3000/2 Smart Tunnel Double Chamber Compartment Decontamination Washer Systems for Sir Charles Gairdner Hospital","North Metropolitan Health Service (NMHS) requires the Supply, Installation, Commissioning and Maintenance of two Steelco TW 3000/2 ?Smart Tunnel? Double Chamber Compartment Decontamination Washer Systems (?Decontamination Washer Systems?) for SCGH."
