In [8]:
import pandas as pd

from TendersWA import Panda
# Structured tender records; load_cluster_pair joins the cluster assignments
# onto these rows via the "Reference Number" column.
structured_tenders = Panda.load_tender_uniques("../../data/UpdatedAgainTenders.xlsx")

# Loads a pair of cluster CSV files and combines them into a pandas DataFrame.
def load_cluster_pair(clusters_fname, cluster_topics_fname, tenders=None):
    """Load a cluster-assignment CSV plus its topics CSV and join them onto the tenders.

    Parameters
    ----------
    clusters_fname : str or path-like
        CSV with "Reference Number" and "Cluster" columns. Square brackets
        are stripped from "Reference Number" before merging.
    cluster_topics_fname : str or path-like
        CSV keyed by "Cluster"; its "Topics" column is read as text.
    tenders : pandas.DataFrame, optional
        Frame with a "Reference Number" column to attach the clusters to.
        Defaults to the notebook-level ``structured_tenders``.

    Returns
    -------
    pandas.DataFrame
        Inner join of ``tenders`` with the cluster assignments on
        "Reference Number". Topics are attached with a left join on
        "Cluster", so a cluster absent from the topics file gets missing
        topic values rather than being dropped.
    """
    if tenders is None:
        # Fall back to the frame loaded at the top of the notebook.
        tenders = structured_tenders

    clusters = pd.read_csv(clusters_fname, dtype={"Reference Number": str})
    # Vectorised, literal (non-regex) removal of the brackets; unlike a
    # per-row str.replace this leaves missing reference numbers untouched
    # instead of raising.
    clusters["Reference Number"] = (
        clusters["Reference Number"]
        .str.replace("[", "", regex=False)
        .str.replace("]", "", regex=False)
    )

    topics = pd.read_csv(cluster_topics_fname, dtype={"Topics": str})
    clusters_with_topics = pd.merge(clusters, topics, on="Cluster", how="left")
    return pd.merge(tenders, clusters_with_topics, on="Reference Number")

In [9]:
# Tenders joined with the cluster ids and topics from the BERT clustering files.
bert_df = load_cluster_pair("../../data/clustering/bert_clusters.csv", "../../data/clustering/bert_cluster_topics.csv")

In [10]:
# Tenders joined with the cluster ids and topics from the sentence-BERT clustering files.
sentence_transformer_df = load_cluster_pair("../../data/clustering/sentence_bert_clusters.csv", "../../data/clustering/sentence_bert_cluster_topics.csv")

In [48]:
from IPython.display import HTML
def display_cluster(df, cluster):
    """Show the topics heading and the tenders belonging to one cluster.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "Cluster", "Topics", "Reference Number",
        "Contract Title" and "Description" columns.
    cluster : scalar
        Cluster id, compared with ``==`` against ``df["Cluster"]``.

    Raises
    ------
    KeyError
        If no row of ``df`` belongs to ``cluster``.
    """
    from html import escape  # local import: only this helper needs it

    subset = df.loc[df["Cluster"] == cluster].reset_index(drop=True)
    if subset.empty:
        # Previously surfaced as an opaque "KeyError: 0" from the .at lookup.
        raise KeyError(f"No tenders found for cluster {cluster!r}")

    # All rows of a cluster share the same topics, so the first row suffices.
    cluster_topics = subset.at[0, "Topics"]
    # Escape the text (it comes from a data file) and close the heading tag.
    display(HTML(f"<h4>Topics: {escape(str(cluster_topics))}</h4>"))
    Panda.pretty_print(subset[["Reference Number", "Contract Title", "Description"]])

In [54]:
# Show the tenders assigned to cluster 5 of the sentence-BERT clustering.
display_cluster(sentence_transformer_df, 5)

Reference Number,Contract Title,Description
MCB20212210,Supply deliver one licensed Articulated :Loader diesel,Supply & deliver one (1) licensed small Articulated Loader 1700Kg GVM diesel engine at Karrakatta Cemetery as er specification.
MCB20192009,Supply Deliver Diesel licensed loader,Supply and deliver one licensed articulated loader to Guildford Cemetery as per specification.
PRO5621,Palmyra Bus Depot | D&C Diesel Storage Upgrades,Programmed Facility Management Pty Ltd (PFM) on behalf of Public Transport Authority (PTA) for Transperth are seeking Tenders from suitably qualified and experienced contractors to undertake the design and construction of Diesel storage upgrade at Palmyra Bus Depot. The Work includes but is not limited to ? Installation of new diesel storage and distribution system and; ? Remediation of the existing diesel storage and distribution system.
PTA190852,Supply and Delivery of Diesel Exhaust Fluid for the Transwa Road Coach Depot,Supply and Delivery of Diesel Exhaust Fluid for the Transwa Road Coach Depot


In [59]:
def search_topics(key_words, df):
    """Print the clusters whose topics text matches ``key_words``.

    A cluster matches when ``key_words`` is a substring of its "Topics"
    string or the "Topics" string is a substring of ``key_words``. The
    comparison is case-sensitive. Clusters with missing topics are skipped.

    Parameters
    ----------
    key_words : str
        Text to look for.
    df : pandas.DataFrame
        Frame with "Cluster" and "Topics" columns; only the first row of
        each cluster is considered.
    """
    cluster_topics = (
        df[["Cluster", "Topics"]]
        .drop_duplicates(subset=["Cluster"])
        # A missing topic is a float NaN, and `str in float` raises TypeError.
        .dropna(subset=["Topics"])
    )
    is_match = pd.Series(
        [
            key_words in topics or topics in key_words
            for topics in cluster_topics["Topics"]
        ],
        index=cluster_topics.index,
        dtype=bool,
    )
    matches = cluster_topics.loc[is_match, ["Cluster", "Topics"]].reset_index(drop=True)
    Panda.pretty_print(matches)

In [62]:
# List the clusters whose topics contain "food"; this is a substring match,
# so a topic such as "bidfood" also counts.
search_topics("food", sentence_transformer_df)

Cluster,Topics
226,"hospitality, supply, prepared, food, catering"
877,"food, patient, meals, hospital, trolleys"
885,"beverage, industry, development, food, wa"
1258,"echocardiogram, pilbara, audit, safety, food"
1100,"pfd, bidfood, products, prospector, buffet"


In [63]:
# Inspect cluster 885, one of the "food" matches listed by search_topics above.
display_cluster(sentence_transformer_df, 885)

Reference Number,Contract Title,Description
JTSI2223006,Input for the WA Innovation Strategy and development of an Expression of Interest for a Venture Fund,Input for the WA Innovation Strategy and development of an Expression of Interest for a Venture Fund
JTSI2021042,Free Trade Agreements - WA Legislative Review,Free Trade Agreements - Legislative Review
DPIRD2023101,Western Australia (WA) Craft Beer Industry Development Strategy,"DPIRD is seeking proposals from suitably qualified consultants to develop a Craft Beer Industry Development strategy for Western Australia. The strategy is being developed in a partnership with industry representative bodies, the Independent Brewers Association, the WA Brewers Association and the South West Brewers Alliance. The strategy will align with the national blueprint, ?Future Brew 2031? developed by the Independent Brewers Association in 2021."
DPIRD2022077,WA Food and Beverage Industry Group Study,The Department of Primary Industries and Regional Development (DPIRD?s) Food Industry Development Branch requires the service of a suitable Consultant to assist with the understanding of what Agrifood industry representation exists in Western Australia (WA) or has in the past. What is the gap in the market and what options survey of global peers suggestion and established best practice models may be appropriate and can succeed in the WA context.
DPIRD2022086,Market Research and Decision Support Tools to Assist the Export of Perishable WA Food and Beverage,Market Research and Decision Support Tools to Assist the Export of Perishable WA Food and Beverage
DPIRD2021088,The WA Agrifood Opportunity Review - Provision of Evidence and Insights to Demonstrate the Economic Benefits of Government Support to the WA Value Add Sector of Primary Industries,"WA has historically underperformed in the level of value adding to our agricultural produce when compared against other States, and has therefore limited the economic potential and benefits of the sector. The broader Food and Beverage (F&B) value add sector has been identified as a priority sector for the State (Ref: Diversify WA and Department of Primary Industries and Regional Development (DPIRD?s) Primary Industries Plan) due to its potential to create economic growth, diversification and jobs (in metro and the regions) combined with Australia?s natural comparative advantages in agricultural production, food quality and safety. The Agribusiness, Food & Trade (AFT) Directorate within DPIRD is looking to work with a consultant to deliver a consolidated report which will help inform strategic direction and program design."
DPIRD2021082,"WA Organics Industry: Opportunities, Challenges and Options for Development","WA Organics Industry: Opportunities, Challenges and Options for Development"
DPIRD2021079,Characterisation of export markets and business development opportunities for Western Australian food and beverage manufacturers in food for health markets,Characterisation of export markets and business development opportunities for Western Australian food and beverage manufacturers in food for health markets
DPIRD2021045,Provision of Consultancy Services for the Development of a WA Specific Framework for the Bioenergy and Bio Products Industries,Provision of Consultancy Services for the Development of a WA Specific Framework for the Bioenergy and Bio Products Industries
DPIRD2021012,Sustainably Produced Food What does it mean to the End Consumer,"The Department of Primary Industries and Regional Development (DPIRD) is the WA Government’s lead agency for the growth of the WA agrifood industry. The Agribusiness Food and Trade (AFT) team, within DPIRD, leads the Department’s export development and investment attraction industry support programs, in alignment with DPIRD’s Primary Industries Plan. The WA agribusiness sector contributes a gross value of production of over $10 billion, contributing over 55,000 jobs in WA. Agrifood exports account for 5% of WA exports, with 60% of export receipts derived from six (6) Asian nations. The Department’s focus is to ensure that by 2030, increasingly sophisticated, diversified and globally competitive primary industries will be making a significant contribution to creating prosperity across WA. Specifically, the Department wants to understand how sustainability will contribute to an increasingly competitive WA primary industries sector. Specifically, how can the sector maximise economic returns whilst ensuring sustainable communities and minimising environmental impacts (the three components of sustainability – economic, community, environment)."


In [74]:
# Count the rows assigned to each cluster; value_counts lists the largest clusters first.
sentence_transformer_df[["Cluster"]].value_counts()

Cluster
1404       86
397        68
1324       63
10         60
71         49
           ..
1566        1
1392        1
1394        1
1852        1
1273        1
Name: count, Length: 2000, dtype: int64