In [None]:
!pip install psycopg

Collecting psycopg
  Downloading psycopg-3.2.7-py3-none-any.whl.metadata (4.5 kB)
Downloading psycopg-3.2.7-py3-none-any.whl (200 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 200.0/200.0 kB 3.9 MB/s eta 0:00:00
Installing collected packages: psycopg
Successfully installed psycopg-3.2.7


In [2]:
# 1) Install dependencies
!pip install --quiet gspread pandas scikit-learn oauth2client

# 2) Authenticate with Google
from google.colab import auth
auth.authenticate_user()

# 3) Connect to Google Sheets API via gspread
import gspread
from google.auth import default

creds, _ = default()
gc = gspread.authorize(creds)

# 4) Read Sheet1 from your public Google Sheet
import pandas as pd

sheet_id = "1g7oYCpSqa8J3X4nv94LDGR53fJTMQRs9YqDsIQgos-8"
sh = gc.open_by_key(sheet_id)
ws = sh.worksheet("Sheet1")
# One dict per data row, keyed by the header row of the worksheet.
records = ws.get_all_records()
df = pd.DataFrame(records)

# Rename the "Result" column to 'category' and keep only the two columns
# used for training.
df = df.rename(columns={"Result": "category"})
df = df[["company_name", "category"]].dropna()

# get_all_records() reports blank cells as empty strings rather than NaN, so
# dropna() alone leaves rows with an empty name or label in the data. It may
# also return numeric-looking cells as numbers. Cast everything to text, trim
# surrounding whitespace, and drop rows where either field is empty.
df = df.astype(str).apply(lambda col: col.str.strip())
df = df[(df["company_name"] != "") & (df["category"] != "")]

# 4b) Pull the recruiter names from the "Recruiters" worksheet
ws_recruiters = sh.worksheet("Recruiters")
recruiters_list = ws_recruiters.col_values(1)  # first column only

# Skip blank cells and header-like labels; the comparison is done on the
# stripped, lowercased value, but the name itself is kept as read.
recruiters_cleaned = []
for name in recruiters_list:
    if name.strip().lower() in ("", "company_name", "name"):
        continue
    recruiters_cleaned.append(name)

# Every recruiter row gets the constant label "Recruitment"
df_recruiters = pd.DataFrame({"company_name": recruiters_cleaned}).assign(
    category="Recruitment"
)

# 4c) Stack the Sheet1 rows on top of the recruiter rows, then remove rows
# that are exact duplicates of an earlier row
training_frames = [df, df_recruiters]
df_combined = pd.concat(training_frames, ignore_index=True)
df_combined = df_combined.drop_duplicates()

# 5) Show the first rows of the final training data
print("Combined training data preview:")
preview = df_combined.head()
print(preview)

# 6) Train model
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Two-step pipeline: lowercased token counts of the company name, fed into a
# multinomial Naive Bayes classifier.
vectorizer = CountVectorizer(lowercase=True)
classifier = MultinomialNB()
pipeline = Pipeline(steps=[
    ("vectorizer", vectorizer),
    ("classifier", classifier),
])

# Fit on the combined frame: names are the inputs, categories are the labels.
training_names = df_combined["company_name"]
training_labels = df_combined["category"]
pipeline.fit(training_names, training_labels)

# 7) Classifier function
def classify_company(name: str) -> str:
    """Return the category the fitted ``pipeline`` predicts for ``name``.

    The name is wrapped in a one-element list for ``pipeline.predict`` and the
    single resulting label is returned.
    """
    predictions = pipeline.predict([name])
    return predictions[0]

# 8) Test: print the predicted category for a few sample company names
print("\nExample predictions:")
example_names = (
    "FDM Group",
    "MinterEllison",
    "Next Apex",
    "PERSOLKELLY",
    "Visy Industries",
)
for company in example_names:
    label = classify_company(company)
    print(f"{company} → {label}")


Combined training data preview:
                                        company_name  category
0                       Wizard Professional Services  Industry
1                          AUSTRALIAN FEDERAL POLICE  Industry
2                         First Quantum Minerals Ltd  Industry
3                           Western Australia Police  Industry
4  Department of Energy, Mines, Industry Regulati...  Industry

Example predictions:
FDM Group → Industry
MinterEllison → Industry
Next Apex → Industry
PERSOLKELLY → Recruitment
Visy Industries → Industry


In [None]:
# Interactive classification loop
# Prompts until the user types 'exit'. Interrupting the cell or reaching
# end-of-input also ends the loop cleanly instead of leaving a traceback.
while True:
    try:
        user_input = input("Enter a company name (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # input() sits outside the classification handler below, so these
        # would otherwise terminate the cell with an unhandled exception.
        print("\nGoodbye!")
        break

    if user_input.lower() == "exit":
        print("Goodbye!")
        break
    if user_input == "":
        print("Please enter a valid company name.\n")
        continue

    # Best-effort: report a classification failure and keep prompting.
    try:
        result = classify_company(user_input)
    except Exception as e:
        print(f"Error: {e}\n")
    else:
        print(f"→ {user_input} is classified as: {result}\n")


→ comm bank is classified as: Industry

→ MSWA is classified as: Industry

→ Real time is classified as: Recruitment

→ Kinexus is classified as: Recruitment

