In [4]:
import sqlite3

# Open the SQLite database file used by this notebook and get a cursor on it.
conn = sqlite3.connect("supplier.db")
cursor = conn.cursor()
# Path of the SQL script that is handed to create_tables() further down this cell.
schema_path = "schema.sql"

def create_tables(conn:sqlite3.Connection,cursor:sqlite3.Cursor,schema_path:str):
    """Run the SQL script stored at ``schema_path`` and commit the result.

    Args:
        conn: Connection on which ``commit()`` is called once the script has run.
        cursor: Cursor used to execute the script.
        schema_path: Path of the text file holding the SQL statements.
    """
    with open(schema_path, "r") as schema_file:
        statements = schema_file.read()

    # The whole file is executed as one multi-statement script.
    cursor.executescript(statements)
    conn.commit()

def get_tables(cursor:sqlite3.Cursor):
    """Return the names of every table listed in ``sqlite_master``.

    Args:
        cursor: Cursor on the database to inspect.

    Returns:
        The result of ``fetchall()``: a list with one 1-tuple ``(name,)`` per table.
    """
    query = "SELECT name FROM sqlite_master WHERE type='table';"
    return cursor.execute(query).fetchall()


# Apply the schema script, then print the tables that now exist in the database.
create_tables(conn, cursor, schema_path)
print(get_tables(cursor))

[('suppliers',), ('branches',), ('contacts',), ('specialism_types',), ('branch_specialisms',)]


In [5]:
import pandas as pd

# Load the merchant-location sheet; header=1 makes pandas take the column names
# from the row at index 1 instead of the first row.
df = pd.read_csv("Merchant Locations - London - Sheet1.csv", header=1)

# All column headings. The first column is skipped below; every other heading is
# inserted as a supplier name.
suppliers = df.columns.tolist()

conn = sqlite3.connect("supplier.db")
try:
    cursor = conn.cursor()

    cursor.executemany(
        "INSERT OR IGNORE INTO suppliers (name) VALUES (?);",
        [(m,) for m in suppliers[1:]],
    )
    conn.commit()

    # Build name -> first-column lookup from the stored rows
    # (presumably the first column is the supplier's primary key; verify against schema.sql).
    cursor.execute("SELECT * FROM suppliers;")
    rows = cursor.fetchall()
    supplier_id_dict = {row[1]: row[0] for row in rows}

    # One branch row per non-empty cell found under each supplier's column.
    cursor.executemany(
        "INSERT OR IGNORE INTO branches (supplier_id, postcode) VALUES (?, ?);",
        [
            (supplier_id_dict[supplier], postcode)
            for supplier in suppliers[1:]
            for postcode in df[supplier]
            if not pd.isna(postcode)
        ],
    )
    conn.commit()
finally:
    # Close the connection even when an insert or a lookup raises, so a failed
    # run of this cell does not leak an open handle on supplier.db.
    conn.close()

In [6]:
# Print every row of the suppliers and branches tables as a quick sanity check.
conn = sqlite3.connect("supplier.db")
try:
    cursor = conn.cursor()

    cursor.execute("SELECT * FROM suppliers;")
    supplier_rows = cursor.fetchall()
    for row in supplier_rows:
        print(row)

    cursor.execute("SELECT * FROM branches;")
    branch_rows = cursor.fetchall()
    for row in branch_rows:
        print(row)
finally:
    # The original cell never closed this connection; release it whether or not
    # the queries succeed.
    conn.close()

(1, 'Jewson', None)
(2, 'TP', None)
(3, 'Selco', None)
(4, 'Laver', None)
(5, 'MKM', None)
(6, 'Howarth', None)
(7, 'Huws Gray', None)
(8, 'PGR', None)
(9, 'Fulham', None)
(10, 'Builder Depot', None)
(11, 'Champion', None)
(12, 'Lords', None)
(13, 'Lawsons', None)
(14, 'ABC', None)
(15, 'EH Smith', None)
(16, 'Alsford', None)
(17, 'Woodlark', None)
(18, 'Ealing Boards', None)
(19, 'MP Moran', None)
(20, 'A E Glazing', None)
(21, 'A1 Building Supplies LTD', None)
(22, 'Abbott Wade', None)
(23, 'ABC Depot', None)
(24, 'Accu', None)
(25, 'Aggregate Industries/Holcim', None)
(26, 'Alsford Timber', None)
(27, 'Amazon - Business', None)
(28, 'AMS Ducting Supplies', None)
(29, 'Angelo Flooring', None)
(30, 'AOV Direct', None)
(31, 'B&Q', None)
(32, 'Beatsons', None)
(33, 'Belgrade Insulation', None)
(34, 'Boydens', None)
(35, 'Bradfords', None)
(36, 'Brewers Decorating Centres', None)
(37, 'Brick Wholesale', None)
(38, 'BSS (HVAC)', None)
(39, 'Builder Depot', None)
(40, 'Builders Beams', Non

In [None]:
from openai import OpenAI
import pandas as pd
from io import StringIO
import os

def call_gpt(prompt:str) -> str:
    """Send ``prompt`` to the ``gpt-4o-mini`` model and return the reply text.

    The API key is read from the ``OPENAI_API_KEY`` environment variable so that
    no secret is stored in the notebook.

    Args:
        prompt: Text passed as the ``input`` of the request.

    Returns:
        The ``output_text`` attribute of the response.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    # NOTE(review): a literal API key used to be embedded on this line. Treat that
    # key as compromised and revoke it; clearing it from the file is not enough.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it before calling call_gpt()."
        )

    client = OpenAI(api_key=api_key)
    response = client.responses.create(
        model="gpt-4o-mini",
        input=prompt,
    )
    return response.output_text

def get_sup_sheet(path, sheet_name,cols) -> pd.DataFrame:
    """Read selected columns of one worksheet from an Excel workbook.

    Args:
        path: Workbook location; it is joined onto the current working directory.
        sheet_name: Forwarded to ``pd.read_excel`` as ``sheet_name``.
        cols: Forwarded to ``pd.read_excel`` as ``usecols``.

    Returns:
        Whatever ``pd.read_excel`` returns for those arguments.
    """
    return pd.read_excel(
        os.path.join(os.getcwd(), path),
        sheet_name=sheet_name,
        usecols=cols,
    )

def build_prompt(table:str) -> str:
    """Wrap ``table`` in the fixed instructions asking the model to fill in websites.

    Args:
        table: Text form of the company table; inserted verbatim after the
            "Here is the table:" line.

    Returns:
        The instruction preamble, the table, and the closing reminder, concatenated.
    """
    sections = (
        # The preamble deliberately ends with a newline plus four spaces, exactly as
        # the original triple-quoted literal did.
        "You are an expert in british construction, specifically materials and merchants. I have a list of companies and I want you to return the table with the website filled in. You can ignore any duplicates and make sure to skip any companies that are not findable.\n    ",
        "\n\nHere is the table:\n",
        table,
        "\n\nmake sure to take your time and fill in all the missing websites. And return only the updated table.",
    )
    return "".join(sections)


# prompt = build_prompt(db_df.to_csv(index=False))
# response = call_gpt(prompt)

# df = pd.read_csv(StringIO(response))
# df.to_excel("output.xlsx", index=False)
# print(db_df.Merchant.to_list())

In [None]:
def get_strengths(sup_path) -> pd.Series:
    """Return the "Accounts, Contacts and Strengths" column of the "Strengths" sheet.

    Args:
        sup_path: Workbook path, passed straight to ``get_sup_sheet``.

    Returns:
        The single requested column as a Series.
    """
    column = "Accounts, Contacts and Strengths"
    sheet = get_sup_sheet(sup_path, "Strengths", [column])
    return sheet[column]

def add_specialism_types(db_path,sup_path):
    """Insert every non-missing strength from the workbook into ``specialism_types``.

    Args:
        db_path: Path of the SQLite database to write to.
        sup_path: Workbook path, passed to ``get_strengths``.

    The insert uses ``INSERT OR IGNORE``, matching the supplier/branch import
    elsewhere in this notebook, so rows that violate a uniqueness constraint are
    skipped instead of aborting a re-run. (Whether ``name`` actually carries such
    a constraint depends on schema.sql; verify there.)
    """
    # Read the spreadsheet before opening the database so a failed read cannot
    # leave a connection behind.
    names = [
        (strength,)
        for strength in get_strengths(sup_path)
        if pd.notna(strength)
    ]

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.executemany(
            "INSERT OR IGNORE INTO specialism_types (name) VALUES (?);",
            names,
        )
        conn.commit()
    finally:
        # Always release the connection, including when the insert raises.
        conn.close()

# Load the strengths listed in the workbook into the specialism_types table of supplier.db.
add_specialism_types("supplier.db", "resources/Supplier Strengths.xlsx")

[(1, 'Aggregates', None), (2, 'Blocks', None), (3, 'Bricks', None), (4, 'Building Chemicals', None), (5, 'Cement', None), (6, 'Cladding', None), (7, 'Concrete ', None), (8, 'Doors', None), (9, 'Drainage', None), (10, 'Fencing', None), (11, 'Fire Boards', None), (12, 'Flag & Kerb', None), (13, 'Foundation', None), (14, 'Insulation', None), (15, 'Landscaping', None), (16, 'Lintels', None), (17, 'Metal Stud', None), (18, 'Metalwork', None), (19, 'Panel Products', None), (20, 'Paving', None), (21, 'Plaster', None), (22, 'Roofing', None), (23, 'Roofline', None), (24, 'Sealants', None), (25, 'Timber', None), (26, 'Wallboard', None), (27, 'Joinery - Flooring', None), (28, 'Kitchens', None), (29, 'Ducting', None), (30, 'Decorating', None), (31, 'Electrical', None), (32, 'Ventilation', None), (33, 'Fixings', None), (34, 'Hire', None), (35, 'Plumbing/Heating', None), (36, 'Bathrooms/Tiling', None), (37, 'Hand Tools', None)]
