In [8]:
# Cell 1 — Imports & config
import pandas as pd
import re
from pathlib import Path

# Path to your orders CSV
CSV_PATH = Path("../doc/registration.csv")

# Output .sql file for INSERTs (optional)
OUTPUT_SQL = Path("reg_inserts.sql")

# Helper: SQL quote and NULL handling
def sql_quote(val):
    if val is None:
        return "NULL"
    s = str(val)
    # treat empty strings as NULL
    if s.strip() == "":
        return "NULL"
    # escape single quotes
    s = s.replace("'", "''")
    return f"'{s}'"

def is_valid_sg_phone(s):
    # exactly 8 digits, all numeric
    return bool(re.fullmatch(r"\d{8}", s or ""))


In [9]:
# Cell 2 — Load CSV into DataFrame
df = pd.read_csv(CSV_PATH)

# Ensure expected columns exist (will raise if missing)
expected = ["Date","Time","Phone","Firstname","Lastname"]
missing = [c for c in expected if c not in df.columns]
if missing:
    raise ValueError(f"CSV missing columns: {missing}")

df.head()


Unnamed: 0,Date,Time,Phone,Firstname,Lastname
0,2024-03-01,12:19:23,93627414,Ignazio,Abrahmer
1,2024-03-01,15:39:48,89007281,Bernard,Cowlard
2,2024-03-01,16:19:03,81059611,Laurette,Birney
3,2024-03-01,18:39:04,93342383,Corby,Crinage
4,2024-03-01,19:22:02,85625766,Mal,Bavister


In [11]:
# Cell 3 — Select requested columns and DROP duplicates by Order (keep first)
cols_needed = ["Date","Time","Phone","Firstname","Lastname"]
df_sel = df[cols_needed].copy()

# Drop duplicates by Order, keeping the first occurrence
# df_uniq = df_sel.drop_duplicates(subset=["Order"], keep="first").reset_index(drop=True)

# Basic normalization
# - Strip whitespace
# for c in ["Card","CardType"]:
#     df_uniq[c] = df_uniq[c].astype(str).str.strip()

# Correctly clean the 'Phone' column
# df_uniq["Phone"] = df_uniq["Phone"].apply(lambda x: str(int(x)) if pd.notna(x) and str(x).replace('.', '', 1).isdigit() else None)

# # Convert TotalPrice to numeric (coerce invalid -> NaN -> treat as 0.0)
# df_uniq["TotalPrice"] = pd.to_numeric(df_uniq["TotalPrice"], errors="coerce").fillna(0.0)

# df_uniq.head()
df_sel.head()

Unnamed: 0,Date,Time,Phone,Firstname,Lastname
0,2024-03-01,12:19:23,93627414,Ignazio,Abrahmer
1,2024-03-01,15:39:48,89007281,Bernard,Cowlard
2,2024-03-01,16:19:03,81059611,Laurette,Birney
3,2024-03-01,18:39:04,93342383,Corby,Crinage
4,2024-03-01,19:22:02,85625766,Mal,Bavister


In [None]:
# Cell 4 — Derive fields for bills INSERTs
# bills(bill_date, bill_time, bill_id, payment, total_bill, card_number, card_type, phone)

def infer_payment(card):
    return "card" if (card is not None and str(card).strip() != "") else "cash"
cols_needed = ["Date","Time","Phone","Firstname","Lastname"]
rows = []
for _, r in df_sel.iterrows():
    bill_date   = r["Date"]           # yyyy-mm-dd
    bill_time   = r["Time"]           # hh:mm:ss
    phone_raw   = r["Phone"]     # text id
    firstname   = r["Firstname"]
    lastname   = r["Lastname"]

    rows.append({
        "bill_date": bill_date,
        "bill_time": bill_time,
        "phone_raw": phone_raw,
        "firstname": firstname,
        "lastname": lastname,
    })

bills_df = pd.DataFrame(rows)
bills_df.head()

Unnamed: 0,bill_date,bill_time,phone_raw,firstname,lastname
0,2024-03-01,12:19:23,93627414,Ignazio,Abrahmer
1,2024-03-01,15:39:48,89007281,Bernard,Cowlard
2,2024-03-01,16:19:03,81059611,Laurette,Birney
3,2024-03-01,18:39:04,93342383,Corby,Crinage
4,2024-03-01,19:22:02,85625766,Mal,Bavister


In [None]:
# Cell 4 — Derive fields for bills INSERTs
# bills(bill_date, bill_time, bill_id, payment, total_bill, card_number, card_type, phone)

def infer_payment(card):
    return "card" if (card is not None and str(card).strip() != "") else "cash"

rows = []
for _, r in df_sel.iterrows():
    bill_date   = r["Date"]           # yyyy-mm-dd
    bill_time   = r["Time"]           # hh:mm:ss
    bill_id     = str(r["Order"])     # text id
    card_number = r["Card"] if str(r["Card"]).strip() != "" else None
    card_type   = r["CardType"] if str(r["CardType"]).strip() != "" else None
    total_bill  = float(r["TotalPrice"])
    
    # Corrected phone number extraction
    phone_raw = r["Phone"]
    
    # Check if the phone number is a float and not NaN
    if pd.isna(phone_raw):
        phone_cleaned = None
    else:
        try:
            # Convert the float to an integer to remove the .0, then to a string
            # This handles cases like 93627414.0
            phone_cleaned = str(int(phone_raw))
        except (ValueError, TypeError):
            # Fallback for non-numeric or malformed data
            phone_cleaned = str(phone_raw).strip()
    
    # Validate the cleaned phone number
    phone = phone_cleaned if is_valid_sg_phone(phone_cleaned) else None

    payment = infer_payment(card_number)

    rows.append({
        "bill_date": bill_date,
        "bill_time": bill_time,
        "bill_id":   bill_id,
        "payment":   payment,
        "total_bill": total_bill,
        "card_number": card_number,
        "card_type":   card_type,
        "phone":       phone
    })

bills_df = pd.DataFrame(rows)
bills_df.head()

Unnamed: 0,bill_date,bill_time,bill_id,payment,total_bill,card_number,card_type,phone
0,2024-03-01,10:15:51,20240301001,card,4.0,3742-8375-6443-8590,americanexpress,
1,2024-03-01,12:19:23,20240301002,card,14.0,5108-7574-2920-6803,mastercard,
2,2024-03-01,13:46:33,20240301003,card,9.0,3466-5960-1418-4580,americanexpress,
3,2024-03-01,13:48:15,20240301004,card,8.0,3379-4110-3466-1310,americanexpress,
4,2024-03-01,15:39:48,20240301005,card,3.5,3742-8382-6101-0570,americanexpress,


In [7]:
# Cell 5 — Generate PostgreSQL INSERT statements for `bills`
# Assumes your table definition:
# bills(bill_date DATE, bill_time TIME, bill_id VARCHAR(11) PK, payment VARCHAR(4), total_bill NUMERIC,
#       card_number VARCHAR(19), card_type VARCHAR(100), phone VARCHAR(8))

inserts = []
for _, r in bills_df.iterrows():
    stmt = (
        "INSERT INTO registration VALUES ("
        f"{sql_quote(r['bill_date'])}, "
        f"{sql_quote(r['bill_time'])}, "
        f"{sql_quote(r['phone_raw'])}, "
        f"{sql_quote(r['firstname'])}, "
        f"{sql_quote(r['lastname'])}, "
        ");"
    )
    inserts.append(stmt)

# Preview first few inserts
print("\n".join(inserts[:5]))


INSERT INTO registration VALUES ('2024-03-01', '12:19:23', '93627414', 'Ignazio', 'Abrahmer', );
INSERT INTO registration VALUES ('2024-03-01', '15:39:48', '89007281', 'Bernard', 'Cowlard', );
INSERT INTO registration VALUES ('2024-03-01', '16:19:03', '81059611', 'Laurette', 'Birney', );
INSERT INTO registration VALUES ('2024-03-01', '18:39:04', '93342383', 'Corby', 'Crinage', );
INSERT INTO registration VALUES ('2024-03-01', '19:22:02', '85625766', 'Mal', 'Bavister', );


In [18]:
# Cell 6 — (Optional) Save all INSERTs to a .sql file
OUTPUT_SQL.write_text("\n".join(inserts), encoding="utf-8")
OUTPUT_SQL


WindowsPath('bills_inserts.sql')