In [1]:
# Cell 1 — Imports & config
import pandas as pd
from pathlib import Path

# === CHOOSE YOUR CSV FILE HERE ===
# Options: 'menu.csv', 'orders.csv', 'registration.csv', 'staff.csv'
csv_file_name = 'orders.csv'
# =================================

CSV_PATH = Path(f'../doc/{csv_file_name}')
OUTPUT_SQL = Path(f'{CSV_PATH.stem}_inserts.sql')

# Helper: SQL quote and NULL handling
def sql_quote(val):
    if pd.isna(val):
        return "NULL"
    s = str(val)
    if s.strip() == "":
        return "NULL"
    s = s.replace("'", "''")
    return f"'{s}'"

In [2]:
# Cell 2 — Load CSV and display headers
df = pd.read_csv(CSV_PATH)

print(f"Headers for {CSV_PATH.name}:")
for col in df.columns:
    print(f'- {col}')

print('First 5 rows:')
df.head()

Headers for orders.csv:
- Date
- Time
- Order
- Payment
- Card
- CardType
- Item
- TotalPrice
- Phone
- Firstname
- Lastname
- Staff
First 5 rows:


Unnamed: 0,Date,Time,Order,Payment,Card,CardType,Item,TotalPrice,Phone,Firstname,Lastname,Staff
0,2024-03-01,10:15:51,20240301001,card,3742-8375-6443-8590,americanexpress,Rendang,4.0,,,,STAFF-01
1,2024-03-01,12:19:23,20240301002,card,5108-7574-2920-6803,mastercard,Ayam Balado,14.0,93627414.0,Ignazio,Abrahmer,STAFF-03
2,2024-03-01,12:19:23,20240301002,card,5108-7574-2920-6803,mastercard,Ayam Balado,14.0,93627414.0,Ignazio,Abrahmer,STAFF-03
3,2024-03-01,12:19:23,20240301002,card,5108-7574-2920-6803,mastercard,Ayam Balado,14.0,93627414.0,Ignazio,Abrahmer,STAFF-03
4,2024-03-01,12:19:23,20240301002,card,5108-7574-2920-6803,mastercard,Ayam Balado,14.0,93627414.0,Ignazio,Abrahmer,STAFF-04


In [None]:
# Cell 3 — Select which columns to include in the INSERT statements
# === CHOOSE YOUR COLUMNS HERE ===
# You can now select from the detected columns or use all of them
# selected_columns = list(df.columns)
# Or uncomment below to manually select from detected columns
selected_columns = ["Order","Date","Time","Payment"]
# =================================

# Verify that the selected columns exist in the DataFrame
missing_cols = [col for col in selected_columns if col not in df.columns]
if missing_cols:
    raise ValueError(f"The following columns were not found in the CSV: {missing_cols}")

df_selected = df[selected_columns].drop_duplicates()
print(f"Selected columns for INSERT: {', '.join(df_selected.columns)}")
df_selected.head()

Selected columns for INSERT: Order, Date, Time, Payment


Unnamed: 0,Order,Date,Time,Payment
0,20240301001,2024-03-01,10:15:51,card
1,20240301002,2024-03-01,12:19:23,card
5,20240301003,2024-03-01,13:46:33,card
8,20240301004,2024-03-01,13:48:15,card
10,20240301005,2024-03-01,15:39:48,card


In [5]:
# Cell 4 — Generate and preview PostgreSQL INSERT statements
inserts = []
table_name = CSV_PATH.stem

for _, row in df_selected.iterrows():
    values = ', '.join([sql_quote(row[col]) for col in df_selected.columns])
    stmt = f"INSERT INTO {table_name} ({', '.join(df_selected.columns)}) VALUES ({values});"
    inserts.append(stmt)

# Preview first 5 inserts
print(f"--- Preview of INSERT statements for table '{table_name}' ---")
for i in inserts[:5]:
    print(i)

--- Preview of INSERT statements for table 'orders' ---
INSERT INTO orders (Order, Date, Time, Payment) VALUES ('20240301001', '2024-03-01', '10:15:51', 'card');
INSERT INTO orders (Order, Date, Time, Payment) VALUES ('20240301002', '2024-03-01', '12:19:23', 'card');
INSERT INTO orders (Order, Date, Time, Payment) VALUES ('20240301003', '2024-03-01', '13:46:33', 'card');
INSERT INTO orders (Order, Date, Time, Payment) VALUES ('20240301004', '2024-03-01', '13:48:15', 'card');
INSERT INTO orders (Order, Date, Time, Payment) VALUES ('20240301005', '2024-03-01', '15:39:48', 'card');


In [6]:
# Cell 5 — (Optional) Save all INSERTs to a .sql file
if inserts:
    OUTPUT_SQL.write_text('\n'.join(inserts), encoding='utf-8')
    print(f"Successfully saved {len(inserts)} INSERT statements to: {OUTPUT_SQL}")
else:
    print("No INSERT statements were generated.")

Successfully saved 2906 INSERT statements to: orders_inserts.sql
