In [1]:
import pandas as pd
from pathlib import Path
from collections import OrderedDict

# Locations of the input CSV files and of the generated SQL script.
# NOTE(review): CSV_DIR is an absolute path on one developer's machine, so the
# notebook only runs elsewhere after this value is edited -- consider a path
# relative to the project or an environment variable.
CSV_DIR = Path('/home/jianwei07/dev/mcomp-projects/it5008_project/doc')  # directory that holds menu.csv and staff.csv
# No directory component: the file is created relative to the kernel's current working directory.
OUTPUT_SQL_FILE = 'data_draft.sql'

In [2]:
def sql_quote(val):
    """Render a single value as an SQL literal.

    Missing values (anything ``pd.isna`` reports as missing, or ``None``) and
    values whose string form is empty or whitespace-only become the bare
    keyword ``NULL``.  Everything else is converted with ``str()``, has each
    single quote doubled, and is wrapped in single quotes -- numbers included.
    """
    is_missing = pd.isna(val) or val is None
    if is_missing or not str(val).strip():
        return 'NULL'
    escaped = str(val).replace("'", "''")
    return "'" + escaped + "'"

def format_values(row, columns):
    """Build the comma-separated VALUES list for one row.

    Each name in ``columns`` is looked up in ``row`` (``row[name]``), passed
    through ``sql_quote``, and the results are joined with ``', '`` in the
    order given by ``columns``.
    """
    quoted_values = map(sql_quote, (row[name] for name in columns))
    return ', '.join(quoted_values)


In [3]:
print("Reading CSV files...")
# Load the two source tables that the rest of the notebook uses.
menu_df = pd.read_csv(CSV_DIR.joinpath('menu.csv'))
staff_df = pd.read_csv(CSV_DIR.joinpath('staff.csv'))
# The registration and orders inputs are currently disabled:
# registration_df = pd.read_csv(CSV_DIR / 'registration.csv')
# orders_df = pd.read_csv(CSV_DIR / 'orders.csv')
print("All CSV files have been read successfully.")

Reading CSV files...
All CSV files have been read successfully.


In [4]:
print("Generating INSERT statements...")

# Statements are grouped per table.  The insertion order of this mapping is
# the order in which the groups are later written to the SQL file.
# NOTE(review): presumably parent tables must precede the junction tables
# that reference them -- confirm against the schema.
inserts = OrderedDict()

# --- 4.1. Cuisines ---
# Collect every cuisine mentioned in either CSV, in first-seen order.
# Missing values are dropped first: a NaN cuisine would otherwise survive
# .unique() and be rendered by sql_quote() as "VALUES (NULL)", which is not a
# usable cuisine row.
cuisines_from_menu = menu_df['Cuisine'].dropna().unique()
cuisines_from_staff = staff_df['Cuisine'].dropna().unique()
all_cuisines = pd.concat(
    [pd.Series(cuisines_from_menu), pd.Series(cuisines_from_staff)],
    ignore_index=True,
).unique()
cuisines_df = pd.DataFrame(all_cuisines, columns=['cuisine_name'])
# Iterate the single column directly; no per-row Series is needed here.
inserts['cuisines'] = [
    f"INSERT INTO cuisines (cuisine_name) VALUES ({sql_quote(cuisine_name)});"
    for cuisine_name in cuisines_df['cuisine_name']
]

# --- 4.2. Menu ---
# Only item and price go into the menu table; the item-to-cuisine link is
# emitted separately for menu_belongs_to.  When an Item occurs on several
# rows, the first occurrence is the one that is kept.
menu_df_clean = (
    menu_df[['Item', 'Price']]
    .drop_duplicates(subset=['Item'])
    .rename(columns={'Item': 'item', 'Price': 'price'})
)
menu_columns = ['item', 'price']
inserts['menu'] = [
    "INSERT INTO menu (item, price) VALUES (" + format_values(menu_row, menu_columns) + ");"
    for _, menu_row in menu_df_clean.iterrows()
]

# --- 4.3. Menu_belongs_to (Junction Table) ---
# One statement per distinct (item, cuisine) pair.
# - Rows missing either key are skipped: they would be written with a NULL
#   key column.
# - Exact duplicate pairs are collapsed, mirroring the de-duplication already
#   applied to the menu and staff tables; a repeated CSV row would otherwise
#   produce a repeated INSERT.
# NOTE(review): assumes (item, cuisine_name) is the junction table's key --
# confirm against the schema.
menu_belongs_to_df = (
    menu_df[['Item', 'Cuisine']]
    .dropna(subset=['Item', 'Cuisine'])
    .drop_duplicates()
    .rename(columns={'Item': 'item', 'Cuisine': 'cuisine_name'})
)
inserts['menu_belongs_to'] = [
    f"INSERT INTO menu_belongs_to (item, cuisine_name) VALUES ({format_values(row, ['item', 'cuisine_name'])});"
    for _, row in menu_belongs_to_df.iterrows()
]

# --- 4.4. Staff ---
# One statement per distinct Staff value (first occurrence kept).  The frame
# keeps its other columns; only staff_id and staff_name are written out.
staff_df_clean = (
    staff_df
    .drop_duplicates(subset=['Staff'])
    .rename(columns={'Staff': 'staff_id', 'Name': 'staff_name'})
)
staff_columns = ['staff_id', 'staff_name']
inserts['staff'] = [
    "INSERT INTO staff (staff_id, staff_name) VALUES (" + format_values(staff_row, staff_columns) + ");"
    for _, staff_row in staff_df_clean.iterrows()
]

# --- 4.5. Staff_can_prepare (Junction Table) ---
# One statement per distinct (staff, cuisine) pair.
# - Rows missing either key are skipped: they would be written with a NULL
#   key column.
# - Exact duplicate pairs are collapsed, mirroring the de-duplication already
#   applied to the menu and staff tables; a repeated CSV row would otherwise
#   produce a repeated INSERT.
# NOTE(review): assumes (staff_id, cuisine_name) is the junction table's key --
# confirm against the schema.
staff_cuisine_df = (
    staff_df[['Staff', 'Cuisine']]
    .dropna(subset=['Staff', 'Cuisine'])
    .drop_duplicates()
    .rename(columns={'Staff': 'staff_id', 'Cuisine': 'cuisine_name'})
)
inserts['staff_can_prepare'] = [
    f"INSERT INTO staff_can_prepare (staff_id, cuisine_name) VALUES ({format_values(row, ['staff_id', 'cuisine_name'])});"
    for _, row in staff_cuisine_df.iterrows()
]


Generating INSERT statements...


In [5]:
# --- Section 5: Save the generated statements to OUTPUT_SQL_FILE ---
# The encoding is pinned to UTF-8 so names with non-ASCII characters are
# written the same way on every platform instead of depending on the locale
# default.
with open(OUTPUT_SQL_FILE, 'w', encoding='utf-8') as f:
    f.write("--\n")
    # Report the real output name rather than a fixed "data.sql".
    f.write(f"-- File: {OUTPUT_SQL_FILE}\n")
    f.write("-- Description: INSERT statements for all tables\n")
    f.write("-- Generated automatically by generate_data.py\n")
    f.write("--\n\n")

    # Remember which tables actually received statements so the sanity
    # queries below only touch tables this script populated.
    populated_tables = []
    for table_name, statement_list in inserts.items():
        if not statement_list:
            continue
        populated_tables.append(table_name)
        f.write(f"--\n-- Data for table: {table_name}\n--\n")
        f.write('\n'.join(statement_list))
        f.write('\n\n')

    # The previous check counted rows in "orders", a table for which no
    # INSERT statements are generated here; count the populated tables instead.
    f.write("-- Simple queries to show data is successfully inserted\n")
    for table_name in populated_tables:
        f.write(f"SELECT COUNT(*) FROM {table_name};\n")

print(f"Successfully saved all INSERT statements to {OUTPUT_SQL_FILE}")

Successfully saved all INSERT statements to data_draft.sql
