In [None]:
import datetime
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, text

# MySQL setup (provided by user).
# Credentials are hard-coded here; the host is the local machine (127.0.0.1).
username = 'root'
password = 'rootuser'
host = '127.0.0.1'
port = '3306'

databaseNameSource = 'sakila'

# Connection string format (provided by user):
#   mysql+pymysql://<user>:<password>@<host>:<port>/<database>
# The user name and password are percent-encoded so that characters with a
# special meaning inside a URL (e.g. "@", ":" or "/") cannot corrupt the
# connection string. For the current values the encoded text is unchanged.
connectionStrSource = (
    f"mysql+pymysql://{quote_plus(username)}:{quote_plus(password)}"
    f"@{host}:{port}/{databaseNameSource}"
)
engineSource = create_engine(connectionStrSource)

In [None]:

# Seed the sakila source database with deliberately "dirty" rows (NULLs in
# columns that downstream cleaning code has to cope with).
#
# Every step runs in its own transaction on one shared connection so that:
#   * the INSERTs are really committed (a bare ``engine.connect()`` does not
#     autocommit under SQLAlchemy 2.x and rolls everything back on exit);
#   * a failure in a later step does not undo rows inserted by earlier steps;
#   * generated primary keys are taken from ``lastrowid`` on the same
#     connection instead of being re-queried through a second connection
#     with an ambiguous "latest row" lookup.
print("Inserting uncleaned data into the sakila source database...")


def _insert_row(connection, sql, params=None):
    """Execute one INSERT in its own transaction and return the new row's key.

    Args:
        connection: An open SQLAlchemy connection.
        sql: The INSERT statement; values are referenced as ``:name`` bind
            parameters rather than being formatted into the string.
        params: Optional mapping of bind-parameter names to values.

    Returns:
        The DBAPI cursor's ``lastrowid`` for the statement, i.e. the
        auto-generated key of the inserted row.
    """
    statement = text(sql)
    with connection.begin():
        if params:
            result = connection.execute(statement, params)
        else:
            result = connection.execute(statement)
        # Read the key while the cursor is still current, before the commit.
        return result.lastrowid


try:
    with engineSource.connect() as connection:
        # --- 1. Insert a staff member with a NULL email ---
        # email is listed explicitly so the NULL does not depend on the
        # column's default; password is left NULL exactly as before.
        print("Inserting staff with NULL email...")
        _insert_row(
            connection,
            """
            INSERT INTO staff (first_name, last_name, address_id, email, store_id, username, password, active, picture, last_update)
            VALUES ('Test', 'StaffNullEmail', 1, NULL, 1, 'testnullem', NULL, 1, NULL, NOW())
            """,
        )
        print("Staff inserted.")

        # --- 2. Insert a film with NULL description and special features ---
        print("Inserting film with NULL description and special features...")
        _insert_row(
            connection,
            """
            INSERT INTO film (title, description, release_year, language_id, rental_duration, rental_rate, length, replacement_cost, rating, special_features, last_update)
            VALUES ('Uncleaned Film', NULL, 2023, 1, 3, 4.99, 120, 19.99, 'G', NULL, NOW())
            """,
        )
        print("Film inserted.")

        # Insert a new customer to associate with a problematic rental/payment later
        print("Inserting a new customer for testing...")
        problem_customer_id = _insert_row(
            connection,
            """
            INSERT INTO customer (store_id, first_name, last_name, email, address_id, active, create_date, last_update)
            VALUES (1, 'Problem', 'Customer', 'problem@example.com', 1, 1, NOW(), NOW())
            """,
        )
        print(f"Problem customer ID: {problem_customer_id}")

        # Insert a new inventory item (film_id=1, store_id=1) for a new rental
        print("Inserting a new inventory item for testing...")
        problem_inventory_id = _insert_row(
            connection,
            """
            INSERT INTO inventory (film_id, store_id, last_update)
            VALUES (1, 1, NOW())
            """,
        )
        print(f"Problem inventory ID: {problem_inventory_id}")

        # --- 3. Insert a rental with a NULL return_date (common scenario, not strictly "dirty" but tests NULL handling) ---
        # return_date is not in the column list, so it takes the column default.
        print("Inserting rental with NULL return_date...")
        problem_rental_id = _insert_row(
            connection,
            """
            INSERT INTO rental (rental_date, inventory_id, customer_id, staff_id, last_update)
            VALUES (NOW(), :inventory_id, :customer_id, 1, NOW())
            """,
            {
                "inventory_id": problem_inventory_id,
                "customer_id": problem_customer_id,
            },
        )
        print(f"Problem rental ID: {problem_rental_id}")

        # --- 4. Insert a payment with NULL amount for the rental created above ---
        # NOTE(review): the stock sakila schema presumably declares
        # payment.amount as NOT NULL, in which case MySQL may reject this row
        # and control jumps to the except clause below -- confirm against the
        # target schema. Rows from steps 1-3 stay committed either way.
        print("Inserting payment with NULL amount and for a previously created rental...")
        _insert_row(
            connection,
            """
            INSERT INTO payment (customer_id, staff_id, rental_id, amount, payment_date, last_update)
            VALUES (:customer_id, 1, :rental_id, NULL, NOW(), NOW())
            """,
            {
                "customer_id": problem_customer_id,
                "rental_id": problem_rental_id,
            },
        )
        print("Payment with NULL amount inserted.")

    print("Uncleaned data insertion complete.")

except Exception as e:
    # Top-level notebook boundary: report the failure instead of raising so
    # the remaining cells can still be run.
    print(f"An error occurred during data insertion: {e}")