### CREATE SQLite database from CSV

In [None]:
import sqlite3
import pandas as pd

# Load the raw tips data from the CSV file and show a quick preview.
# A bare `tips_df.head()` in the middle of a cell is evaluated and thrown
# away, so the preview is printed explicitly.
tips_df = pd.read_csv("data/tips.csv")
print(tips_df.head())

# Create (or open) the SQLite database file
conn = sqlite3.connect("tips.db")

# Write the DataFrame to a table named "tips", replacing any existing table so
# the cell can be re-run. The DataFrame index is stored in a column labelled
# "id" that serves as the row identifier.
# NOTE: to_sql does not declare a PRIMARY KEY constraint on that column; the
# ids are unique only because the DataFrame index is.
row_count = tips_df.to_sql("tips", conn, if_exists="replace", index_label="id")
print(f"Successfully created a table named 'tips' with {row_count} rows")

# Leaving the connection open and using pandas to query the database for the sake of query result readability

### READ #1: Average tip percentage per day of the week

In [None]:
# Tip percentage per day: summed tips over summed bills for each day,
# rounded to one decimal place, highest percentage first.
tip_pct_by_day_sql = '''
                  SELECT 
                    day, 
                    ROUND((SUM(tip)/SUM(total_bill) * 100), 1) AS avg_tip_percentage
                  FROM 
                    tips
                  GROUP BY 
                    day
                  ORDER BY 
                    avg_tip_percentage DESC;
                  '''
pd.read_sql_query(sql=tip_pct_by_day_sql, con=conn)

### READ #2: Maximum and minimum total bills

In [None]:
# Smallest and largest total_bill across the whole tips table (single row).
bill_range_sql = '''
                  SELECT 
                    MIN(total_bill) as min_total_bill, 
                    MAX(total_bill) as max_total_bill
                  FROM 
                    tips;
                  '''
pd.read_sql_query(sql=bill_range_sql, con=conn)

### READ #3: Count the number of parties for each size

In [None]:
# Number of records for each party size (one output row per distinct size).
party_size_counts_sql = '''
                  SELECT 
                    size, 
                    COUNT(size) as occurrences
                  FROM 
                    tips
                  GROUP BY 
                    size;
                  '''
pd.read_sql_query(sql=party_size_counts_sql, con=conn)

### READ #4: Total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%

In [None]:
# Bill and tip for every party of four or more whose tip is strictly above
# 15% of the bill. The percentage is computed per row and is not rounded.
large_party_tips_sql = '''
                  SELECT 
                    total_bill, 
                    tip
                  FROM 
                    tips
                  WHERE 
                    size >= 4 AND 
                    ((tip/total_bill) * 100) > 15;
                  '''
pd.read_sql_query(sql=large_party_tips_sql, con=conn)

### READ #5: Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order


In [None]:
# For each (day, time) pair: summed bills, summed tips, and the ratio of the
# two as a percentage rounded to one decimal; best-tipping pairs first.
totals_by_day_time_sql = '''
                  SELECT 
                    day, 
                    time, 
                    SUM(total_bill) AS sum_total_bill, 
                    SUM(tip) as sum_tip, 
                    ROUND((SUM(tip)/SUM(total_bill) * 100), 1) AS tip_percentage
                  FROM 
                    tips
                  GROUP BY 
                    day, time
                  ORDER BY 
                    tip_percentage DESC;
                  '''
pd.read_sql_query(sql=totals_by_day_time_sql, con=conn)

### READ #6: Find the average tip percentage for each combination of day, time, and smoker status

In [None]:
# Tip percentage (summed tips / summed bills, one decimal) for every
# combination of day, time and smoker status, highest percentage first.
tip_pct_by_day_time_smoker_sql = '''
                  SELECT 
                    day, 
                    time, 
                    smoker, 
                    ROUND((SUM(tip)/SUM(total_bill) * 100), 1) AS avg_tip_percentage
                  FROM 
                    tips
                  GROUP BY 
                    day, time, smoker
                  ORDER BY 
                    avg_tip_percentage DESC;
                  '''
pd.read_sql_query(sql=tip_pct_by_day_time_smoker_sql, con=conn)

### READ #7: Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records

In [None]:
# Summed bills, summed tips and the resulting tip percentage per sex,
# ordered by summed bill (largest first) and capped at five rows.
totals_by_sex_sql = '''
                  SELECT 
                    sex, 
                    SUM(total_bill) AS sum_total_bill, 
                    SUM(tip) as sum_tip, 
                    ROUND((SUM(tip)/SUM(total_bill) * 100), 1) AS tip_percentage
                  FROM 
                    tips
                  GROUP BY 
                    sex
                  ORDER BY 
                    sum_total_bill DESC
                  LIMIT 5;
                  '''
pd.read_sql_query(sql=totals_by_sex_sql, con=conn)

### READ #8: Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount

In [None]:
# Rows holding the highest or lowest per-row tip percentage within their
# (day, time) group. Each subquery yields (day, time, extreme percentage)
# triples, and the outer query keeps the rows whose own triple matches one of
# them. The displayed percentage is rounded; the matching uses the raw value.
extreme_tip_pct_sql = '''
                  SELECT 
                    day, 
                    time, 
                    total_bill, 
                    tip, 
                    ROUND((tip/total_bill) * 100, 1) AS tip_percentage
                  FROM 
                    tips
                  WHERE 
                    (day, time, (tip/total_bill) * 100) IN (
                        SELECT 
                          day, 
                          time, 
                          MAX((tip/total_bill) * 100) AS max_tip_percentage
                        FROM 
                          tips
                        GROUP BY 
                          day, time
                    )
                    OR 
                    (day, time, (tip/total_bill) * 100) IN (
                        SELECT 
                          day, 
                          time, 
                          MIN((tip/total_bill) * 100) AS min_tip_percentage
                        FROM 
                          tips
                        GROUP BY 
                          day, time
                    )
                  ORDER BY 
                    day, time, tip_percentage DESC;
                  '''
pd.read_sql_query(sql=extreme_tip_pct_sql, con=conn)

### READ #9: Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100

NOTE: This query returns no rows, which I believe is accurate.

In [None]:
# Bill, tip and tip percentage for parties of four or more whose tip is above
# 15% of a bill between $50 and $100 (BETWEEN is inclusive at both ends).
# The 15% filter is applied to the unrounded ratio. Filtering on the rounded
# `tip_percentage` alias would drop tips between 15% and 15.05%, which round
# to 15.0 and then fail the `> 15` test. Referencing a SELECT alias in WHERE
# is also a SQLite-specific extension.
pd.read_sql_query('''
                  SELECT 
                    total_bill,
                    tip, 
                    ROUND((tip/total_bill) * 100, 1) AS tip_percentage
                  FROM 
                    tips
                  WHERE
                    size >= 4 AND
                    ((tip/total_bill) * 100) > 15 AND
                    total_bill BETWEEN 50 AND 100;
                  ''', conn)

### READ #10: Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records

In [None]:
# Tip percentage (summed tips / summed bills, one decimal) per combination of
# day, time and smoker status, keeping only groups with more than 5 records.
tip_pct_filtered_groups_sql = '''
                  SELECT 
                    day, 
                    time, 
                    smoker, 
                    ROUND((SUM(tip)/SUM(total_bill) * 100), 1) AS avg_tip_percentage
                  FROM 
                    tips
                  GROUP BY 
                    day, time, smoker
                  HAVING
                    COUNT(*) > 5
                  '''
pd.read_sql_query(sql=tip_pct_filtered_groups_sql, con=conn)

### Additional Query #1: Find the average tip percentage based on smoking status.

In [None]:
# Tip percentage (summed tips / summed bills, one decimal) split by smoker
# status.
tip_pct_by_smoker_sql = '''
                  SELECT 
                    smoker,  
                    ROUND((SUM(tip)/SUM(total_bill) * 100), 1) AS avg_tip_percentage
                  FROM 
                    tips
                  GROUP BY 
                    smoker
                  '''
pd.read_sql_query(sql=tip_pct_by_smoker_sql, con=conn)

### Additional Query #2: Total diners for each combination of day and time ordered by total diners (i.e., busiest meals)

In [None]:
# Total number of diners (sum of party sizes) for each (day, time) pair,
# busiest meals first.
diners_by_day_time_sql = '''
                  SELECT 
                    day,
                    time,  
                    SUM(size) as total_diners
                  FROM 
                    tips
                  GROUP BY 
                    day, time
                  ORDER BY
                    total_diners DESC
                  '''
pd.read_sql_query(sql=diners_by_day_time_sql, con=conn)

### Additional Query #3: Average tip percentage for each party size where the number of records is greater than 5.

In [None]:
# Tip percentage (summed tips / summed bills, one decimal) per party size,
# restricted to sizes with more than 5 records, highest percentage first.
tip_pct_by_size_sql = '''
                  SELECT 
                    size,
                    ROUND((SUM(tip)/SUM(total_bill) * 100), 1) AS avg_tip_percentage
                  FROM 
                    tips
                  GROUP BY 
                    size
                  HAVING
                    COUNT(*) > 5
                  ORDER BY
                    avg_tip_percentage DESC
                  '''
pd.read_sql_query(sql=tip_pct_by_size_sql, con=conn)

### Additional Query #4: For each combination of sex and time, count the number of records. This effectively answers "_Which sex is paying for a given meal?_"

In [None]:
# Record count for each (sex, time) pair, listed per meal time with the
# larger count first within each time.
records_by_sex_time_sql = '''
                  SELECT 
                    sex,
                    time,
                    COUNT(*) as count
                  FROM 
                    tips
                  GROUP BY 
                    sex, time
                  ORDER BY
                    time, count DESC
                  '''
pd.read_sql_query(sql=records_by_sex_time_sql, con=conn)

### Additional Query #5: Who smokes more, men or women?

In [None]:
# Number of records flagged smoker = 'Yes', broken down by sex, larger count
# first. These are raw counts, not a share of each sex's records.
smokers_by_sex_sql = '''
                  SELECT 
                    sex,
                    COUNT(*) as count
                  FROM 
                    tips
                  WHERE
                    smoker = 'Yes'
                  GROUP BY 
                    sex
                  ORDER BY
                    count DESC
                  '''
pd.read_sql_query(sql=smokers_by_sex_sql, con=conn)

### Close the database connection after the last READ operation

In [136]:
# Release the connection that was kept open for the READ queries above.
conn.close()

### UPDATE: 
It was determined that there was an error in the database. Please update the record that corresponds to id=10 and set smoker to Yes.

In [137]:
from contextlib import closing

# Values bound to the named placeholders (:id, :smoker) in the UPDATE below.
data = {"id": 10,
        "smoker": "Yes"}

# sqlite3's own connection context manager only commits or rolls back the
# transaction; it does not close the connection. closing() guarantees the
# handle is released when the block exits.
with closing(sqlite3.connect("tips.db")) as conn:
    cur = conn.cursor()
    try:
        # `with conn` commits if the statement succeeds and rolls back if it
        # raises, so nothing is committed after a failure.
        with conn:
            cur.execute("UPDATE tips SET smoker = :smoker WHERE id = :id", data)
    except sqlite3.Error as e:
        print(f"Error {e.sqlite_errorcode}: {e.sqlite_errorname}")
    else:
        # Report the affected-row count so a non-matching id is not silent.
        print(f"Updated {cur.rowcount} row(s)")

### DELETE
Delete records from the database that have a total bill that is less than $10.

In [138]:
from contextlib import closing

# sqlite3's own connection context manager only commits or rolls back the
# transaction; it does not close the connection. closing() guarantees the
# handle is released when the block exits.
with closing(sqlite3.connect("tips.db")) as conn:
    cur = conn.cursor()
    try:
        # `with conn` commits if the statement succeeds and rolls back if it
        # raises, so nothing is committed after a failure.
        with conn:
            cur.execute("DELETE FROM tips WHERE total_bill < 10;")
    except sqlite3.Error as e:
        print(f"Error {e.sqlite_errorcode}: {e.sqlite_errorname}")
    else:
        # Report how many records were removed.
        print(f"Deleted {cur.rowcount} row(s)")