In [21]:
import csv
import sqlite3
import pandas as pd
import time
import os

I used  sqlite3 documentation to set up this part https://docs.python.org/3/library/sqlite3.html

In [22]:
# use the connect() function to create a database and connect to it
conn = sqlite3.connect('tips_db')

# make an sql query to create the table inside the tips_db
create_table = '''
CREATE TABLE IF NOT EXISTS tips (
    id INTEGER PRIMARY KEY,
    total_bill REAL,
    tip REAL,
    sex STRING,
    smoker STRING,
    day STRING,
    time STRING,
    size INTEGER
) 
'''

# use error handling to handle any sqlite errors
try:
    with conn:
        conn.execute(create_table)
    print("Table created successfully.")
except sqlite3.Error as e:
    print(f"Error creating table: {e}")

# Function to insert data from CSV
def insert_data_from_csv(csv_path):
    # use parametrized queries to prevent sql injection attacks
    insert_query = '''
    INSERT INTO tips (total_bill, tip, sex, smoker, day, time, size) 
    VALUES (?, ?, ?, ?, ?, ?, ?)
    '''
    try:
        with conn, open(csv_path, 'r') as csv_file:
            csv_reader = csv.reader(csv_file)
            # ignore the row with the column headings
            next(csv_reader)
            cursor = conn.cursor()
            cursor.executemany(insert_query, csv_reader)
        print("Data inserted successfully.")
    except (sqlite3.Error, IOError) as e:
        print(f"Error inserting data: {e}")

# Insert data from the CSV file
insert_data_from_csv('data/tips.csv')

# use a function to run SQL queries and display results
def run_query(query):
    try:
        df = pd.read_sql_query(query, conn)
        return df
    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
        return None

Table created successfully.
Data inserted successfully.


1. Retrieve the average tip percentage for each day of the week

In [23]:
query = '''
SELECT day, AVG(tip/total_bill)*100 as avg_tip_percent 
FROM tips 
GROUP BY day
'''


run_query(query)

Unnamed: 0,day,avg_tip_percent
0,Fri,16.665497
1,Sat,15.096698
2,Sun,15.590484
3,Thur,15.843259


2. Find the maximum and minimum total bill amounts

In [24]:
query = '''
SELECT max(total_bill) as max_total_bill, min(total_bill) as min_total_bill 
FROM tips
'''


run_query(query)

Unnamed: 0,max_total_bill,min_total_bill
0,50.81,3.07


3. Count the number of parties for each size

In [25]:
query = '''
SELECT size, count(size) as number_of_parties 
FROM tips 
GROUP BY size
'''


run_query(query)

Unnamed: 0,size,number_of_parties
0,1,29
1,2,3706
2,3,988
3,4,962
4,5,130
5,6,104


4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%

In [26]:
query = '''

SELECT total_bill, tip 
FROM tips 
WHERE size>=4 and ((tip/total_bill)*100)>15
'''


run_query(query)

Unnamed: 0,total_bill,tip
0,25.29,4.71
1,18.43,3.00
2,39.42,7.58
3,30.40,5.60
4,32.40,6.00
...,...,...
437,23.17,6.50
438,20.69,5.00
439,20.53,4.00
440,25.89,5.16


5. Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order

In [27]:
query = '''
SELECT day, time, total_bill, tip, ((tip/total_bill)*100) as tip_percent 
FROM tips group by day, time 
ORDER BY tip_percent desc
'''

run_query(query)

Unnamed: 0,day,time,total_bill,tip,tip_percent
0,Fri,Lunch,12.16,2.2,18.092105
1,Sat,Dinner,20.65,3.35,16.22276
2,Thur,Dinner,18.78,3.0,15.974441
3,Thur,Lunch,27.2,4.0,14.705882
4,Fri,Dinner,28.97,3.0,10.35554
5,Sun,Dinner,16.99,1.01,5.944673


6. Find the average tip percentage for each combination of day, time, and smoker status

In [28]:
query = '''
SELECT day, time, smoker, AVG((tip/total_bill)*100) as avg_tip_percent 
FROM tips 
GROUP BY day, time, smoker
'''

run_query(query)

Unnamed: 0,day,time,smoker,avg_tip_percent
0,Fri,Dinner,No,13.962237
1,Fri,Dinner,Yes,16.432276
2,Fri,Lunch,No,18.773467
3,Fri,Lunch,Yes,18.228781
4,Sat,Dinner,No,15.863542
5,Sat,Dinner,Yes,14.272451
6,Sun,Dinner,No,15.934207
7,Sun,Dinner,Yes,14.504474
8,Thur,Dinner,No,15.974441
9,Thur,Lunch,No,15.598343


7. Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records

In [29]:
query = '''
SELECT sex, total_bill, tip, ((tip/total_bill)*100) as tip_percent 
FROM tips group by sex 
ORDER BY total_bill desc limit 5
'''


run_query(query)

Unnamed: 0,sex,total_bill,tip,tip_percent
0,Female,16.99,1.01,5.944673
1,Male,10.34,1.66,16.054159


8. Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount

In [30]:
query = '''
SELECT day, time, total_bill, tip, max((tip/total_bill)*100) as max_tip_percent, min((tip/total_bill)*100) as min_tip_percent 
FROM tips 
GROUP BY day, time
'''


run_query(query)

Unnamed: 0,day,time,total_bill,tip,max_tip_percent,min_tip_percent
0,Fri,Dinner,28.97,3.0,26.348039,10.35554
1,Fri,Lunch,13.42,1.58,25.931446,11.773472
2,Sat,Dinner,32.83,1.17,32.57329,3.563814
3,Sun,Dinner,16.99,1.01,71.034483,5.944673
4,Thur,Dinner,18.78,3.0,15.974441,15.974441
5,Thur,Lunch,18.64,1.36,26.631158,7.296137


9. Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100

In [31]:
query = "
SELECT total_bill, tip, ((tip/total_bill)*100) as tip_percent 
FROM tips 
WHERE size>=4 and ((tip/total_bill)*100)>15 and (total_bill>50 and total_bill<100)
"

run_query(query)

Unnamed: 0,total_bill,tip,tip_percent


10. Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records

In [32]:
query = '''
SELECT day, time, smoker, AVG((tip/total_bill)*100) as tip_percent, count(*) as combinations 
FROM tips 
GROUP BY day, time, smoker having combinations>5
'''


run_query(query)

Unnamed: 0,day,time,smoker,tip_percent,combinations
0,Fri,Dinner,No,13.962237,78
1,Fri,Dinner,Yes,16.432276,209
2,Fri,Lunch,No,18.773467,26
3,Fri,Lunch,Yes,18.228781,131
4,Sat,Dinner,No,15.863542,1120
5,Sat,Dinner,Yes,14.272451,1042
6,Sun,Dinner,No,15.934207,1406
7,Sun,Dinner,Yes,14.504474,445
8,Thur,Dinner,No,15.974441,26
9,Thur,Lunch,No,15.598343,994


11. Find average tip percentages for each time of day

In [33]:
query = '''
SELECT time, avg((tip/total_bill)*100) as avg_tip_percent 
FROM tips 
GROUP BY time
'''


run_query(query)

Unnamed: 0,time,avg_tip_percent
0,Dinner,15.357324
1,Lunch,16.085116


12. Find the average tip percentages for smokers and non-smokers

In [34]:
query = '''
SELECT smoker, avg((tip/total_bill)*100) as avg_tip_percent 
FROM tips 
GROUP BY smoker
'''


run_query(query)

Unnamed: 0,smoker,avg_tip_percent
0,No,15.799429
1,Yes,15.157099


13. Find average total bill amount and the average tip percentage for each party size

In [35]:
query = '''
SELECT size, avg((tip/total_bill)*100) as avg_tip_percent 
FROM tips 
GROUP BY size
'''

run_query(query)

Unnamed: 0,size,avg_tip_percent
0,1,18.66333
1,2,15.914873
2,3,15.215685
3,4,14.594901
4,5,14.149549
5,6,15.62292


14. Find number of parties and average party size for each day and time.

In [36]:
query = '''
SELECT day,time, COUNT(*) AS number_of_parties, AVG(size) AS av_party_size 
FROM tips group by day, time 
ORDER BY tip DESC
'''


run_query(query)

Unnamed: 0,day,time,number_of_parties,av_party_size
0,Thur,Lunch,1436,2.506964
1,Sat,Dinner,2162,2.564292
2,Fri,Dinner,287,2.181185
3,Thur,Dinner,26,2.0
4,Fri,Lunch,157,2.159236
5,Sun,Dinner,1851,2.898974


15. Find average tip percentage and average total bill for each gender.

In [37]:
query = '''
SELECT sex,AVG((tip/total_bill)*100) AS avg_tip_percent,AVG(total_bill) AS avg_total_bill 
FROM tips 
GROUP BY sex
'''


run_query(query)

Unnamed: 0,sex,avg_tip_percent,avg_total_bill
0,Female,16.197102,18.835914
1,Male,15.195979,21.619469


### UPDATE
It was determined that there was an error in the database. Please update the record that corresponds to id=10 and set smoker to Yes. 

In [38]:
def update_smoker_status(id, status):
    query = "UPDATE tips SET smoker = ? WHERE id = ?"
    try:
        with conn:
            cursor = conn.cursor()
            cursor.execute(query, (status, id))
            if cursor.rowcount:
                print(f"Record with id={id} updated successfully")
                return cursor.rowcount
            print(f"No record found with id={id}")
    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
    return None

# Update and verify
if updated_count = update_smoker_status(10, 'Yes'):
    print(f"Updated {updated_count} record")
    result = run_query(f"SELECT * FROM tips WHERE id = 10")
    display(result)
else:
    print("No record was updated or an error occurred")

Record with id=10 updated successfully.
Updated 1 record(s).


Unnamed: 0,id,total_bill,tip,sex,smoker,day,time,size
0,10,14.78,3.23,Male,Yes,Sun,Dinner,2


DELETE

In [39]:
def delete_records_below(dollars):
    query = f"DELETE FROM tips WHERE total_bill < {dollars}"
    try:
        with conn:
            cursor = conn.cursor()
            cursor.execute(query)
            deleted_count = cursor.rowcount
            print(f"Deleted {deleted_count} record(s) with total bill less than ${dollars}.")
            return deleted_count
    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
        return None

# implement delete function
dollars = 10  # $10 threshold
deleted_count = delete_records_below(dollars)

if deleted_count is not None:
    print(f"Successfully deleted {deleted_count} record(s).")
    
    # Verify the deletion
    verify_query = f"SELECT COUNT(*) as remaining_count FROM tips WHERE total_bill < {dollars}"
    result = run_query(verify_query)
    if result is not None:
        display(result)
        remaining_count = result['remaining_count'].iloc[0]
        print(f"Remaining records with total bill < ${dollars}: {remaining_count}")
    else:
        print("Failed to verify deletion.")
else:
    print("Deletion operation failed.")

Deleted 17 record(s) with total bill less than $10.
Successfully deleted 17 record(s).


Unnamed: 0,remaining_count
0,0


Remaining records with total bill < $10: 0


Close connection

In [40]:
conn.close()