In [1]:
import sqlite3
import pandas as pd

CREATE:

In [38]:
# CREATE: load data/tips.csv into a DataFrame and (re)create the 'tips' table
# inside the SQLite database file 'tips.db'.
#
# `conn` is initialised before the try block so that the cleanup in `finally`
# is safe even when reading the CSV or opening the database fails.
conn = None
try:
    # Read the csv file and put it into pandas dataframe
    df = pd.read_csv('data/tips.csv')

    # Create a database 'tips.db', with data from df dataframe.
    # Using the connection as a context manager only commits (or rolls back)
    # the transaction; it does NOT close the connection.
    conn = sqlite3.connect('tips.db')
    with conn:
        df.to_sql('tips', conn, if_exists='replace', index=False)

except Exception as e:
    # Report the failure without a traceback and keep the notebook running.
    print(e)

finally:
    # Release the database handle only if it was actually opened.
    if conn is not None:
        conn.close()

READ:
Query 1-5:

In [33]:
# READ, queries 1-5.
#
# Each entry is (title to print, column header to print, SQL text, bound
# parameters). The cell prints the title, runs the query, prints the header
# and then every result row, or "None" when the result set is empty.
#
# Note: "tip percentage" in the grouped queries is SUM(tip)/SUM(total_bill),
# i.e. total tips over total bills for the group, not the mean of the
# per-row percentages.
queries_1_to_5 = [
    (
        "\n1. Retrieve the average tip percentage for each day of the week",
        "(day, tip_pct)",
        '''SELECT day,
                  ROUND(SUM(tip)/SUM(total_bill)*100,2) AS tip_pct
           FROM tips
           GROUP BY day''',
        (),
    ),
    (
        "\n2. Find the maximum and minimum total bill amounts",
        "(min_bill, max_bill)",
        '''SELECT MIN(total_bill) AS min_bill,
                  MAX(total_bill) AS max_bill
           FROM tips''',
        (),
    ),
    (
        "\n3. Count the number of parties for each size",
        "(size, number_of_parties)",
        '''SELECT size,
                  COUNT(size) AS number_of_parties
           FROM tips
           GROUP BY size''',
        (),
    ),
    (
        "\n4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%",
        "(total_bill, tip)",
        '''SELECT total_bill,
                  tip
           FROM tips
           WHERE size >= :querysize AND (tip/total_bill)*100 > :querypct''',
        {"querysize": 4, "querypct": 15},
    ),
    (
        "\n5. Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order",
        "(day, time, sum_bill, sum_tip, tip_pct)",
        '''SELECT day,
                  time,
                  SUM(total_bill) AS sum_bill,
                  SUM(tip) AS sum_tip,
                  ROUND(SUM(tip)/SUM(total_bill)*100,2) AS tip_pct
           FROM tips
           GROUP BY day, time
           ORDER BY tip_pct DESC''',
        (),
    ),
]

# `conn` is initialised first so the cleanup in `finally` never hits an
# undefined (or stale, from a previous cell) connection object.
conn = None
try:
    conn = sqlite3.connect('tips.db')
    # The context manager commits/rolls back only; closing happens in `finally`.
    with conn:
        # Create a cursor
        cursor = conn.cursor()

        for title, header, sql, params in queries_1_to_5:
            print(title)
            cursor.execute(sql, params)
            print(header)
            result = cursor.fetchall()
            if result:
                for row in result:
                    print(row)
            else:
                print("None")

except Exception as e:
    # Report the failure without a traceback; remaining queries are skipped.
    print(e)

finally:
    if conn is not None:
        conn.close()


1. Retrieve the average tip percentage for each day of the week
(day, tip_pct)
('Fri', 15.94)
('Sat', 14.64)
('Sun', 15.2)
('Thur', 15.67)

2. Find the maximum and minimum total bill amounts
(min_bill, max_bill)
(3.07, 50.81)

3. Count the number of parties for each size
(size, number_of_parties)
(1, 4)
(2, 156)
(3, 38)
(4, 37)
(5, 5)
(6, 4)

4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%
(total_bill, tip)
(25.29, 4.71)
(18.43, 3.0)
(39.42, 7.58)
(30.4, 5.6)
(32.4, 6.0)
(25.56, 4.34)
(18.29, 3.76)
(29.93, 5.07)
(34.3, 6.7)
(27.05, 5.0)
(29.85, 5.14)
(21.5, 3.5)
(23.17, 6.5)
(20.69, 5.0)
(20.53, 4.0)
(25.89, 5.16)
(48.33, 9.0)

5. Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order
(day, time, sum_bill, sum_tip, tip_pct)
('Fri', 'Lunch', 89.92, 16.68, 18.55)
('Thur', 'Dinner', 18.78, 3.0, 15.97)
('Thur', 'Lunch', 1077.55, 168.83, 15.67)


Query 6-10:

In [34]:
# READ, queries 6-10.
#
# Each entry is (title to print, column header to print, SQL text, bound
# parameters). The cell prints the title, runs the query, prints the header
# and then every result row, or "None" when the result set is empty.
#
# String literals inside the SQL use single quotes: double-quoted text is an
# identifier in SQL and is only accepted as a string by SQLite as a legacy
# fallback.
queries_6_to_10 = [
    (
        "\n6. Find the average tip percentage for each combination of day, time, and smoker status",
        "(day, time, smoker, tip_pct)",
        '''SELECT day,
                  time,
                  smoker,
                  ROUND(SUM(tip)/SUM(total_bill)*100,2) AS tip_pct
           FROM tips
           GROUP BY day, time, smoker''',
        (),
    ),
    (
        "\n7. Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records",
        "(sex, sum_bill, sum_tip, tip_pct)",
        '''SELECT sex,
                  SUM(total_bill) AS sum_bill,
                  SUM(tip) AS sum_tip,
                  ROUND(SUM(tip)/SUM(total_bill)*100,2) AS tip_pct
           FROM tips
           GROUP BY sex
           ORDER BY sum_bill DESC
           LIMIT :querylimit''',
        {"querylimit": 5},
    ),
    (
        # The bare columns total_bill and tip sit next to a single MAX()/MIN()
        # aggregate; SQLite takes their values from the row that holds that
        # maximum/minimum, which is what "corresponding" relies on here.
        "\n8. Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount",
        "(day, time, aggregate_type, tip_pct, total_bill, tip)",
        '''SELECT day,
                  time,
                  '%tip max' AS aggregate_type,
                  ROUND(MAX(tip/total_bill)*100,2) AS tip_pct,
                  total_bill,
                  tip
           FROM tips
           GROUP BY day, time
           UNION
           SELECT day,
                  time,
                  '%tip min' AS aggregate_type,
                  ROUND(MIN(tip/total_bill)*100,2) AS tip_pct,
                  total_bill,
                  tip
           FROM tips
           GROUP BY day, time
           ORDER BY day, time, aggregate_type''',
        (),
    ),
    (
        # The filter compares the unrounded percentage so that e.g. 15.004%
        # still counts as "greater than 15%"; rounding is for display only.
        "\n9. Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100",
        "(total_bill, tip, tip_pct)",
        '''SELECT total_bill,
                  tip,
                  ROUND(tip/total_bill*100,2) AS tip_pct
           FROM tips
           WHERE size >= :querysize
             AND (tip/total_bill)*100 > :querypct
             AND total_bill BETWEEN :querybillmin AND :querybillmax''',
        {"querysize": 4, "querypct": 15, "querybillmin": 50, "querybillmax": 100},
    ),
    (
        "\n10. Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records",
        "(day, time, smoker, tip_pct, count_record)",
        '''SELECT day,
                  time,
                  smoker,
                  ROUND(SUM(tip)/SUM(total_bill)*100,2) AS tip_pct,
                  COUNT(tip) AS count_record
           FROM tips
           GROUP BY day, time, smoker
           HAVING count_record > :mincount''',
        {"mincount": 5},
    ),
]

# `conn` is initialised first so the cleanup in `finally` never hits an
# undefined (or stale, from a previous cell) connection object.
conn = None
try:
    conn = sqlite3.connect('tips.db')
    # The context manager commits/rolls back only; closing happens in `finally`.
    with conn:
        # Create a cursor
        cursor = conn.cursor()

        for title, header, sql, params in queries_6_to_10:
            print(title)
            cursor.execute(sql, params)
            print(header)
            result = cursor.fetchall()
            if result:
                for row in result:
                    print(row)
            else:
                print("None")

except Exception as e:
    # Report the failure without a traceback; remaining queries are skipped.
    print(e)

finally:
    if conn is not None:
        conn.close()


6. Find the average tip percentage for each combination of day, time, and smoker status
(day, time, smoker, tip_pct)
('Fri', 'Dinner', 'No', 14.3)
('Fri', 'Dinner', 'Yes', 15.16)
('Fri', 'Lunch', 'No', 18.77)
('Fri', 'Lunch', 'Yes', 18.5)
('Sat', 'Dinner', 'No', 15.78)
('Sat', 'Dinner', 'Yes', 13.51)
('Sun', 'Dinner', 'No', 15.45)
('Sun', 'Dinner', 'Yes', 14.58)
('Thur', 'Dinner', 'No', 15.97)
('Thur', 'Lunch', 'No', 15.62)
('Thur', 'Lunch', 'Yes', 15.79)

7. Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records
(sex, sum_bill, sum_tip, tip_pct)
('Male', 3256.82, 485.07, 14.89)
('Female', 1570.95, 246.51, 15.69)

8. Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount
(day, time, aggregate_type, tip_pct, total_bill, tip)
('Fri', 'Dinner', '%tip max', 26.35, 16.32, 4.3)
('Fri', 'Dinner', '%tip min', 10.36, 

Query 11-15:

In [35]:
# READ, queries 11-15.
#
# Each entry is (title to print, column header to print, SQL text, bound
# parameters). The cell prints the title, runs the query, prints the header
# and then every result row, or "None" when the result set is empty.
#
# String literals inside the SQL use single quotes: double-quoted text is an
# identifier in SQL and is only accepted as a string by SQLite as a legacy
# fallback.
queries_11_to_15 = [
    (
        # T1 is a one-row subquery holding the grand total, so joining it to
        # every row of tips makes the total available inside each group.
        "\n11. Find revenue contribution percentage (total_bill/sum(total_bill)) for each combination of day and time",
        "(day, time, revenue_contribution_pct)",
        '''SELECT tips.day,
                  tips.time,
                  ROUND(SUM(tips.total_bill)/T1.sum_all_bill*100,2) AS revenue_contribution_pct
           FROM tips,
                (SELECT SUM(total_bill) AS sum_all_bill
                 FROM tips) AS T1
           GROUP BY tips.day, tips.time''',
        (),
    ),
    (
        "\n12. Find tip contribution percentage (tip/sum(tip)) for each combination of sex and smoker status",
        "(sex, smoker, tip_contribution_pct)",
        '''SELECT tips.sex,
                  tips.smoker,
                  ROUND(SUM(tips.tip)/T1.sum_all_tip*100,2) AS tip_contribution_pct
           FROM tips,
                (SELECT SUM(tip) AS sum_all_tip
                 FROM tips) AS T1
           GROUP BY tips.sex, tips.smoker''',
        (),
    ),
    (
        "\n13. Find maximum and minimum of bill amount, for each combination of sex, day, and time",
        "(sex, day, time, aggregate_type, bill_amount)",
        '''SELECT sex,
                  day,
                  time,
                  'bill max' AS aggregate_type,
                  MAX(total_bill) AS bill_amount
           FROM tips
           GROUP BY sex, day, time
           UNION
           SELECT sex,
                  day,
                  time,
                  'bill min' AS aggregate_type,
                  MIN(total_bill) AS bill_amount
           FROM tips
           GROUP BY sex, day, time
           ORDER BY sex, day, time, aggregate_type''',
        (),
    ),
    (
        "\n14. Find average spending per head (total_bill/size) for each combination of day and time",
        "(day, time, spending_per_head)",
        '''SELECT day,
                  time,
                  ROUND(SUM(total_bill)/SUM(size),2) AS spending_per_head
           FROM tips
           GROUP BY day, time''',
        (),
    ),
    (
        "\n15. Find average tip per head (tip/size) on weekend, for each combination of sex and smoker status",
        "(sex, smoker, tip_per_head)",
        '''SELECT sex,
                  smoker,
                  ROUND(SUM(tip)/SUM(size),2) AS tip_per_head
           FROM tips
           WHERE day = :day1 OR day = :day2
           GROUP BY sex, smoker''',
        {"day1": "Sat", "day2": "Sun"},
    ),
]

# `conn` is initialised first so the cleanup in `finally` never hits an
# undefined (or stale, from a previous cell) connection object.
conn = None
try:
    conn = sqlite3.connect('tips.db')
    # The context manager commits/rolls back only; closing happens in `finally`.
    with conn:
        # Create a cursor
        cursor = conn.cursor()

        for title, header, sql, params in queries_11_to_15:
            print(title)
            cursor.execute(sql, params)
            print(header)
            result = cursor.fetchall()
            if result:
                for row in result:
                    print(row)
            else:
                print("None")

except Exception as e:
    # Report the failure without a traceback; remaining queries are skipped.
    print(e)

finally:
    if conn is not None:
        conn.close()


11. Find revenue contribution percentage (total_bill/sum(total_bill)) for each combination of day and time
(day, time, revenue_contribution_pct)
('Fri', 'Dinner', 4.89)
('Fri', 'Lunch', 1.86)
('Sat', 'Dinner', 36.84)
('Sun', 'Dinner', 33.7)
('Thur', 'Dinner', 0.39)
('Thur', 'Lunch', 22.32)

12. Find tip contribution percentage (tip/sum(tip)) for each combination of sex and smoker status
(sex, smoker, tip_contribution_pct)
('Female', 'No', 20.47)
('Female', 'Yes', 13.22)
('Male', 'No', 41.28)
('Male', 'Yes', 25.02)

13. Find maximum and minimum of bill amount, for each combination of sex, day, and time
(sex, day, time, aggregate_type, bill_amount)
('Female', 'Fri', 'Dinner', 'bill max', 22.75)
('Female', 'Fri', 'Dinner', 'bill min', 5.75)
('Female', 'Fri', 'Lunch', 'bill max', 16.27)
('Female', 'Fri', 'Lunch', 'bill min', 10.09)
('Female', 'Sat', 'Dinner', 'bill max', 44.3)
('Female', 'Sat', 'Dinner', 'bill min', 3.07)
('Female', 'Sun', 'Dinner', 'bill max', 35.26)
('Female', 'Sun', 'D

UPDATE:

In [36]:
# UPDATE: set smoker to 'Yes' on one record and show it before and after.
#
# The record is addressed through SQLite's implicit ROWID; the same SELECT is
# reused for the "before" and "after" snapshots. Values are bound as
# parameters instead of being written into the SQL as double-quoted text,
# which SQLite would first try to resolve as an identifier.
conn = None  # initialised first so the cleanup in `finally` is always safe
try:
    conn = sqlite3.connect('tips.db')
    # The context manager commits the UPDATE on success (rolls back on error);
    # it does not close the connection.
    with conn:
        cursor = conn.cursor()

        print("\nUPDATE: Update the record that corresponds to id=10 and set smoker to Yes.")

        target_row = {"rowid": 10}
        select_target = '''SELECT *
                           FROM tips
                           WHERE ROWID = :rowid'''

        print("\nExisting record before update:")
        cursor.execute(select_target, target_row)
        print("(total_bill, tip, sex, smoker, day, time, size)")
        for row in cursor.fetchall():
            print(row)

        updatequery = '''UPDATE tips
                         SET smoker = :smoker
                         WHERE ROWID = :rowid'''
        cursor.execute(updatequery, {"smoker": "Yes", **target_row})

        print("\nUpdated record:")
        cursor.execute(select_target, target_row)
        print("(total_bill, tip, sex, smoker, day, time, size)")
        for row in cursor.fetchall():
            print(row)

except Exception as e:
    # Report the failure without a traceback and keep the notebook running.
    print(e)

finally:
    if conn is not None:
        conn.close()


UPDATE: Update the record that corresponds to id=10 and set smoker to Yes.

Existing record before update:
(total_bill, tip, sex, smoker, day, time, size)
(14.78, 3.23, 'Male', 'No', 'Sun', 'Dinner', 2)

Updated record:
(total_bill, tip, sex, smoker, day, time, size)
(14.78, 3.23, 'Male', 'Yes', 'Sun', 'Dinner', 2)


DELETE:

In [37]:
# DELETE: remove every record whose total bill is below the threshold and
# show the matching records before and after, plus the new minimum bill.
#
# The threshold is bound once as a parameter so the three statements cannot
# drift apart. The 'MIN' label is a single-quoted SQL string literal;
# double-quoted text is an identifier in SQL and is only accepted as a string
# by SQLite as a legacy fallback.
conn = None  # initialised first so the cleanup in `finally` is always safe
try:
    conn = sqlite3.connect('tips.db')
    # The context manager commits the DELETE on success (rolls back on error);
    # it does not close the connection.
    with conn:
        cursor = conn.cursor()

        print("\nDELETE: Delete records from the database that have a total bill that is less than $10.")

        threshold = {"maxbill": 10}
        select_below = '''SELECT *
                          FROM tips
                          WHERE total_bill < :maxbill'''

        print("\nExisting records with (total_bill < $10) BEFORE deletion:")
        cursor.execute(select_below, threshold)
        print("(total_bill, tip, sex, smoker, day, time, size)")
        for row in cursor.fetchall():
            print(row)

        deletequery = '''DELETE FROM tips
                         WHERE total_bill < :maxbill'''
        cursor.execute(deletequery, threshold)

        print("\nRecords with (total_bill < $10) AFTER deletion:")
        cursor.execute(select_below, threshold)
        print("(total_bill, tip, sex, smoker, day, time, size)")
        print(cursor.fetchall())

        print("\nAFTER deletion: Minimum total bill amounts")
        min_after = '''SELECT 'MIN', MIN(total_bill) AS min_bill
                       FROM tips'''
        cursor.execute(min_after)
        print(cursor.fetchall())

except Exception as e:
    # Report the failure without a traceback and keep the notebook running.
    print(e)

finally:
    if conn is not None:
        conn.close()


DELETE: Delete records from the database that have a total bill that is less than $10.

Existing records with (total_bill < $10) BEFORE deletion:
(total_bill, tip, sex, smoker, day, time, size)
(8.77, 2.0, 'Male', 'No', 'Sun', 'Dinner', 2)
(9.55, 1.45, 'Male', 'No', 'Sat', 'Dinner', 2)
(9.68, 1.32, 'Male', 'No', 'Sun', 'Dinner', 2)
(9.94, 1.56, 'Male', 'No', 'Sun', 'Dinner', 2)
(3.07, 1.0, 'Female', 'Yes', 'Sat', 'Dinner', 1)
(5.75, 1.0, 'Female', 'Yes', 'Fri', 'Dinner', 2)
(7.25, 1.0, 'Female', 'No', 'Sat', 'Dinner', 1)
(8.52, 1.48, 'Male', 'No', 'Thur', 'Lunch', 2)
(8.51, 1.25, 'Female', 'No', 'Thur', 'Lunch', 2)
(8.35, 1.5, 'Female', 'No', 'Thur', 'Lunch', 2)
(9.78, 1.73, 'Male', 'No', 'Thur', 'Lunch', 2)
(7.51, 2.0, 'Male', 'No', 'Thur', 'Lunch', 2)
(7.25, 5.15, 'Male', 'Yes', 'Sun', 'Dinner', 2)
(9.6, 4.0, 'Female', 'Yes', 'Sun', 'Dinner', 2)
(7.56, 1.44, 'Male', 'No', 'Thur', 'Lunch', 2)
(7.74, 1.44, 'Male', 'Yes', 'Sat', 'Dinner', 2)
(8.58, 1.92, 'Male', 'Yes', 'Fri', 'Lunch', 