In [1]:
'''Import Statements'''
import pandas as pd
import sqlite3

In [2]:
def create_db_table():
    '''
    Reads in the tips.csv, connects it to the SQLite database, and writes the data to a SQL table.
    '''
    try:
        # Read the tips.csv file
        df = pd.read_csv('./data/tips.csv')
        df.head()

        # Connect to the SQLite database, automatically commit and close the connection.
        with sqlite3.connect('tips.db') as conn:

            # Write the data to a SQL table (replacing if a table of the same name exists and ignoring a column of indexes)
            df.to_sql('tips_table', conn, if_exists='replace', index=True)
            
    except sqlite3.Error as error:
        print(f'An error occurred when creating the SQL db table -> {error}')

create_db_table()

**Create sqlite database using files from folder 'data'**

In [3]:
def create_db_table():
    '''
    Reads in the tips.csv, connects it to the SQLite database, and writes the data to a SQL table.
    '''
    try:
        # Read the tips.csv file
        df = pd.read_csv('./data/tips.csv')

        # Connect to the SQLite database, automatically commit and close the connection.
        with sqlite3.connect('tips.db') as conn:

            # Write the data to a SQL table (replacing if a table of the same name exists and ignoring a column of indexes)
            df.to_sql('tips_table', conn, if_exists='replace', index=True)
            
    except sqlite3.Error as error:
        print(f'An error occurred when creating the SQL db table -> {error}')

create_db_table()

**Q1. Retrieve the average tip % for each day of the week**

In [4]:
def q1():
    '''
    Average tip % = The (sum of all tips of a particular day)* 100/ (sum of all total bills of a particular day) 
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT day, ROUND(SUM(tip)/SUM(total_bill)*?, ?) AS avg_tip
                    FROM tips
                    GROUP BY day
                    '''
            
            params = (100, 2)

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query, params)

            df_avg_tip = pd.DataFrame(cursor.fetchall(), columns = ['day', 'avg_tip'])
            print(df_avg_tip)

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q1 operation on the SQL db table -> {error}')

q1()

    day  avg_tip
0   Fri    15.94
1   Sat    14.64
2   Sun    15.20
3  Thur    15.67


**Q2. Find the maximum and minimum total bull amounts**

In [5]:
def q2():
    '''
    Finds the maximum and minimum total bull amounts
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT MAX(total_bill) AS max_bill, MIN(total_bill) As min_bill
                    FROM tips
                    '''

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query)

            df_bill = pd.DataFrame(cursor.fetchall(), columns = ['max_bill', 'min_bill'])
            print(df_bill)

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q2 operation on the SQL db table -> {error}')

q2()

   max_bill  min_bill
0     50.81      3.07


**Q3. Count the number of parties for each size**

In [6]:
def q3():
    '''
    Counts the number of parties for each size
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT size, COUNT(size) AS count
                    FROM tips
                    GROUP BY size
                    '''

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query)

            df_count = pd.DataFrame(cursor.fetchall(), columns = ['size', 'count'])
            print(df_count)

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q3 operation on the SQL db table -> {error}')

q3()

   size  count
0     1      4
1     2    156
2     3     38
3     4     37
4     5      5
5     6      4


**Q4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%**

In [7]:
def q4():
    '''
    Retrieves the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT total_bill, tip, size, ROUND(tip/total_bill * ?, ?) AS tip_percentage
                    FROM tips 
                    WHERE size > ? AND ROUND(tip/total_bill*?, ?) > ?
                    '''
            
            params = (100, 2, 3, 100, 2, 15)

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query, params)

            df_temp = pd.DataFrame(cursor.fetchall(), columns=['total_bill', 'tip', 'size', 'tip_percentage'])
            print(df_temp)

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q4 operation on the SQL db table -> {error}')

q4()

    total_bill   tip  size  tip_percentage
0        25.29  4.71     4           18.62
1        18.43  3.00     4           16.28
2        39.42  7.58     4           19.23
3        30.40  5.60     4           18.42
4        32.40  6.00     4           18.52
5        25.56  4.34     4           16.98
6        18.29  3.76     4           20.56
7        29.93  5.07     4           16.94
8        34.30  6.70     6           19.53
9        27.05  5.00     6           18.48
10       29.85  5.14     5           17.22
11       21.50  3.50     4           16.28
12       23.17  6.50     4           28.05
13       20.69  5.00     5           24.17
14       20.53  4.00     4           19.48
15       25.89  5.16     4           19.93
16       48.33  9.00     4           18.62


**Q5. Retrieve the total bill, tip amount, and tip % for each combination of day and time, sorted by tip % in descending order.** 

In [8]:
def q5():
    '''
    Retrieves the total bill, tip amount, and tip % for each combination of day and time, sorted by tip % in descending order.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT day, time, SUM(total_bill) AS total_bill , SUM(tip) AS total_tip, ROUND(tip/total_bill * ?, ?) AS tip_percentage
                    FROM tips     
                    GROUP BY day, time
                    ORDER BY tip_percentage DESC
                    '''
            
            params = (100, 2)

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query, params)

            df_temp = pd.DataFrame(cursor.fetchall(), columns=['day', 'time','total_bill', 'tip', 'tip_percentage'])
            print(df_temp)

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q5 operation on the SQL db table -> {error}')

q5()

    day    time  total_bill     tip  tip_percentage
0   Fri   Lunch       89.92   16.68           18.09
1   Sat  Dinner     1778.40  260.40           16.22
2  Thur  Dinner       18.78    3.00           15.97
3  Thur   Lunch     1077.55  168.83           14.71
4   Fri  Dinner      235.96   35.28           10.36
5   Sun  Dinner     1627.16  247.39            5.94


**Q11. Find the average tip percentage, separately for weekends and weekdays.**

In [9]:
def q11():
    '''
    Finds the average tip percentage, separately for weekends and weekdays.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT 
                        CASE
                            WHEN day IN (?, ?) THEN 'Weekend'
                            ELSE 'Weekday'
                        END AS day_type,
                        AVG(tip / total_bill * ?)
                    FROM tips_table
                    GROUP BY day_type
                    '''
            
            params = ('Sat', 'Sun', 100)

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query, params)
            rows = cursor.fetchall()

            print("██████████████████████████████████████████████████")
            print("Average Tip Percentages for Weekends and Weekdays")
            for day_type, avg_tip_perc in rows:
                print(f"Day Type: {day_type} -> Average Tip Percentage: {avg_tip_perc:.2f}%")

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q11 operation on the SQL db table -> {error}')

q11()

██████████████████████████████████████████████████
Average Tip Percentages for Weekends and Weekdays
Day Type: Weekday -> Average Tip Percentage: 16.33%
Day Type: Weekend -> Average Tip Percentage: 15.96%


**Q12: Find the 3 days with the largest average party size and total bill, and provide the average tip percentage for each of these days.**

In [10]:
def q12():
    '''
    Finds the 3 days with the largest average party size and total bill, and provide the average tip percentage for each of these days.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT
                        day,
                        AVG(size) AS avg_size,
                        AVG(total_bill) AS avg_tot_bill,
                        AVG(tip / total_bill * ?)
                    FROM tips_table
                    GROUP BY day
                    ORDER BY avg_size DESC, avg_tot_bill DESC
                    LIMIT ?
                    '''
            
            params = (100, 3)

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query, params)
            rows = cursor.fetchall()

            print("██████████████████████████████████████████████████████████████")
            print("Top 3 Days with the Largest Average Party Size and Total Bill")
            for day, avg_party_size, avg_total_bill, avg_tip_percentage in rows:
                print(f"Day: {day}, Avg Party Size: {avg_party_size:.2f}, Avg Total Bill: ${avg_total_bill:.2f} -> Avg Tip Percentage: {avg_tip_percentage:.2f}%")

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q12 operation on the SQL db table -> {error}')

q12()

██████████████████████████████████████████████████████████████
Top 3 Days with the Largest Average Party Size and Total Bill
Day: Sun, Avg Party Size: 2.84, Avg Total Bill: $21.41 -> Avg Tip Percentage: 16.69%
Day: Sat, Avg Party Size: 2.52, Avg Total Bill: $20.44 -> Avg Tip Percentage: 15.32%
Day: Thur, Avg Party Size: 2.45, Avg Total Bill: $17.68 -> Avg Tip Percentage: 16.13%


**Q13: Find the total number of parties served each day, grouped by smoker status and sorted by the average party size in descending order.**

In [11]:
def q13():
    '''
    Finds the total number of parties served each day, grouped by smoker status and sorted by the average party size in descending order.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT
                        day,
                        smoker,
                        COUNT(*),
                        AVG(size) AS avg_size
                    FROM tips_table
                    GROUP BY day, smoker
                    ORDER BY avg_size DESC;
                    '''

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query)
            rows = cursor.fetchall()

            print("█████████████████████████████████████████████████████████████████████████████████████████")
            print("Total # of parties by day and smoker status, with the average size of each day's parties")
            for day, smoker, tot_parties, avg_size in rows:
                print(f"Day: {day}, Smoker: {smoker} -> Total Parties: {tot_parties}, Average Size: {avg_size:.2f}")

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q13 operation on the SQL db table -> {error}')

q13()

█████████████████████████████████████████████████████████████████████████████████████████
Total # of parties by day and smoker status, with the average size of each day's parties
Day: Sun, Smoker: No -> Total Parties: 57, Average Size: 2.93
Day: Sun, Smoker: Yes -> Total Parties: 19, Average Size: 2.58
Day: Sat, Smoker: No -> Total Parties: 45, Average Size: 2.56
Day: Thur, Smoker: No -> Total Parties: 45, Average Size: 2.49
Day: Sat, Smoker: Yes -> Total Parties: 42, Average Size: 2.48
Day: Thur, Smoker: Yes -> Total Parties: 17, Average Size: 2.35
Day: Fri, Smoker: No -> Total Parties: 4, Average Size: 2.25
Day: Fri, Smoker: Yes -> Total Parties: 15, Average Size: 2.07


**Q14: Find the average tip percentage for each combination of sex and time of day, and identify which combination has the highest average tip percentage.**

In [12]:
def q14():
    '''
    Finds the average tip percentage for each combination of sex and time of day, and identify which combination has the highest average tip percentage.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT
                        sex,
                        time,
                        AVG(tip / total_bill * ?) AS avg_tip
                    FROM tips_table
                    GROUP BY sex, time
                    ORDER BY avg_tip DESC
                    LIMIT ?
                    '''
            
            params = (100, 1)

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query, params)
            rows = cursor.fetchone()

            sex, time, average_tip_percentage = rows

            print("████████████████████████████████████████████████████████")
            print(f"The combination with the highest average tip percentage")
            print(f"Sex: {sex}, Time: {time} -> Average Tip Percentage: {average_tip_percentage:.2f}%")

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q14 operation on the SQL db table -> {error}')

q14()

████████████████████████████████████████████████████████
The combination with the highest average tip percentage
Sex: Female, Time: Dinner -> Average Tip Percentage: 16.93%


**Q15: Find the average tip amount for each party size with a total bill greater than $25, and place an emphasis on party sizes with an average tip above $3.**

In [13]:
def q15():
    '''
    Finds the average tip amount for each party size with a total bill greater than $25, and place an emphasis on party sizes with an average tip above $3.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query from the table
            query = '''
                    SELECT
                        size,
                        AVG(tip) AS avg_tip
                    FROM tips_table
                    WHERE total_bill > ?
                    GROUP BY size
                    HAVING avg_tip > ?
                    ORDER BY avg_tip DESC
                    '''
            
            params = (25, 3)

            # Execute the query and fetch all of the results into variable 'rows'
            cursor.execute(query, params)
            rows = cursor.fetchall()

            print("██████████████████████████████████████")
            print("Party Sizes with Average Tip Above $3")
            for party_size, average_tip_amount in rows:
                print(f"Party Size: {party_size}, Average Tip Amount: ${average_tip_amount:.2f}")

    except sqlite3.Error as error:
        print(f'An error occurred when performing the q15 operation on the SQL db table -> {error}')

q15()

██████████████████████████████████████
Party Sizes with Average Tip Above $3
Party Size: 6, Average Tip Amount: $5.22
Party Size: 4, Average Tip Amount: $4.94
Party Size: 3, Average Tip Amount: $4.46
Party Size: 5, Average Tip Amount: $3.79
Party Size: 2, Average Tip Amount: $3.18


**UPDATE - It was determined that there was an error in the database. Please update the record that corresponds to id=10 and set smoker to Yes.**

In [14]:
def update():
    '''
    Updates the record that corresponds to id=10 and set smoker to Yes.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Update the record where id = 10 and set smoker to 'Yes'
            query = '''
                    UPDATE tips_table
                    SET smoker = ?
                    WHERE "index" = ?
                    '''
            
            params = ('Yes', 10)
            
            cursor.execute(query, params)
            conn.commit()  # Commit the changes

    except sqlite3.Error as error:
        print(f'An error occurred when performing the UPDATE operation on the SQL db table -> {error}')

update()

In [15]:
def check_update():
    '''
    Checks whether the row associated with id=10 has smoker set to 'Yes'
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query to select all records from the table
            query = 'SELECT * FROM tips_table WHERE "index" = 10'

            cursor.execute(query)

            # Fetch all rows from the table
            rows = cursor.fetchall()

            # Print the rows to view the database contents
            for row in rows:
                print(row)

    except sqlite3.Error as error:
        print(f'An error occurred: {error}')

check_update()

(10, 10.27, 1.71, 'Male', 'Yes', 'Sun', 'Dinner', 2)


**DELETE - Delete records from the database that have a total bill that is less than $10.**

In [16]:
def delete():
    '''
    Deletes records from the database that have a total bill that is less than $10.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Update the record where id = 10 and set smoker to 'Yes'
            query = 'DELETE FROM tips_table WHERE total_bill < 10'
            
            cursor.execute(query)
            conn.commit()  # Commit the changes

    except sqlite3.Error as error:
        print(f'An error occurred when performing the DELETE operation on the SQL db table -> {error}')

delete()

In [17]:
def check_delete():
    '''
    Checks whether the records from the database that have a total bill that is less than $10 were deleted.
    '''
    try:
        # Connect to the SQLite database
        with sqlite3.connect('tips.db') as conn:
            cursor = conn.cursor()

            # Query to select all records from the table
            select_query = 'SELECT * FROM tips_table WHERE total_bill < 10'
            cursor.execute(select_query)

            # Fetch all rows from the table
            rows = cursor.fetchall()

            # Print the rows to view the database contents
            for row in rows:
                print(row)

    except sqlite3.Error as error:
        print(f'An error occurred: {error}')

check_delete()