In [155]:
import sqlite3
import pandas as pd

# CREATE

## 1. Create a SQLite database

In [167]:
conn = sqlite3.connect('tips.db')

In [168]:
c = conn.cursor()

In [162]:
conn.commit()

## 2. Insert data to the database

In [169]:
df =  pd.read_csv('data/tips.csv')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [170]:
try:
    with sqlite3.connect('tips.db') as conn:
        df.to_sql('TIPS', conn, if_exists='replace', index=False)
        conn.commit()
except Exception as err:
    print(f"Error occurred while inserting data to table: {err}")

In [171]:
c.execute("select * from TIPS")
conn.commit()
print(c.fetchone())

(16.99, 1.01, 'Female', 'No', 'Sun', 'Dinner', 2)


# READ

## 1. Retrieve the average tip percentage for each day of the week

In [172]:
with sqlite3.connect('tips.db') as conn:
    try:
            query1 = '''select 
                            day, 
                            round(sum(tip)/sum(total_bill)*100,2) as avg_tip_percentage
                        from TIPS 
                        group by day'''
            answer1 = pd.read_sql_query(query1, conn)
            conn.commit()
            print(answer1)
    except Exception as err:
            print('Query Failed: %s\nError: %s' % (query1, str(err)))

    day  avg_tip_percentage
0   Fri               15.94
1   Sat               14.64
2   Sun               15.20
3  Thur               15.67


## 2. Find the maximum and minimum total bill amounts

In [173]:
with sqlite3.connect('tips.db') as conn:
    try:
        query2 = '''select 
                        max(total_bill) as max_bill, 
                        min(total_bill) as min_bill 
                    from TIPS'''
        answer2 = pd.read_sql_query(query2, conn)
        conn.commit()
        print(answer2)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query2, str(err)))

   max_bill  min_bill
0     50.81      3.07


## 3. Count the number of parties for each size

In [174]:
with sqlite3.connect('tips.db') as conn:
    try:
        query3 = '''select 
                        size, count(*) as num_each_size
                    from TIPS
                    group by size'''
        answer3 = pd.read_sql_query(query3, conn)
        conn.commit()
        print(answer3)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query3, str(err)))

   size  num_each_size
0     1              4
1     2            156
2     3             38
3     4             37
4     5              5
5     6              4


## 4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%

In [175]:
with sqlite3.connect('tips.db') as conn:
    try:
        query4 = '''select 
                        total_bill, tip, 
                        round(tip/total_bill*100,2) as tip_percentage,
                        size
                    from TIPS
                    where size >= 4
                    and round(tip/total_bill*100,2) > 15'''
        answer4 = pd.read_sql_query(query4, conn)
        conn.commit()
        print(answer4)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query4, str(err)))

    total_bill   tip  tip_percentage  size
0        25.29  4.71           18.62     4
1        18.43  3.00           16.28     4
2        39.42  7.58           19.23     4
3        30.40  5.60           18.42     4
4        32.40  6.00           18.52     4
5        25.56  4.34           16.98     4
6        18.29  3.76           20.56     4
7        29.93  5.07           16.94     4
8        34.30  6.70           19.53     6
9        27.05  5.00           18.48     6
10       29.85  5.14           17.22     5
11       21.50  3.50           16.28     4
12       23.17  6.50           28.05     4
13       20.69  5.00           24.17     5
14       20.53  4.00           19.48     4
15       25.89  5.16           19.93     4
16       48.33  9.00           18.62     4


## 5. Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order

In [176]:
with sqlite3.connect('tips.db') as conn:
    try:
        query5 = '''select 
                        day, time,
                        sum(total_bill) as total_bill, 
                        sum(tip) as tip_amount, 
                        round(sum(tip)/sum(total_bill)*100,2) as tip_percentage
                    from TIPS
                    group by day, time
                    order by tip_percentage desc'''
        answer5 = pd.read_sql_query(query5, conn)
        conn.commit()
        print(answer5)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query5, str(err)))

    day    time  total_bill  tip_amount  tip_percentage
0   Fri   Lunch       89.92       16.68           18.55
1  Thur  Dinner       18.78        3.00           15.97
2  Thur   Lunch     1077.55      168.83           15.67
3   Sun  Dinner     1627.16      247.39           15.20
4   Fri  Dinner      235.96       35.28           14.95
5   Sat  Dinner     1778.40      260.40           14.64


## 6. Find the average tip percentage for each combination of day, time, and smoker status

In [177]:
with sqlite3.connect('tips.db') as conn:
    try:
        query6 = '''select 
                        day, time, smoker,
                        round(sum(tip)/sum(total_bill)*100,2) as avg_tip_percentage
                    from TIPS
                    group by day, time, smoker'''
        answer6 = pd.read_sql_query(query6, conn)
        conn.commit()
        print(answer6)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query6, str(err)))

     day    time smoker  avg_tip_percentage
0    Fri  Dinner     No               14.30
1    Fri  Dinner    Yes               15.16
2    Fri   Lunch     No               18.77
3    Fri   Lunch    Yes               18.50
4    Sat  Dinner     No               15.78
5    Sat  Dinner    Yes               13.51
6    Sun  Dinner     No               15.45
7    Sun  Dinner    Yes               14.58
8   Thur  Dinner     No               15.97
9   Thur   Lunch     No               15.62
10  Thur   Lunch    Yes               15.79


## 7. Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records

In [178]:
with sqlite3.connect('tips.db') as conn:
    try:
        query7 = '''select * from
                    (select 
                        sex,
                        total_bill,
                        tip, 
                        round(tip/total_bill*100,2) as tip_percentage
                    from TIPS
                    where sex = 'Male'
                    order by total_bill desc
                    limit 5) male
                    union all
                    select * from 
                    (select 
                        sex,
                        total_bill,
                        tip, 
                        round(tip/total_bill*100,2) as tip_percentage
                    from TIPS
                    where sex = 'Female'
                    order by total_bill desc
                    limit 5)female
                    '''
        answer7 = pd.read_sql_query(query7, conn)
        conn.commit()
        print(answer7)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query7, str(err)))

      sex  total_bill    tip  tip_percentage
0    Male       50.81  10.00           19.68
1    Male       48.33   9.00           18.62
2    Male       48.27   6.73           13.94
3    Male       48.17   5.00           10.38
4    Male       45.35   3.50            7.72
5  Female       44.30   2.50            5.64
6  Female       43.11   5.00           11.60
7  Female       35.83   4.67           13.03
8  Female       35.26   5.00           14.18
9  Female       34.83   5.17           14.84


## 8. Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount

In [179]:
with sqlite3.connect('tips.db') as conn:
    try:
        query8 = '''select 
                        day, time,
                        total_bill, 
                        tip,
                        max(round(tip/total_bill*100,2)) as max_tip_percentage,
                        min(round(tip/total_bill*100,2)) as min_tip_percentage
                    from TIPS
                    group by day, time'''
        answer8 = pd.read_sql_query(query8, conn)
        conn.commit()
        print(answer8)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query8, str(err)))

    day    time  total_bill   tip  max_tip_percentage  min_tip_percentage
0   Fri  Dinner       28.97  3.00               26.35               10.36
1   Fri   Lunch       13.42  1.58               25.93               11.77
2   Sat  Dinner       32.83  1.17               32.57                3.56
3   Sun  Dinner       16.99  1.01               71.03                5.94
4  Thur  Dinner       18.78  3.00               15.97               15.97
5  Thur   Lunch       18.64  1.36               26.63                7.30


## 9. Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100

In [180]:
with sqlite3.connect('tips.db') as conn:
    try:
        query9 = '''select 
                        total_bill, 
                        tip,
                        round(tip/total_bill*100,2) as tip_percentage
                    from TIPS
                    where 
                        size >= 4 
                        and tip_percentage > 15
                        and total_bill between 50 and 100'''
        answer9 = pd.read_sql_query(query9, conn)
        conn.commit()
        print(answer9)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query9, str(err)))

Empty DataFrame
Columns: [total_bill, tip, tip_percentage]
Index: []


## 10. Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records

In [181]:
with sqlite3.connect('tips.db') as conn:
    try:
        query10 = '''select 
                        day, time, smoker,
                        round(sum(tip)/sum(total_bill)*100,2) as avg_tip_percentage
                    from TIPS
                    group by day, time, smoker
                    having count(*) > 5'''
        answer10 = pd.read_sql_query(query10, conn)
        conn.commit()
        print(answer10)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query10, str(err)))

    day    time smoker  avg_tip_percentage
0   Fri  Dinner    Yes               15.16
1   Fri   Lunch    Yes               18.50
2   Sat  Dinner     No               15.78
3   Sat  Dinner    Yes               13.51
4   Sun  Dinner     No               15.45
5   Sun  Dinner    Yes               14.58
6  Thur   Lunch     No               15.62
7  Thur   Lunch    Yes               15.79


## 5 additional queries

## 11. Find the total bill, tip amount and average tip percentage for each combination of sex and smoker status, sorted by average tip percentage in descending order

In [182]:
with sqlite3.connect('tips.db') as conn:
    try:
        query11 = '''select 
                        sex, smoker,
                        sum(total_bill) as total_bill,
                        sum(tip) as tip_amount,
                        round(sum(tip)/sum(total_bill)*100,2) as avg_tip_percentage
                    from TIPS
                    group by sex, smoker
                    order by avg_tip_percentage desc'''
        answer11 = pd.read_sql_query(query11, conn)
        conn.commit()
        print(answer11)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query11, str(err)))

      sex smoker  total_bill  tip_amount  avg_tip_percentage
0  Female    Yes      593.27       96.74               16.31
1    Male     No     1919.75      302.00               15.73
2  Female     No      977.68      149.77               15.32
3    Male    Yes     1337.07      183.07               13.69


## 12. Find average total bill and tip amount for groups of different sizes, sorted by ascending order of size

In [183]:
with sqlite3.connect('tips.db') as conn:
    try:
        query12 = '''
                    select 
                        size, 
                        round(avg(total_bill),2) as avg_total_bill,
                        round(avg(tip),2) as avg_tip_amount
                    from TIPS
                    group by size
                    order by size '''
        answer12 = pd.read_sql_query(query12, conn)
        conn.commit()
        print(answer12)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query12, str(err)))

   size  avg_total_bill  avg_tip_amount
0     1            7.24            1.44
1     2           16.45            2.58
2     3           23.28            3.39
3     4           28.61            4.14
4     5           30.07            4.03
5     6           34.83            5.23


## 13. Count of orders and total tips for smokers vs. non-smokers

In [184]:
with sqlite3.connect('tips.db') as conn:
    try:
        query13 = '''
                    select 
                        smoker,
                        count(*) AS order_count,
                        sum(tip) AS total_tip_amount
                    from TIPS
                    group by smoker '''
        answer13 = pd.read_sql_query(query13, conn)
        conn.commit()
        print(answer13)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query13, str(err)))

  smoker  order_count  total_tip_amount
0     No          151            451.77
1    Yes           93            279.81


## 14. Find the top 3 days with highest total bill

In [185]:
with sqlite3.connect('tips.db') as conn:
    try:
        query14 = '''
                    select 
                        day,
                        sum(total_bill) AS total_bill
                    from TIPS
                    group by day
                    order by total_bill desc
                    limit 3'''
        answer14 = pd.read_sql_query(query14, conn)
        conn.commit()
        print(answer14)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query14, str(err)))

    day  total_bill
0   Sat     1778.40
1   Sun     1627.16
2  Thur     1096.33


## 15. Find the total bill for each time, including only records with tips great than or equal to 2

In [186]:
with sqlite3.connect('tips.db') as conn:
    try:
        query15 = '''
                    select 
                        time,
                        sum(total_bill) AS total_bill
                    from TIPS
                    where tip >= 2
                    group by time'''
        answer15 = pd.read_sql_query(query15, conn)
        conn.commit()
        print(answer15)
    except Exception as err:
        print('Query Failed: %s\nError: %s' % (query15, str(err)))

     time  total_bill
0  Dinner     3283.02
1   Lunch      995.42


# UPDATE

In [187]:
# It was determined that there was an error in the database. Please update the record that corresponds to id=10 and set smoker to Yes.

with sqlite3.connect('tips.db') as conn:
    try:
        update_q = '''
                    update TIPS
                    set smoker = 'Yes'
                    where rowid = 10
                  '''
        c.execute(update_q)
        conn.commit()
        print("Update Successfully")
        
        # After update
        c.execute('''select * from tips where rowid = 10''')
        print("After updating:")
        print(c.fetchall())
        conn.commit()

    except Exception as err:
        print('Error: %s' % str(err))

Update Successfully
After updating:
[(14.78, 3.23, 'Male', 'Yes', 'Sun', 'Dinner', 2)]


# DELETE

In [188]:
# Delete records from the database that have a total bill that is less than $10.

with sqlite3.connect('tips.db') as conn:
    try:
        delete_q = '''
                    delete from TIPS
                    where total_bill < 10
                  '''
        c.execute(delete_q)
        conn.commit()
        print("Delete Successfully")
    except Exception as err:
        print('Error: %s' % str(err))

Delete Successfully


In [189]:
conn.close()