In [1]:
import sqlite3
from pathlib import Path
import pandas as pd

### CREATE

In [2]:
try:
    # Use sqlite3 to connect to tips.db
    con=sqlite3.connect('tips.db')
    # Define cursor to execute SQL commmand
    cur=con.cursor()
except sqlite3.Error as e:
    print(f"Database Connection Error:{e}")



In [3]:
try:
    # Read csv to pandas dataframe
    tips=pd.read_csv('data/tips.csv')
    # transform pandas dataframe to Database Table
    tips.to_sql('TIPS',con,if_exists='replace', index=True,index_label='id')
except Exception as e:
    print(f"Reading CSV to Database Error:{e}")


Show All tables in the tips.db

In [4]:
try:
    # query to show all tables in the tips.db
    querychecktable = '''
    SELECT name 
    FROM sqlite_master 
    WHERE type='table'
    '''

    # show all tables in tips.db
    dftables = pd.read_sql_query(querychecktable, con)
    print(dftables)
except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")


   name
0  TIPS


SHOW Content in Table Tips

In [5]:
try:
    # query to show content in table tips
    querycheckcontent='''
    SELECT *
    FROM TIPS
    '''

    # show the content in table tips
    dftips=pd.read_sql_query(querycheckcontent,con)
    print(dftips)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

      id  total_bill   tip     sex smoker   day    time  size
0      0       16.99  1.01  Female     No   Sun  Dinner     2
1      1       10.34  1.66    Male     No   Sun  Dinner     3
2      2       21.01  3.50    Male     No   Sun  Dinner     3
3      3       23.68  3.31    Male     No   Sun  Dinner     2
4      4       24.59  3.61  Female     No   Sun  Dinner     4
..   ...         ...   ...     ...    ...   ...     ...   ...
239  239       29.03  5.92    Male     No   Sat  Dinner     3
240  240       27.18  2.00  Female    Yes   Sat  Dinner     2
241  241       22.67  2.00    Male    Yes   Sat  Dinner     2
242  242       17.82  1.75    Male     No   Sat  Dinner     2
243  243       18.78  3.00  Female     No  Thur  Dinner     2

[244 rows x 8 columns]


### READ

1. Retrieve the average tip percentage for each day of the week

In [6]:
try:
    # query of the 1st read task
    query1='''
        SELECT DAY,ROUND(SUM(tip)/SUM(total_bill)*100) AS avg_tip_percentage 
        FROM TIPS
        GROUP BY DAY
    '''

    df1=pd.read_sql_query(query1,con)
    # show the output of this Read SQL
    print(df1)
except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

    day  avg_tip_percentage
0   Fri                16.0
1   Sat                15.0
2   Sun                15.0
3  Thur                16.0


2.Find the maximum and minimum total bull amounts

In [7]:
try:    
    # query of the 2nd read task
    query2='''
        SELECT MAX(total_bill) AS max_total_bill,MIN(total_bill) AS min_total_bill 
        FROM TIPS
    '''

    df2=pd.read_sql_query(query2,con)
    # show the output of this Read SQL
    print(df2)
except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

   max_total_bill  min_total_bill
0           50.81            3.07


3.Count the number of parties for each size

In [8]:
try:
    # query of the 3rd read task
    query3='''
        SELECT size,COUNT(*) AS number 
        FROM TIPS
        GROUP BY size
    '''

    df3=pd.read_sql_query(query3,con)
    # show the output of this Read SQL
    print(df3)
except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

   size  number
0     1       4
1     2     156
2     3      38
3     4      37
4     5       5
5     6       4


4.Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%

In [9]:
try:
    # query of the 4th read task
    query4='''
        SELECT total_bill,tip 
        FROM TIPS
        WHERE size>=4 AND tip/total_bill>0.15
    '''

    df4=pd.read_sql_query(query4,con)
    # show the output of this Read SQL
    print(df4)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

    total_bill   tip
0        25.29  4.71
1        18.43  3.00
2        39.42  7.58
3        30.40  5.60
4        32.40  6.00
5        25.56  4.34
6        18.29  3.76
7        29.93  5.07
8        34.30  6.70
9        27.05  5.00
10       29.85  5.14
11       21.50  3.50
12       23.17  6.50
13       20.69  5.00
14       20.53  4.00
15       25.89  5.16
16       48.33  9.00


5.Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order

In [10]:
try:
    # query of the 5th read task 
    query5='''
        SELECT day,time,SUM(total_bill) AS sum_total_bill,SUM(tip) AS tip_amount,ROUND(SUM(tip)/SUM(total_bill)*100) AS tip_percentage
        FROM TIPS
        GROUP BY day,time
        ORDER BY tip_percentage DESC
    '''

    df5=pd.read_sql_query(query5,con)
    # show the output of this Read SQL
    print(df5)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

    day    time  sum_total_bill  tip_amount  tip_percentage
0   Fri   Lunch           89.92       16.68            19.0
1  Thur  Dinner           18.78        3.00            16.0
2  Thur   Lunch         1077.55      168.83            16.0
3   Fri  Dinner          235.96       35.28            15.0
4   Sat  Dinner         1778.40      260.40            15.0
5   Sun  Dinner         1627.16      247.39            15.0


6.Find the average tip percentage for each combination of day, time, and smoker status

In [11]:
try:
    # query of the 6th read task 
    query6='''
        SELECT day,time,smoker,ROUND(SUM(tip)/SUM(total_bill)*100) AS tip_percentage
        FROM TIPS
        GROUP BY day,time,smoker
    '''

    df6=pd.read_sql_query(query6,con)
    # show the output of this Read SQL
    print(df6)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

     day    time smoker  tip_percentage
0    Fri  Dinner     No            14.0
1    Fri  Dinner    Yes            15.0
2    Fri   Lunch     No            19.0
3    Fri   Lunch    Yes            19.0
4    Sat  Dinner     No            16.0
5    Sat  Dinner    Yes            14.0
6    Sun  Dinner     No            15.0
7    Sun  Dinner    Yes            15.0
8   Thur  Dinner     No            16.0
9   Thur   Lunch     No            16.0
10  Thur   Lunch    Yes            16.0


7.Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records

In [12]:
try:
    # query of the 7th read task 
    query7 = '''SELECT sex, SUM(total_bill), SUM(tip), ROUND((SUM(tip) / SUM(total_bill)) * 100) AS tip_percentage 
        FROM TIPS
        GROUP BY sex
        ORDER BY total_bill DESC 
        LIMIT 5;'''

    df7 = pd.read_sql_query(query7, con)
    # show the output of this Read SQL
    print(df7)
    
except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

      sex  SUM(total_bill)  SUM(tip)  tip_percentage
0  Female          1570.95    246.51            16.0
1    Male          3256.82    485.07            15.0


8.Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount

In [13]:
try:
    # query of the 8th read task 
    query8 = '''
    SELECT *
    FROM(
    SELECT SUM(tip) as sum_tips, SUM(total_bill) as sum_total_bill, ROUND(SUM(tip)/SUM(total_bill)*100) AS percentage
    FROM TIPS
    GROUP BY day,time)
    WHERE percentage >= (
    SELECT MAX(percentage)
    FROM(
    SELECT SUM(tip) as sum_tips, SUM(total_bill) as sum_total_bill, ROUND(SUM(tip)/SUM(total_bill)*100) AS percentage
    FROM TIPS
    GROUP BY day,time) ) 
    OR percentage <= (
    SELECT MIN(percentage)
    FROM(
    SELECT SUM(tip) as sum_tips, SUM(total_bill) as sum_total_bill, ROUND(SUM(tip)/SUM(total_bill)*100) AS percentage
    FROM TIPS
    GROUP BY day,time))'''

    df8 = pd.read_sql_query(query8, con)
    # show the output of this Read SQL
    print(df8)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

   sum_tips  sum_total_bill  percentage
0     35.28          235.96        15.0
1     16.68           89.92        19.0
2    260.40         1778.40        15.0
3    247.39         1627.16        15.0


9. Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100

In [14]:
try:
    # query of the 9th read task 
   query9 = '''
   SELECT SUM(total_bill) AS total_bill, 
         SUM(tip) AS total_tip, 
         ROUND((SUM(tip) / SUM(total_bill)) * 100) AS tip_percentage
   FROM TIPS
   WHERE size >= 4
   GROUP BY size
   HAVING tip_percentage > 15 
      AND SUM(total_bill) BETWEEN 50 AND 100;
   '''

   df9 = pd.read_sql_query(query9, con)
   # show the output of this Read SQL
   print(df9)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")


Empty DataFrame
Columns: [total_bill, total_tip, tip_percentage]
Index: []


10. Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records

In [15]:
try:
    # query of the 10th read task 
    query10 = '''
    SELECT day, time, smoker, ROUND(AVG(tip / total_bill * 100)) AS avg_tip_percentage 
    FROM TIPS 
    GROUP BY day, time, smoker
    HAVING COUNT(*) > 5;
    '''

    df10 = pd.read_sql_query(query10, con)
    # show the output of this Read SQL
    print(df10)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

    day    time smoker  avg_tip_percentage
0   Fri  Dinner    Yes                17.0
1   Fri   Lunch    Yes                19.0
2   Sat  Dinner     No                16.0
3   Sat  Dinner    Yes                15.0
4   Sun  Dinner     No                16.0
5   Sun  Dinner    Yes                19.0
6  Thur   Lunch     No                16.0
7  Thur   Lunch    Yes                16.0


1st addtional Querry:
Retrieve the tips from descending order

In [16]:
try:
    # query of the 1st addtional read task 
    query11 = '''
    SELECT total_bill, tip
    FROM TIPS
    ORDER BY tip DESC
    '''

    df11 = pd.read_sql_query(query10, con)
    # show the output of this Read SQL
    print(df11)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

    day    time smoker  avg_tip_percentage
0   Fri  Dinner    Yes                17.0
1   Fri   Lunch    Yes                19.0
2   Sat  Dinner     No                16.0
3   Sat  Dinner    Yes                15.0
4   Sun  Dinner     No                16.0
5   Sun  Dinner    Yes                19.0
6  Thur   Lunch     No                16.0
7  Thur   Lunch    Yes                16.0


2nd addtional Querry: Find the total number of smokers and non-smokers

In [17]:
try:
    # query of the 2nd addtional read task 
    query12 = '''
    SELECT smoker, COUNT(*) AS total 
    FROM TIPS 
    GROUP BY smoker;
    '''

    df12 = pd.read_sql_query(query12, con)
    # show the output of this Read SQL
    print(df12)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

  smoker  total
0     No    151
1    Yes     93


3rd additional querry: Retrieve the total_bill for parties of size greater than 2

In [18]:
try:
    # query of the 3rd addtional read task 
    query13 = '''
    SELECT total_bill, size
    FROM TIPS 
    WHERE size >= 2;
    '''

    df13 = pd.read_sql_query(query13, con)
    # show the output of this Read SQL
    print(df13)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

     total_bill  size
0         16.99     2
1         10.34     3
2         21.01     3
3         23.68     2
4         24.59     4
..          ...   ...
235       29.03     3
236       27.18     2
237       22.67     2
238       17.82     2
239       18.78     2

[240 rows x 2 columns]


4th additional querry: Count the number of meals served during lunch and dinner

In [19]:
try:
    # query of the 4th addtional read task 
    query14 = '''
    SELECT time, COUNT(*) AS meals_served 
    FROM TIPS 
    GROUP BY time;
    '''

    df14 = pd.read_sql_query(query14, con)
    # show the output of this Read SQL
    print(df14)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

     time  meals_served
0  Dinner           176
1   Lunch            68


5th additional query: Retrieve total bills for male and female customers on each day

In [20]:
try:
    # query of the 5th addtional read task 
    query15 = '''
    SELECT day, sex, SUM(total_bill) AS total_bill 
    FROM TIPS 
    GROUP BY day, sex;
    '''

    df15 = pd.read_sql_query(query15, con)
    # show the output of this Read SQL
    print(df15)

except pd.io.sql.DatabaseError as e:
    print(f"Database SQL Execution Error:{e}")

    day     sex  total_bill
0   Fri  Female      127.31
1   Fri    Male      198.57
2   Sat  Female      551.05
3   Sat    Male     1227.35
4   Sun  Female      357.70
5   Sun    Male     1269.46
6  Thur  Female      534.89
7  Thur    Male      561.44


### UPDATE

It was determined that there was an error in the database. Please update the record that corresponds to id=10 and set smoker to Yes.

In [21]:

try:
    # Update SQL Sentence
    update_query='''
        UPDATE TIPS
        SET Smoker='Yes'
        WHERE id=10
    '''

    cur.execute(update_query)

    # show the output of this Update SQL
    
    query_readall='''
    SELECT *
    FROM TIPS
    LIMIT 11
    '''

    dftips_afterupdate = pd.read_sql_query(query_readall,con)
    print(dftips_afterupdate)

except Exception as e:
    print(f"Database Update Execution Error:{e}")
    con.rollback()

    id  total_bill   tip     sex smoker  day    time  size
0    0       16.99  1.01  Female     No  Sun  Dinner     2
1    1       10.34  1.66    Male     No  Sun  Dinner     3
2    2       21.01  3.50    Male     No  Sun  Dinner     3
3    3       23.68  3.31    Male     No  Sun  Dinner     2
4    4       24.59  3.61  Female     No  Sun  Dinner     4
5    5       25.29  4.71    Male     No  Sun  Dinner     4
6    6        8.77  2.00    Male     No  Sun  Dinner     2
7    7       26.88  3.12    Male     No  Sun  Dinner     4
8    8       15.04  1.96    Male     No  Sun  Dinner     2
9    9       14.78  3.23    Male     No  Sun  Dinner     2
10  10       10.27  1.71    Male    Yes  Sun  Dinner     2


### DELETE

Delete records from the database that have a total bill that is less than $10. 

In [22]:
try:
     # Delete SQL Sentence
    delete_query='''
    DELETE FROM TIPS
    WHERE total_bill<10
    '''

    
    cur.execute(delete_query)

    # show the output of this Update SQL
    
    query_readall='''
    SELECT *
    FROM TIPS
    '''

    dftips_afterdelete = pd.read_sql_query(query_readall,con)
    print(dftips_afterdelete)

except Exception as e:
    print(f"Database Delete Execution Error:{e}")
    con.rollback()

      id  total_bill   tip     sex smoker   day    time  size
0      0       16.99  1.01  Female     No   Sun  Dinner     2
1      1       10.34  1.66    Male     No   Sun  Dinner     3
2      2       21.01  3.50    Male     No   Sun  Dinner     3
3      3       23.68  3.31    Male     No   Sun  Dinner     2
4      4       24.59  3.61  Female     No   Sun  Dinner     4
..   ...         ...   ...     ...    ...   ...     ...   ...
222  239       29.03  5.92    Male     No   Sat  Dinner     3
223  240       27.18  2.00  Female    Yes   Sat  Dinner     2
224  241       22.67  2.00    Male    Yes   Sat  Dinner     2
225  242       17.82  1.75    Male     No   Sat  Dinner     2
226  243       18.78  3.00  Female     No  Thur  Dinner     2

[227 rows x 8 columns]


In [23]:
# Close the connection with database
cur.close()
con.close()