In [3]:
import pandas as pd
import sqlite3

In [4]:
df = pd.read_csv('data/tips.csv')
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


### Create sqlite database using files from folder 'data'

In [28]:
connect = sqlite3.connect('tips.db')
c = connect.cursor()
c.execute('''CREATE Table tips (total_bill, tip, sex, smoker, day, time, size)''')
connect.commit()
connect.close()

df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


### Insert Data to the Database

In [71]:
connect = sqlite3.connect('tips.db')
c = connect.cursor()

df = pd.read_csv('data/tips.csv')
df.to_sql('tips', connect, if_exists='replace', index = False)

connect.commit()
connect.close()

#### Q1. Retrieve the average tip % for each day of the week

In [77]:
# Average tip % = The (sum of all tips of a particular day)* 100/ (sum of all total bills of a particular day) 

connect = sqlite3.connect('tips.db')
c = connect.cursor()

c.execute('''
     SELECT day, ROUND(SUM(tip)/SUM(total_bill)*100, 2) AS avg_tip
     FROM tips
     GROUP BY day
''')
df_avg_tip = pd.DataFrame(c.fetchall(), columns = ['day', 'avg_tip'])
df_avg_tip


Unnamed: 0,day,avg_tip
0,Fri,15.94
1,Sat,14.64
2,Sun,15.2
3,Thur,15.67


#### Q2. Find the maximum and minimum total bull amounts

In [79]:

connect = sqlite3.connect('tips.db')
c = connect.cursor()
c.execute('''
    SELECT MAX(total_bill) AS max_bill, MIN(total_bill) As min_bill
    FROM tips
''')
df_bill = pd.DataFrame(c.fetchall(), columns = ['max_bill', 'min_bill'])
df_bill

Unnamed: 0,max_bill,min_bill
0,50.81,3.07


#### Q3. Count the number of parties for each size

In [81]:
connect = sqlite3.connect('tips.db')
c = connect.cursor()
c.execute('''
    SELECT size, COUNT(size) AS count
    FROM tips
    GROUP BY size
''')
df_count = pd.DataFrame(c.fetchall(), columns = ['size', 'count'])
df_count

Unnamed: 0,size,count
0,1,4
1,2,156
2,3,38
3,4,37
4,5,5
5,6,4


#### Q4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%

In [87]:
connect = sqlite3.connect('tips.db')
c = connect.cursor()

c.execute('''
    SELECT total_bill, tip, size, ROUND(tip/total_bill * 100, 2) AS tip_percentage
    FROM tips 
    WHERE size > 3 AND ROUND(tip/total_bill*100, 2) > 15

''')
df_temp = pd.DataFrame(c.fetchall(), columns=['total_bill', 'tip', 'size', 'tip_percentage'])
df_temp


Unnamed: 0,total_bill,tip,size,tip_percentage
0,25.29,4.71,4,18.62
1,18.43,3.0,4,16.28
2,39.42,7.58,4,19.23
3,30.4,5.6,4,18.42
4,32.4,6.0,4,18.52
5,25.56,4.34,4,16.98
6,18.29,3.76,4,20.56
7,29.93,5.07,4,16.94
8,34.3,6.7,6,19.53
9,27.05,5.0,6,18.48


#### Q5. Retrieve the total bill, tip amount, and tip % for each combination of day and time, sorted by tip % in descending order. 

In [91]:
connect = sqlite3.connect('tips.db')
c = connect.cursor()

c.execute('''
    SELECT day, time, SUM(total_bill) AS total_bill , SUM(tip) AS total_tip, ROUND(tip/total_bill * 100, 2) AS tip_percentage
    FROM tips     
    GROUP BY day, time
    ORDER BY tip_percentage DESC
''')
df_temp = pd.DataFrame(c.fetchall(), columns=['day', 'time','total_bill', 'tip', 'tip_percentage'])
df_temp

Unnamed: 0,day,time,total_bill,tip,tip_percentage
0,Fri,Lunch,89.92,16.68,18.09
1,Sat,Dinner,1778.4,260.4,16.22
2,Thur,Dinner,18.78,3.0,15.97
3,Thur,Lunch,1077.55,168.83,14.71
4,Fri,Dinner,235.96,35.28,10.36
5,Sun,Dinner,1627.16,247.39,5.94
