# Import Libraries and Load Data

In [65]:
import pandas as pd
import sqlite3

# Load the tips dataset from CSV and store it as the `tips` table in tips.db.
df = pd.read_csv('data/tips.csv')
conn = sqlite3.connect('tips.db')
try:
    # to_sql(..., if_exists='replace') drops and recreates the table itself,
    # so no explicit CREATE TABLE is issued here: a bare CREATE TABLE raises
    # "table tips already exists" as soon as this cell is run a second time.
    # The DataFrame index is written as the `id` column.
    df.to_sql('tips', conn, if_exists='replace', index=True, index_label='id')
    conn.commit()
finally:
    # Always release the database file, even if the load fails.
    conn.close()

# SQL Queries

### 1. Retrieve the average tip percentage for each day of the week

In [67]:
# Open a connection and cursor on tips.db; `conn` and `c` are assigned here at
# top level.
conn = sqlite3.connect('tips.db')
c = conn.cursor()

# Tip percentage per day, computed as total tips over total bills
# (SUM(tip) / SUM(total_bill) * 100), rounded to two decimals.
rows = c.execute('''SELECT DAY, ROUND((SUM(tip)/SUM(total_bill) * 100),2) AS average_tip_percentage FROM tips GROUP BY day''').fetchall()
print(rows)

[('Fri', 15.94), ('Sat', 14.64), ('Sun', 15.2), ('Thur', 15.67)]


### 2. Find the maximum and minimum total bill amounts

In [68]:
# Largest and smallest total_bill over the whole table (a single result row).
rows = c.execute('''SELECT MAX(total_bill) AS max_bill, MIN(total_bill) AS min_bill FROM tips''').fetchall()
print(rows)

[(50.81, 3.07)]


### 3. Count the number of parties for each size

In [69]:
# Number of rows for each distinct value of `size`.
rows = c.execute('''SELECT size, COUNT(*) AS number_parties FROM tips GROUP BY size''').fetchall()
print(rows)

[(1, 4), (2, 156), (3, 38), (4, 37), (5, 5), (6, 4)]


### 4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%

In [70]:
# Bill and tip for rows with size >= 4 whose tip is more than 15% of the bill.
rows = c.execute('''SELECT total_bill, tip FROM tips WHERE size >= 4 AND (tip/total_bill)*100 > 15''').fetchall()
print(rows)

[(25.29, 4.71), (18.43, 3.0), (39.42, 7.58), (30.4, 5.6), (32.4, 6.0), (25.56, 4.34), (18.29, 3.76), (29.93, 5.07), (34.3, 6.7), (27.05, 5.0), (29.85, 5.14), (21.5, 3.5), (23.17, 6.5), (20.69, 5.0), (20.53, 4.0), (25.89, 5.16), (48.33, 9.0)]


### 5. Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order

In [71]:
# Total bill, total tip and tip percentage for every (day, time) pair, highest
# tip percentage first.
# All three figures are rounded to two decimals. Rounding the percentage to a
# whole number makes several groups tie at the same value, so their relative
# order in the result no longer reflects the real percentages; the unrounded
# tip sum also shows floating-point noise.
c.execute('''SELECT day, time, ROUND(SUM(total_bill),2) AS total_bill, ROUND(SUM(tip),2) AS total_tip, ROUND((SUM(tip)/SUM(total_bill))*100,2) AS tip_percentage FROM tips GROUP BY day,time ORDER BY tip_percentage DESC''')
print(c.fetchall())

[('Fri', 'Lunch', 89.92, 16.68, 18.55), ('Thur', 'Dinner', 18.78, 3.0, 15.97), ('Thur', 'Lunch', 1077.55, 168.83, 15.67), ('Sun', 'Dinner', 1627.16, 247.39, 15.2), ('Fri', 'Dinner', 235.96, 35.28, 14.95), ('Sat', 'Dinner', 1778.4, 260.4, 14.64)]


### 6. Find the average tip percentage for each combination of day, time, and smoker status

In [72]:
# Mean of the per-row tip percentages (tip / total_bill * 100) for every
# (day, time, smoker) group, rounded to two decimals.
rows = c.execute('''SELECT day, time, smoker, ROUND(AVG(tip/total_bill*100),2) AS average_tip_percentage FROM tips GROUP by day,time, smoker''').fetchall()
print(rows)

[('Fri', 'Dinner', 'No', 13.96), ('Fri', 'Dinner', 'Yes', 16.53), ('Fri', 'Lunch', 'No', 18.77), ('Fri', 'Lunch', 'Yes', 18.89), ('Sat', 'Dinner', 'No', 15.8), ('Sat', 'Dinner', 'Yes', 14.79), ('Sun', 'Dinner', 'No', 16.01), ('Sun', 'Dinner', 'Yes', 18.73), ('Thur', 'Dinner', 'No', 15.97), ('Thur', 'Lunch', 'No', 16.03), ('Thur', 'Lunch', 'Yes', 16.39)]


### 7. Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records

In [73]:
# Total bill, total tip and tip percentage per sex, largest total bill first,
# capped at five rows.
# The tip total is rounded and aliased like the other two figures; left
# unrounded it shows floating-point noise such as 485.0700000000001.
c.execute('''SELECT sex, ROUND(SUM(total_bill),2) AS total_bill, ROUND(SUM(tip),2) AS total_tip, ROUND((SUM(tip)/SUM(total_bill)*100),2) AS tip_percentage FROM tips GROUP BY sex ORDER BY total_bill DESC LIMIT 5''')
print(c.fetchall())

[('Male', 3256.82, 485.07, 14.89), ('Female', 1570.95, 246.51, 15.69)]


### 8. Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount

In [74]:
# For every (day, time) pair: the highest and lowest per-row tip percentage,
# each with a total_bill and tip taken from rows that reach that extreme.
#
# The subquery computes tip_percentage per row (rounded to two decimals) and
# uses MAX/MIN window functions partitioned by (day, time) to attach the
# group's extremes to every row. The outer query groups by (day, time); each
# CASE keeps a value only for rows whose percentage equals the group extreme,
# and the surrounding MAX/MIN collapses those to one value.
# NOTE(review): if several rows share the extreme percentage, the bill and the
# tip are aggregated independently and may come from different rows.
extremes = c.execute('''SELECT day, time,
          MAX(tip_percentage) AS max_tip_percentage,
          MAX(CASE WHEN tip_percentage = max_tip_percentage THEN total_bill END) AS max_total_bill,
          MAX(CASE WHEN tip_percentage = max_tip_percentage THEN tip END) AS max_tip,
          MIN(tip_percentage) AS min_tip_percentage,
          MIN(CASE WHEN tip_percentage = min_tip_percentage THEN total_bill END) AS min_total_bill,
          MIN(CASE WHEN tip_percentage = min_tip_percentage THEN tip END) AS min_tip
          FROM (SELECT day, time, total_bill, tip, ROUND((tip/total_bill*100),2) as tip_percentage,
          MAX(ROUND((tip/total_bill*100),2)) OVER (PARTITION BY day, time) AS max_tip_percentage,
          MIN(ROUND((tip/total_bill*100),2)) OVER (PARTITION BY day, time) AS min_tip_percentage FROM tips)
          GROUP BY day, time''').fetchall()
print(extremes)

[('Fri', 'Dinner', 26.35, 16.32, 4.3, 10.36, 28.97, 3.0), ('Fri', 'Lunch', 25.93, 13.42, 3.48, 11.77, 13.42, 1.58), ('Sat', 'Dinner', 32.57, 3.07, 1.0, 3.56, 32.83, 1.17), ('Sun', 'Dinner', 71.03, 7.25, 5.15, 5.94, 16.99, 1.01), ('Thur', 'Dinner', 15.97, 18.78, 3.0, 15.97, 18.78, 3.0), ('Thur', 'Lunch', 26.63, 7.51, 2.0, 7.3, 18.64, 1.36)]


### 9. Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100

In [75]:
# Bill, tip and tip percentage for rows with size >= 4, a tip above 15% of the
# bill, and a total_bill between 50 and 100 (BETWEEN is inclusive).
# The filter uses the raw ratio rather than the rounded `tip_percentage`
# alias: filtering on the rounded value would drop a tip of e.g. 15.004%
# (it rounds to 15.0), and referencing a select-list alias in WHERE is not
# portable SQL.
c.execute('''SELECT total_bill, tip, ROUND(tip/total_bill*100, 2) AS tip_percentage FROM tips WHERE size >= 4 AND tip/total_bill*100 > 15 AND total_bill BETWEEN 50 AND 100''')
print(c.fetchall())

[]


### 10. Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records

In [76]:
# Mean per-row tip percentage for every (day, time, smoker) group, keeping
# only groups that contain more than five rows (HAVING COUNT(*) > 5).
rows = c.execute('''SELECT day, time, smoker, ROUND(AVG(tip/total_bill*100),2) AS average_tip_percentage FROM tips GROUP BY day, time, smoker HAVING COUNT(*) > 5''').fetchall()
print(rows)

[('Fri', 'Dinner', 'Yes', 16.53), ('Fri', 'Lunch', 'Yes', 18.89), ('Sat', 'Dinner', 'No', 15.8), ('Sat', 'Dinner', 'Yes', 14.79), ('Sun', 'Dinner', 'No', 16.01), ('Sun', 'Dinner', 'Yes', 18.73), ('Thur', 'Lunch', 'No', 16.03), ('Thur', 'Lunch', 'Yes', 16.39)]


# Additional Queries

### 1. Retrieve the total bill and tip amount for each smoker status

In [77]:
# Total bill and total tip per smoker status, both rounded to two decimals,
# largest total bill first.
rows = c.execute('''SELECT smoker, ROUND(SUM(total_bill),2) as total_bill, ROUNd(SUM(tip),2) FROM tips GROUP BY smoker ORDER BY total_bill DESC''').fetchall()
print(rows)

[('No', 2897.43, 451.77), ('Yes', 1930.34, 279.81)]


### 2. Find the maximum and minimum tip percentage for each size

In [78]:
# Highest and lowest per-row tip percentage for each value of `size`,
# rounded to two decimals.
rows = c.execute('''SELECT size, ROUND(MAX(tip/total_bill*100),2), ROUND(MIN(tip/total_bill*100),2) FROM tips GROUP BY size ''').fetchall()
print(rows)

[(1, 32.57, 13.79), (2, 71.03, 3.56), (3, 23.07, 5.64), (4, 28.05, 7.75), (5, 24.17, 6.57), (6, 19.53, 10.38)]


### 3. Find the top 3 day and time combinations that have the highest revenue

In [79]:
# The three (day, time) pairs with the largest summed total_bill ("revenue").
rows = c.execute('''SELECT day, time, ROUNd(SUM(total_bill),2) AS revenue FROM tips GROUP BY day, time ORDER BY revenue DESC LIMIT 3''').fetchall()
print(rows)

[('Sat', 'Dinner', 1778.4), ('Sun', 'Dinner', 1627.16), ('Thur', 'Lunch', 1077.55)]


### 4. Retrieve the total bill and tip for parties of size 2 or less, where the tip percentage is less than 10%

In [80]:
# Bill and tip for rows with size <= 2 whose tip is under 10% of the bill.
rows = c.execute('''SELECT total_bill, tip FROM tips WHERE size <= 2 AND tip/total_bill*100 < 10''').fetchall()
print(rows)

[(16.99, 1.01), (26.41, 1.5), (20.23, 2.01), (19.08, 1.5), (31.85, 3.18), (32.9, 3.11), (40.55, 3.0), (15.69, 1.5), (28.44, 2.56), (24.27, 2.03), (12.9, 1.1), (12.6, 1.0), (32.83, 1.17), (27.18, 2.0), (22.67, 2.0), (17.82, 1.75)]


### 5. Retrieve the total bill, tip, and tip percentage for all male smokers, where the total bill is between $20 and $30, and sort in ascending order of the total bill

In [81]:
# Bill, tip and tip percentage (two decimals) for male smokers whose
# total_bill lies between 20 and 30 inclusive, smallest bill first.
rows = c.execute('''SELECT total_bill, tip, ROUND(tip/total_bill*100, 2) FROM tips WHERE sex = 'Male' AND smoker = 'Yes' AND total_bill BETWEEN 20 AND 30 ORDER BY total_bill ASC''').fetchall()
print(rows)

[(20.29, 3.21, 15.82), (20.49, 4.06, 19.81), (20.53, 4.0, 19.48), (21.01, 3.0, 14.28), (22.67, 2.0, 8.82), (23.1, 4.0, 17.32), (23.17, 6.5, 28.05), (23.33, 5.65, 24.22), (24.01, 2.0, 8.33), (24.27, 2.03, 8.36), (25.21, 4.29, 17.02), (25.89, 5.16, 19.93), (26.59, 3.41, 12.82), (27.28, 4.0, 14.66), (28.15, 3.0, 10.66), (28.44, 2.56, 9.0), (28.97, 3.0, 10.36)]


# UPDATE

### It was determined that there was an error in the database. Please update the record that corresponds to id=10 and set smoker to Yes.

In [84]:
# Set smoker to 'Yes' for the record with id = 10, then read the row back.
c.execute('''UPDATE tips SET smoker = 'Yes' WHERE id = 10''')
# sqlite3 keeps data-modifying statements in an open transaction until
# commit(); commit here so the correction is actually persisted, as the
# data-loading cell does after its own write.
conn.commit()
c.execute('''SELECT * FROM tips WHERE id = 10''')
print(c.fetchall())

[(10, 10.27, 1.71, 'Male', 'Yes', 'Sun', 'Dinner', 2)]


# DELETE

### Delete records from the database that have a total bill that is less than $10.

In [85]:
# Remove every record whose total_bill is below 10, then list what remains.
c.execute('''DELETE FROM tips WHERE total_bill < 10''')
# sqlite3 keeps data-modifying statements in an open transaction until
# commit(); commit here so the deletion is actually persisted, as the
# data-loading cell does after its own write.
conn.commit()
c.execute('''SELECT * FROM tips''')
print(c.fetchall())

[(0, 16.99, 1.01, 'Female', 'No', 'Sun', 'Dinner', 2), (1, 10.34, 1.66, 'Male', 'No', 'Sun', 'Dinner', 3), (2, 21.01, 3.5, 'Male', 'No', 'Sun', 'Dinner', 3), (3, 23.68, 3.31, 'Male', 'No', 'Sun', 'Dinner', 2), (4, 24.59, 3.61, 'Female', 'No', 'Sun', 'Dinner', 4), (5, 25.29, 4.71, 'Male', 'No', 'Sun', 'Dinner', 4), (7, 26.88, 3.12, 'Male', 'No', 'Sun', 'Dinner', 4), (8, 15.04, 1.96, 'Male', 'No', 'Sun', 'Dinner', 2), (9, 14.78, 3.23, 'Male', 'No', 'Sun', 'Dinner', 2), (10, 10.27, 1.71, 'Male', 'Yes', 'Sun', 'Dinner', 2), (11, 35.26, 5.0, 'Female', 'No', 'Sun', 'Dinner', 4), (12, 15.42, 1.57, 'Male', 'No', 'Sun', 'Dinner', 2), (13, 18.43, 3.0, 'Male', 'No', 'Sun', 'Dinner', 4), (14, 14.83, 3.02, 'Female', 'No', 'Sun', 'Dinner', 2), (15, 21.58, 3.92, 'Male', 'No', 'Sun', 'Dinner', 2), (16, 10.33, 1.67, 'Female', 'No', 'Sun', 'Dinner', 3), (17, 16.29, 3.71, 'Male', 'No', 'Sun', 'Dinner', 3), (18, 16.97, 3.5, 'Female', 'No', 'Sun', 'Dinner', 3), (19, 20.65, 3.35, 'Male', 'No', 'Sat', 'Dinne