##### CREATE SQLite database from CSV

In [7]:
import sqlite3 as sql
import pandas as pd
import csv

connection = sql.connect('data/tips.db')
cursor = connection.cursor()

def query():
    csv_file = 'data/tips.csv'
    table_name = 'tips'
    
    try:
        with open(csv_file, newline='') as t:
            reader=csv.reader(t)
            column_header= next(reader) #  list that contains the names of the columns from tips.csv
            rows = [row for row in reader] # iterates through the data excluding the columns
    except FileNotFoundError:
        print(f"Error: The file {csv_file} was not found.")
        return
    except Exception as e:
        print(e)
        
    try: 
        columns = ', '.join(f'"{col}" TEXT' for col in column_header) #Creating string to represent columns
        creating_table = 'CREATE TABLE IF NOT EXISTS tips ("total_bill" TEXT, "tip" TEXT, "sex" TEXT, "smoker" TEXT, "day" TEXT, "time" TEXT, "size" TEXT);' # Creating columns from variable if table does not exist
        cursor.execute(creating_table)

        cursor.execute(f'DELETE FROM {table_name}')
        
        '''
        The first join method in the line below combines all the column names in column_header into a single string with comma separation

        The join with the "?" creates placeholders the length of # of columns for the actual values that will be inserted into tips.db
        '''
        populate_table_query = f'INSERT INTO {table_name} ({", ".join(column_header)}) VALUES ({", ".join(["?" for _ in column_header])});'

        for row in rows:
            cursor.execute(populate_table_query, row) # Row contains all the data underneath the columns

        connection.commit()

        # Below is not mandatory but it prints out tips.db to ensure the db has been populated correctly
        sql_df = pd.read_sql_query(f'SELECT * FROM {table_name}', connection)
        print(sql_df)
    except sql.Error as e:
        print(f"Database Error: {e}")
    except Exception as e:
        print(e)

query()


    total_bill   tip     sex smoker   day    time size
0        16.99  1.01  Female     No   Sun  Dinner    2
1        10.34  1.66    Male     No   Sun  Dinner    3
2        21.01   3.5    Male     No   Sun  Dinner    3
3        23.68  3.31    Male     No   Sun  Dinner    2
4        24.59  3.61  Female     No   Sun  Dinner    4
..         ...   ...     ...    ...   ...     ...  ...
239      29.03  5.92    Male     No   Sat  Dinner    3
240      27.18   2.0  Female    Yes   Sat  Dinner    2
241      22.67   2.0    Male    Yes   Sat  Dinner    2
242      17.82  1.75    Male     No   Sat  Dinner    2
243      18.78   3.0  Female     No  Thur  Dinner    2

[244 rows x 7 columns]


##### READ
##### 1. Retrieve the average tip percentage for each day of the week

In [9]:
query = '''
SELECT day, ROUND(AVG((tip/total_bill) * 100), 2) AS avg_tip_percentage
FROM tips
GROUP BY day
'''
result = cursor.execute(query)
for row in result:
    print(row)

('Fri', 16.99)
('Sat', 15.32)
('Sun', 16.69)
('Thur', 16.13)


##### 2. Find the maximum and minimum total bill amounts

In [10]:
query = '''
SELECT MAX(total_bill), MIN(total_bill)
FROM tips
'''
result = cursor.execute(query)
for row in result:
    print(row)

('9.94', '10.07')


##### 3. Count the number of parties for each size

In [12]:
query = '''
SELECT size, COUNT(size) AS size_count
FROM tips
GROUP BY size
'''
result = cursor.execute(query)
for row in result:
    print(row)

('1', 4)
('2', 156)
('3', 38)
('4', 37)
('5', 5)
('6', 4)


##### 4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%

In [11]:
query = '''
SELECT total_bill, tip
FROM tips
WHERE size >= 4 AND ((tip/total_bill) * 100) > 15
'''
result = cursor.execute(query)
for row in result:
    print(row)

('25.29', '4.71')
('18.43', '3.0')
('39.42', '7.58')
('30.4', '5.6')
('32.4', '6.0')
('25.56', '4.34')
('18.29', '3.76')
('29.93', '5.07')
('34.3', '6.7')
('27.05', '5.0')
('29.85', '5.14')
('21.5', '3.5')
('23.17', '6.5')
('20.69', '5.0')
('20.53', '4.0')
('25.89', '5.16')
('48.33', '9.0')


##### 5. Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order

In [13]:
query = '''
SELECT total_bill, tip, ROUND((tip/total_bill) * 100, 2) AS tip_percentage
FROM tips
GROUP BY day, time
ORDER BY tip_percentage DESC

'''
result = cursor.execute(query)
for row in result:
    print(row)

('12.16', '2.2', 18.09)
('20.65', '3.35', 16.22)
('18.78', '3.0', 15.97)
('27.2', '4.0', 14.71)
('28.97', '3.0', 10.36)
('16.99', '1.01', 5.94)


##### 6. Find the average tip percentage for each combination of day, time, and smoker status

In [14]:
query = '''
SELECT day, time, smoker, ROUND(AVG((tip/total_bill) * 100), 2) AS avg_tip_percentage
FROM tips
GROUP BY day, time, smoker
'''
result = cursor.execute(query)
for row in result:
    print(row)

('Fri', 'Dinner', 'No', 13.96)
('Fri', 'Dinner', 'Yes', 16.53)
('Fri', 'Lunch', 'No', 18.77)
('Fri', 'Lunch', 'Yes', 18.89)
('Sat', 'Dinner', 'No', 15.8)
('Sat', 'Dinner', 'Yes', 14.79)
('Sun', 'Dinner', 'No', 16.01)
('Sun', 'Dinner', 'Yes', 18.73)
('Thur', 'Dinner', 'No', 15.97)
('Thur', 'Lunch', 'No', 16.03)
('Thur', 'Lunch', 'Yes', 16.39)


##### 7. Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records

In [15]:
query1 = '''
SELECT total_bill, tip, ROUND((tip/total_bill) * 100, 2) AS tip_percentage
FROM tips
WHERE sex = 'Male'
ORDER BY total_bill DESC
LIMIT 5
'''
result = cursor.execute(query1)
for row in result:
    print(row)
    
query2 = '''
SELECT total_bill, tip, ROUND((tip/total_bill) * 100, 2) AS tip_percentage
FROM tips
WHERE sex = 'Female'
ORDER BY total_bill DESC
LIMIT 5
'''
result = cursor.execute(query2)
for row in result:
    print(row)

('9.94', '1.56', 15.69)
('9.78', '1.73', 17.69)
('9.68', '1.32', 13.64)
('9.55', '1.45', 15.18)
('8.77', '2.0', 22.81)
('9.6', '4.0', 41.67)
('8.51', '1.25', 14.69)
('8.35', '1.5', 17.96)
('7.25', '1.0', 13.79)
('5.75', '1.0', 17.39)


##### 8. Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount

In [16]:
query = '''
SELECT ROUND(MAX((tip / total_bill) * 100), 2) AS max_tip_percentage, Round(MIN((tip / total_bill) * 100), 2) AS min_tip_percentage, total_bill, tip
FROM tips
GROUP BY day, time
'''
result = cursor.execute(query)
for row in result:
    print(row)

(26.35, 10.36, '28.97', '3.0')
(25.93, 11.77, '13.42', '1.58')
(32.57, 3.56, '32.83', '1.17')
(71.03, 5.94, '16.99', '1.01')
(15.97, 15.97, '18.78', '3.0')
(26.63, 7.3, '18.64', '1.36')


##### 9. Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100

In [17]:
query = '''
SELECT total_bill, tip, ROUND((tip / total_bill) * 100, 2) AS tip_percentage
FROM tips
WHERE size >= 4
AND((tip / total_bill) * 100) > 15 
AND total_bill >= 50 AND total_bill <=100
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### 10. Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records


In [18]:
query = '''
SELECT day, time, smoker, ROUND(AVG((tip / total_bill) * 100), 2) AS avg_tip_percentage
FROM tips
GROUP BY day, time, smoker
HAVING COUNT(*) > 5
'''
result = cursor.execute(query)
for row in result:
    print(row)

('Fri', 'Dinner', 'Yes', 16.53)
('Fri', 'Lunch', 'Yes', 18.89)
('Sat', 'Dinner', 'No', 15.8)
('Sat', 'Dinner', 'Yes', 14.79)
('Sun', 'Dinner', 'No', 16.01)
('Sun', 'Dinner', 'Yes', 18.73)
('Thur', 'Lunch', 'No', 16.03)
('Thur', 'Lunch', 'Yes', 16.39)


##### Additional Queries  
##### 1. Find the average bill and tip percentage for smokers and non smokers

In [19]:
query = '''
SELECT smoker, ROUND(AVG(total_bill), 2), ROUND(AVG((tip/total_bill) * 100), 2) AS avg_tip_percentage
FROM tips
GROUP BY smoker
'''
result = cursor.execute(query)
for row in result:
    print(row)

('No', 19.19, 15.93)
('Yes', 20.76, 16.32)


##### 2. Retrieve the average total bill and tip amount for each day

In [20]:
query = '''
    SELECT day, ROUND(AVG(total_bill), 2) AS avg_total_bill, ROUND(AVG(tip), 2) AS avg_tip
    FROM tips
    GROUP BY day;
'''
result = cursor.execute(query)
for row in result:
    print(row)

('Fri', 17.15, 2.73)
('Sat', 20.44, 2.99)
('Sun', 21.41, 3.26)
('Thur', 17.68, 2.77)


##### 3. Retrieve the total bill and tip for the smallest and largest group sizes

In [29]:
query1 = '''
    SELECT total_bill, tip
    FROM tips
    WHERE size = (SELECT MIN(size) FROM tips)
    LIMIT 1;
'''
result = cursor.execute(query1)
for row in result:
    print(row)

query2 = '''
    SELECT total_bill, tip
    FROM tips
    WHERE size = (SELECT MAX(size) FROM tips)
    LIMIT 1;
'''
result = cursor.execute(query2)
for row in result:
    print(row)

('3.07', '1.0')
('29.8', '4.2')


##### 4. Find the count for smokers of each sex

In [27]:
query = '''
SELECT sex, COUNT(*) AS smoker_count
FROM tips
WHERE smoker = "Yes"
GROUP BY sex
'''
result = cursor.execute(query)
for row in result:
    print(row)

('Female', 33)
('Male', 61)


##### 5. For each sex, calculate the average total bill and tip amount

In [23]:
query = '''
SELECT sex, ROUND(AVG(total_bill) , 2) AS avg_total_bill, ROUND(AVG(tip), 2) AS avg_tip
FROM tips
GROUP BY sex
'''
result = cursor.execute(query)
for row in result:
    print(row)

('Female', 18.06, 2.83)
('Male', 20.74, 3.09)


##### UPDATE the record that corresponds to id=10 and set smoker to Yes.

In [24]:
query = '''
UPDATE tips 
SET smoker = 'Yes' 
WHERE ROWID = 10;
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### DELETE records from the database that have a total bill that is less than $10.

In [25]:
query = '''
    DELETE 
    FROM tips 
    WHERE total_bill < 10;
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### Close Connection

In [None]:
if connection:
    connection.close()