##### CREATE SQLite database from CSV

In [None]:
import sqlite3 as sql
import pandas as pd
import csv

# Open the SQLite database at data/tips.db and create the cursor that the
# cells below use to execute their statements.
connection = sql.connect('data/tips.db')
cursor = connection.cursor()

def _quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier, escaping embedded quotes."""
    return '"' + name.replace('"', '""') + '"'


def _infer_sqlite_type(values):
    """Return 'INTEGER', 'REAL' or 'TEXT' for a CSV column based on its non-empty values."""
    present = [value for value in values if value != '']
    if not present:
        return 'TEXT'
    # Try the narrowest type first; fall back to TEXT when neither parser accepts every value.
    for sqlite_type, parse in (('INTEGER', int), ('REAL', float)):
        try:
            for value in present:
                parse(value)
        except ValueError:
            continue
        return sqlite_type
    return 'TEXT'


def query():
    """Rebuild the ``tips`` table from ``data/tips.csv`` and print its contents.

    The CSV header row supplies the column names. Each column's type
    (INTEGER, REAL or TEXT) is inferred from its values so that numeric
    columns compare, sort and aggregate numerically instead of as text.
    The table is dropped and recreated on every call so the schema always
    matches the CSV, then every data row is inserted and committed.

    Uses the module-level ``connection`` and ``cursor``. Errors are printed
    rather than raised; on a database error pending inserts are rolled back.

    Returns:
        None
    """
    csv_file = 'data/tips.csv'
    table_name = 'tips'

    try:
        with open(csv_file, newline='') as t:
            reader = csv.reader(t)
            column_header = next(reader, None)  # None when the file is empty
            rows = [row for row in reader if row]  # skip blank lines
    except FileNotFoundError:
        print(f"Error: The file {csv_file} was not found.")
        return
    except Exception as e:
        # Without this return the code below would fail on undefined names.
        print(e)
        return

    if not column_header:
        print(f"Error: The file {csv_file} has no header row.")
        return

    quoted_table = _quote_identifier(table_name)
    quoted_columns = [_quote_identifier(col) for col in column_header]
    column_defs = ', '.join(
        f'{quoted} {_infer_sqlite_type([row[i] for row in rows if i < len(row)])}'
        for i, quoted in enumerate(quoted_columns)
    )
    placeholders = ', '.join('?' for _ in column_header)
    populate_table_query = (
        f'INSERT INTO {quoted_table} ({", ".join(quoted_columns)}) '
        f'VALUES ({placeholders});'
    )

    try:
        # Recreate instead of CREATE IF NOT EXISTS + DELETE: an existing table
        # may have been created with a different (all-TEXT) schema.
        cursor.execute(f'DROP TABLE IF EXISTS {quoted_table}')
        cursor.execute(f'CREATE TABLE {quoted_table} ({column_defs})')
        cursor.executemany(populate_table_query, rows)
        connection.commit()

        # Not mandatory: print the table to confirm it was populated correctly.
        sql_df = pd.read_sql_query(f'SELECT * FROM {quoted_table}', connection)
        print(sql_df)
    except sql.Error as e:
        connection.rollback()
        print(f"Database Error: {e}")
    except Exception as e:
        connection.rollback()
        print(e)

# Load data/tips.csv into the tips table before running the queries below.
query()


##### READ
##### 1. Retrieve the average tip percentage for each day of the week

In [None]:
# Tip as a percentage of the bill, averaged per day and rounded to 2 decimals.
query = '''
SELECT day, ROUND(AVG((tip/total_bill) * 100), 2) AS avg_tip_percentage
FROM tips
GROUP BY day
'''
result = cursor.execute(query)
for row in result.fetchall():
    print(row)

##### 2. Find the maximum and minimum total bill amounts

In [None]:
# Largest and smallest bill in the table.
# The CAST guards against total_bill being stored as text (values loaded from
# CSV), where MAX/MIN would compare strings and e.g. rank '9.94' above '50.81'.
query = '''
SELECT MAX(CAST(total_bill AS REAL)), MIN(CAST(total_bill AS REAL))
FROM tips
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### 3. Count the number of parties for each size

In [None]:
# Number of rows (parties) for each distinct party size.
query = '''
SELECT size, COUNT(size) AS size_count
FROM tips
GROUP BY size
'''
result = cursor.execute(query)
for row in result.fetchall():
    print(row)

##### 4. Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%

In [None]:
# Bill and tip for parties of 4 or more that tipped more than 15%.
# CAST makes the size filter numeric even if size is stored as text; a text
# comparison would treat '10' as smaller than '4'.
query = '''
SELECT total_bill, tip
FROM tips
WHERE CAST(size AS INTEGER) >= 4 AND ((tip/total_bill) * 100) > 15
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### 5. Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order

In [None]:
# Total bill, total tip and overall tip percentage per (day, time) combination,
# highest tip percentage first.
# Aggregates replace the bare total_bill/tip columns: in a GROUP BY query a bare
# column returns the value from one arbitrary row of each group, not a figure
# for the group. day and time are selected so each output row is identifiable.
query = '''
SELECT day,
       time,
       ROUND(SUM(total_bill), 2) AS total_bill_sum,
       ROUND(SUM(tip), 2) AS tip_sum,
       ROUND(SUM(tip) / SUM(total_bill) * 100, 2) AS tip_percentage
FROM tips
GROUP BY day, time
ORDER BY tip_percentage DESC
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### 6. Find the average tip percentage for each combination of day, time, and smoker status

In [None]:
# Average tip percentage (2 decimals) for every day / time / smoker grouping.
query = '''
SELECT day, time, smoker, ROUND(AVG((tip/total_bill) * 100), 2) AS avg_tip_percentage
FROM tips
GROUP BY day, time, smoker
'''
result = cursor.execute(query)
for row in result.fetchall():
    print(row)

##### 7. Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records

In [None]:
# Top 5 bills (with tip and tip percentage) for each sex, largest bill first.
# ORDER BY casts total_bill to REAL: if the column is stored as text the sort
# would be lexicographic ('9.94' before '50.81') and the "top 5" would be wrong.
query1 = '''
SELECT total_bill, tip, ROUND((tip/total_bill) * 100, 2) AS tip_percentage
FROM tips
WHERE sex = 'Male'
ORDER BY CAST(total_bill AS REAL) DESC
LIMIT 5
'''

query2 = '''
SELECT total_bill, tip, ROUND((tip/total_bill) * 100, 2) AS tip_percentage
FROM tips
WHERE sex = 'Female'
ORDER BY CAST(total_bill AS REAL) DESC
LIMIT 5
'''

# Male results are printed first, then Female.
for sex_query in (query1, query2):
    result = cursor.execute(sex_query)
    for row in result:
        print(row)

##### 8. Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount

In [None]:
# Highest and lowest tip percentage per (day, time), each with the bill and tip
# of the row that produced it.
# SQLite fills bare columns from the row holding the extreme value only when a
# query has a single MAX() or MIN(); with both in one SELECT the bare
# total_bill/tip match neither. Each extreme is therefore computed in its own
# subquery and the two are joined on (day, time).
query = '''
SELECT mx.day,
       mx.time,
       ROUND(mx.pct, 2) AS max_tip_percentage,
       mx.total_bill AS max_total_bill,
       mx.tip AS max_tip,
       ROUND(mn.pct, 2) AS min_tip_percentage,
       mn.total_bill AS min_total_bill,
       mn.tip AS min_tip
FROM (
    SELECT day, time, MAX((tip / total_bill) * 100) AS pct, total_bill, tip
    FROM tips
    GROUP BY day, time
) AS mx
JOIN (
    SELECT day, time, MIN((tip / total_bill) * 100) AS pct, total_bill, tip
    FROM tips
    GROUP BY day, time
) AS mn
  ON mx.day = mn.day AND mx.time = mn.time
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### 9. Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100

In [None]:
# Bill, tip and tip percentage for parties of 4+ that tipped over 15% on a bill
# between $50 and $100 (inclusive).
# The CASTs force numeric comparison: compared as text, '50.81' <= '100' is
# false, so the range filter would reject every qualifying row.
query = '''
SELECT total_bill, tip, ROUND((tip / total_bill) * 100, 2) AS tip_percentage
FROM tips
WHERE CAST(size AS INTEGER) >= 4
AND ((tip / total_bill) * 100) > 15
AND CAST(total_bill AS REAL) BETWEEN 50 AND 100
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### 10. Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records


In [None]:
# Average tip percentage per day / time / smoker grouping, keeping only the
# groupings that contain more than 5 rows.
query = '''
SELECT day, time, smoker, ROUND(AVG((tip / total_bill) * 100), 2) AS avg_tip_percentage
FROM tips
GROUP BY day, time, smoker
HAVING COUNT(*) > 5
'''
result = cursor.execute(query)
for row in result.fetchall():
    print(row)

##### Additional Queries  
##### 1. Find the average bill and tip percentage for smokers and non-smokers

In [None]:
# Average bill and average tip percentage, split by smoker status.
query = '''
SELECT smoker, ROUND(AVG(total_bill), 2), ROUND(AVG((tip/total_bill) * 100), 2) AS avg_tip_percentage
FROM tips
GROUP BY smoker
'''
result = cursor.execute(query)
for row in result.fetchall():
    print(row)

##### 2. Retrieve the average total bill and tip amount for each day

In [None]:
# Average bill and average tip amount for each day, both rounded to 2 decimals.
query = '''
    SELECT day, ROUND(AVG(total_bill), 2) AS avg_total_bill, ROUND(AVG(tip), 2) AS avg_tip
    FROM tips
    GROUP BY day;
'''
result = cursor.execute(query)
for row in result.fetchall():
    print(row)

##### 3. Retrieve the total bill and tip for the smallest and largest group sizes

In [None]:
# One (total_bill, tip) row for the smallest party size, then one for the
# largest. LIMIT 1 keeps a single row when several parties share that size.
query1 = '''
    SELECT total_bill, tip
    FROM tips
    WHERE size = (SELECT MIN(size) FROM tips)
    LIMIT 1;
'''

query2 = '''
    SELECT total_bill, tip
    FROM tips
    WHERE size = (SELECT MAX(size) FROM tips)
    LIMIT 1;
'''

# Smallest-size result is printed first, then the largest-size result.
for statement in (query1, query2):
    result = cursor.execute(statement)
    for row in result:
        print(row)

##### 4. Find the count for smokers of each sex

In [None]:
# Number of smokers for each sex.
# 'Yes' uses single quotes: in SQL, double quotes denote an identifier, and
# SQLite only treats "Yes" as a string through a legacy fallback that can be
# disabled at build time.
query = '''
SELECT sex, COUNT(*) AS smoker_count
FROM tips
WHERE smoker = 'Yes'
GROUP BY sex
'''
result = cursor.execute(query)
for row in result:
    print(row)

##### 5. For each sex, calculate the average total bill and tip amount

In [None]:
# Average bill and average tip amount for each sex, rounded to 2 decimals.
query = '''
SELECT sex, ROUND(AVG(total_bill) , 2) AS avg_total_bill, ROUND(AVG(tip), 2) AS avg_tip
FROM tips
GROUP BY sex
'''
result = cursor.execute(query)
for row in result.fetchall():
    print(row)

##### UPDATE the record that corresponds to id=10 and set smoker to Yes.

In [None]:
# Mark the record with ROWID 10 as a smoker.
query = '''
UPDATE tips
SET smoker = 'Yes'
WHERE ROWID = 10;
'''
result = cursor.execute(query)
# sqlite3 runs UPDATE inside an implicit transaction; without a commit the
# change is discarded when the connection is closed.
connection.commit()
# An UPDATE returns no rows, so report how many rows were changed instead.
print(f"Rows updated: {result.rowcount}")

##### DELETE records from the database that have a total bill that is less than $10.

In [None]:
# Remove every record whose total bill is under $10.
# CAST forces a numeric comparison: compared as text, '3.07' < '10' is false,
# so small bills would survive the delete.
query = '''
    DELETE
    FROM tips
    WHERE CAST(total_bill AS REAL) < 10;
'''
result = cursor.execute(query)
# sqlite3 runs DELETE inside an implicit transaction; without a commit the
# deletion is discarded when the connection is closed.
connection.commit()
# A DELETE returns no rows, so report how many rows were removed instead.
print(f"Rows deleted: {result.rowcount}")

##### Close Connection

In [None]:
# Close the database connection once all queries have run.
# NOTE(review): sqlite3's close() does not commit — any changes that have not
# been committed by this point are discarded.
if connection:
    connection.close()