In [1]:
# Import Necessary Libraries
import sqlite3
import pandas as pd

CREATE

You have been provided with a sample .csv file. Create a SQLite database in Python using the data in this sample .csv file.

In [2]:
"""

Creates a SQLite database connection to 'tips.db', creates a 'tips' table.

Populates it with data from a CSV file. 

If the table already exists, it replaces the existing data with the new data.

"""

try:
    with sqlite3.connect("tips.db") as database:
        data = database.cursor()
        data.execute("""
                    CREATE TABLE tips (
                                        total_bill, 
                                        tip,
                                        sex, 
                                        smoker, 
                                        day, 
                                        time,
                                        size)
                    """)
        database.commit()

        df = pd.read_csv("data/tips.csv")
        df.to_sql('tips', database, if_exists='replace', index=False)

except sqlite3.DatabaseError as e:
    print("An error occurred: ", e)


READ

Answer the following questions (must show your work via SQL queries):

In [3]:
def run_query(query):
    """
    Executes the provided SQL query on the 'tips.db' SQLite database and returns the result as a Pandas DataFrame.

    Parameters:
    query (str): The SQL query to execute.

    Returns:
    pd.DataFrame: First 20 rows of the result as a DataFrame.

    Raises:
    Prints a database error message if an exception occurs.
    
    """
    try:
        with sqlite3.connect("tips.db") as database:
            
            data = database.cursor()
            data.execute(query)
            result = data.fetchall()
    
    except sqlite3.DatabaseError as e:
        print("An error occurred: ", e)

    output = pd.DataFrame(result, columns=[description[0] for description in data.description]).head(20)
    
    return output

In [4]:
def update_and_delete(query):
    """
    Executes a given SQL query on the 'tips.db' database, commits the changes, and fetches all rows from the 'tips' table.

    The results are returned as a pandas DataFrame.
    
    Parameters:
    query (str): The SQL query to execute.

    Returns:
    pd.DataFrame: First 20 rows of the result as a DataFrame.

    Raises:
    Prints a database error message if an exception occurs.
    
    """
    try:
        
        with sqlite3.connect("tips.db") as database:
            data = database.cursor()
            data.execute(query)
            database.commit()
            
            data.execute("SELECT * FROM tips")
            result = data.fetchall()
    
    except sqlite3.DatabaseError as e:
        print("An error occurred: ", e)

    output = pd.DataFrame(result, columns=[description[0] for description in data.description]).head(20)

    return output


Query 1 : Retrieve the average tip percentage for each day of the week

In [5]:
query = """ 
            
            SELECT 
                    day, 
                    ROUND(AVG((tip / total_bill) * 100),2) AS avg_tip_percentage
            
            FROM tips
            
            GROUP BY day

        """

run_query(query)


Unnamed: 0,day,avg_tip_percentage
0,Fri,16.99
1,Sat,15.32
2,Sun,16.69
3,Thur,16.13


Query 2 : Find the maximum and minimum total bill amounts

In [6]:
query = """ 
        
        SELECT MIN(total_bill), MAX(total_bill) 

        FROM tips

        """    

run_query(query)


Unnamed: 0,MIN(total_bill),MAX(total_bill)
0,3.07,50.81


Query 3 : Count the number of parties for each size

In [7]:
query = """
        
        SELECT size, COUNT(*) 
            
        FROM tips 
            
        GROUP BY size
            
        """  
  
run_query(query)

Unnamed: 0,size,COUNT(*)
0,1,4
1,2,156
2,3,38
3,4,37
4,5,5
5,6,4


Query 4 : Retrieve the total bill and tip for parties of size 4 or more, where the tip percentage is greater than 15%


In [8]:
query = """ 
        
        SELECT total_bill, tip 
                 
        FROM tips 
                 
        WHERE 
                size >= 4 AND 
                ROUND((tip / total_bill) * 100,2) > 15

        """

run_query(query)


Unnamed: 0,total_bill,tip
0,25.29,4.71
1,18.43,3.0
2,39.42,7.58
3,30.4,5.6
4,32.4,6.0
5,25.56,4.34
6,18.29,3.76
7,29.93,5.07
8,34.3,6.7
9,27.05,5.0


Query 5 : Retrieve the total bill, tip amount, and tip percentage for each combination of day and time, sorted by tip percentage in descending order


In [9]:
query = """ 
                 
        SELECT 
                day, 
                time, 
                total_bill, 
                tip, 
                ROUND((tip / total_bill) * 100,2) AS tip_percentage
                 
        FROM tips 
                 
        GROUP BY day, time 
                 
        ORDER BY ROUND((tip / total_bill) * 100,2) 

        """

run_query(query)

Unnamed: 0,day,time,total_bill,tip,tip_percentage
0,Sun,Dinner,16.99,1.01,5.94
1,Fri,Dinner,28.97,3.0,10.36
2,Thur,Lunch,27.2,4.0,14.71
3,Thur,Dinner,18.78,3.0,15.97
4,Sat,Dinner,20.65,3.35,16.22
5,Fri,Lunch,12.16,2.2,18.09


Query 6 : Find the average tip percentage for each combination of day, time, and smoker status

In [10]:
query = """
        
        SELECT 
                day, 
                time, 
                smoker, 
                ROUND(AVG((tip / total_bill) * 100),2) AS avg_tip_percentage
                 
        FROM tips 
               
        GROUP BY day, time, smoker 

        """

run_query(query)

Unnamed: 0,day,time,smoker,avg_tip_percentage
0,Fri,Dinner,No,13.96
1,Fri,Dinner,Yes,16.53
2,Fri,Lunch,No,18.77
3,Fri,Lunch,Yes,18.89
4,Sat,Dinner,No,15.8
5,Sat,Dinner,Yes,14.79
6,Sun,Dinner,No,16.01
7,Sun,Dinner,Yes,18.73
8,Thur,Dinner,No,15.97
9,Thur,Lunch,No,16.03


Query 7 : Retrieve the total bill, tip amount, and tip percentage for each sex, sorted by total bill in descending order, and limit the results to the top 5 records 


In [11]:
query = """ 
                 
        SELECT 
                sex, 
                total_bill, 
                tip, 
                ROUND((tip / total_bill) * 100,2) AS tip_percentage
                
        FROM tips 
                
        ORDER BY total_bill DESC 
        
        LIMIT 5

        """
run_query(query)


Unnamed: 0,sex,total_bill,tip,tip_percentage
0,Male,50.81,10.0,19.68
1,Male,48.33,9.0,18.62
2,Male,48.27,6.73,13.94
3,Male,48.17,5.0,10.38
4,Male,45.35,3.5,7.72


Query 8 : Find the maximum and minimum tip percentage for each day and time combination, along with the corresponding total bill and tip amount


In [12]:
query = """ 
                 
        SELECT 
                day, 
                time, 
                total_bill, 
                tip, 
                MAX(ROUND((tip / total_bill) * 100,2)) AS max_tip_percentage,
                MIN(ROUND((tip / total_bill) * 100,2)) AS min_tip_percentage
        
        FROM tips 
        
        GROUP BY day, time

        """

run_query(query)


Unnamed: 0,day,time,total_bill,tip,max_tip_percentage,min_tip_percentage
0,Fri,Dinner,28.97,3.0,26.35,10.36
1,Fri,Lunch,13.42,1.58,25.93,11.77
2,Sat,Dinner,32.83,1.17,32.57,3.56
3,Sun,Dinner,16.99,1.01,71.03,5.94
4,Thur,Dinner,18.78,3.0,15.97,15.97
5,Thur,Lunch,18.64,1.36,26.63,7.3


Query 9 : Retrieve the total bill, tip amount, and tip percentage for parties of size 4 or more, where the tip percentage is greater than 15%, and the total bill is between $50 and $100

In [13]:
query = """ 
    
        SELECT 
                total_bill, 
                tip, 
                ROUND((tip / total_bill) * 100,2) AS tip_percentage
                
        FROM tips 
        
        WHERE 
                size > 4 AND 
                ROUND((tip / total_bill) * 100,2) > 15 AND 
                total_bill BETWEEN 50 AND 100

        """
run_query(query)


Unnamed: 0,total_bill,tip,tip_percentage


Query 10 : Find the average tip percentage for each combination of day, time, and smoker status, but only include combinations with more than 5 records


In [14]:
query = """ 
        
            SELECT 
                    day, 
                    time, 
                    smoker, 
                    ROUND(AVG((tip / total_bill) * 100),2) AS avg_tip_percentage
            
            FROM tips 
            
            GROUP BY day, time, smoker 
            
            HAVING COUNT(*) > 5

        """

run_query(query)


Unnamed: 0,day,time,smoker,avg_tip_percentage
0,Fri,Dinner,Yes,16.53
1,Fri,Lunch,Yes,18.89
2,Sat,Dinner,No,15.8
3,Sat,Dinner,Yes,14.79
4,Sun,Dinner,No,16.01
5,Sun,Dinner,Yes,18.73
6,Thur,Lunch,No,16.03
7,Thur,Lunch,Yes,16.39


Come up with your own SQL queries for the table. You must show 5 additional queries beyond the 10 outlined above. 

Query 11 : Find the sum of total bill for each combination of time and day

In [15]:
query = """ 
                 
            SELECT 
                    day, 
                    time, 
                    SUM(total_bill) 
            
            FROM tips 
                
            GROUP BY time, day
                     
        """

run_query(query)


Unnamed: 0,day,time,SUM(total_bill)
0,Fri,Dinner,235.96
1,Sat,Dinner,1778.4
2,Sun,Dinner,1627.16
3,Thur,Dinner,18.78
4,Fri,Lunch,89.92
5,Thur,Lunch,1077.55


Query 12 : Find the average total bill and tip for each sex

In [16]:
query = """ 
            
            SELECT 
                    sex, 
                    ROUND(AVG(total_bill),2),
                    ROUND(AVG(tip),2)
                
            FROM tips 
                
            GROUP BY sex
                     
        """

run_query(query)


Unnamed: 0,sex,"ROUND(AVG(total_bill),2)","ROUND(AVG(tip),2)"
0,Female,18.06,2.83
1,Male,20.74,3.09


Query 13 : Retrieve the highest tip given by each smoker status

In [17]:
query = """ 
                 
            SELECT smoker, MAX(tip)
            
            FROM tips 
                
            GROUP BY smoker
                     
        """

run_query(query)

Unnamed: 0,smoker,MAX(tip)
0,No,9.0
1,Yes,10.0


Query 14 : Retrieve the top 3 days with the highest sum of total bill amounts

In [18]:
query = """ 
                 
            SELECT day, SUM(total_bill)
            
            FROM tips 
                
            GROUP BY day
                
            ORDER BY SUM(total_bill) DESC 
                
            LIMIT 3
                
                     
        """

run_query(query)

Unnamed: 0,day,SUM(total_bill)
0,Sat,1778.4
1,Sun,1627.16
2,Thur,1096.33


Query 15 : Find the total tips collected for each combination of day and time where the tip percentage is above 20%

In [19]:
query = """ 
                 
            SELECT day, time, SUM(tip)
                
            FROM tips 
            
            WHERE ROUND((tip / total_bill) * 100,2) > 20
            
            GROUP BY day, time
                
        """

run_query(query)

Unnamed: 0,day,time,SUM(tip)
0,Fri,Dinner,6.8
1,Fri,Lunch,5.4
2,Sat,Dinner,38.13
3,Sun,Dinner,59.92
4,Thur,Lunch,36.21


UPDATE

It was determined that there was an error in the database. Please update the record that corresponds to id=10 and set smoker to Yes. 

In [20]:
query = """ 
            
            UPDATE tips
                
            SET smoker = 'Yes'
                
            WHERE ROWID = 10;
                 
        """

update_and_delete(query)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.0,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,Yes,Sun,Dinner,2


DELETE

Delete records from the database that have a total bill that is less than $10. 

In [21]:
query = """ 
            DELETE

            FROM tips

            WHERE total_bill < 10
                 
        """

update_and_delete(query)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,26.88,3.12,Male,No,Sun,Dinner,4
7,15.04,1.96,Male,No,Sun,Dinner,2
8,14.78,3.23,Male,Yes,Sun,Dinner,2
9,10.27,1.71,Male,No,Sun,Dinner,2
