# In [172]:
#Import libraries that will be needed
import sqlite3
import pandas as pd
import os

# Lift pandas' display limits so query results are shown in full rather than
# abbreviated: no cap on rows, no cap on columns, no cap on column width.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None

#Helper functions: create the SQLite database and load the CSV file with pandas.
def create_sqliteDB(DBname):
    """Open a connection to the SQLite database named by *DBname*.

    Args:
        DBname: Database path passed straight to ``sqlite3.connect``.

    Returns:
        The ``sqlite3.Connection`` created with a 20 second ``timeout``.
    """
    lock_wait_seconds = 20
    connection = sqlite3.connect(DBname, timeout=lock_wait_seconds)
    return connection

def load_csv(CSVfile):
    """Read a CSV into a pandas DataFrame, using its first row as the header.

    Args:
        CSVfile: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The resulting ``pandas.DataFrame``.
    """
    frame = pd.read_csv(CSVfile, header=0)
    return frame

#If the table already exists, drop it.
def delete_ifexist(database, Tablename):
    """Drop table *Tablename* if it is present, then commit.

    Args:
        database: Open database connection providing ``cursor()`` and
            ``commit()``.
        Tablename: Name of the table to drop. It is interpolated into the
            SQL text as-is, so it must be a trusted identifier.
    """
    drop_statement = f"DROP TABLE IF EXISTS {Tablename}"
    database.cursor().execute(drop_statement)
    database.commit()


def write_to_sql(df, database, Tablename):
    """Write DataFrame *df* to table *Tablename*, replacing any existing table.

    Args:
        df: The ``pandas.DataFrame`` to store.
        database: Open database connection handed to ``DataFrame.to_sql``.
        Tablename: Name of the destination table.

    The DataFrame index is not written as a column.
    """
    df.to_sql(
        name=Tablename,
        con=database,
        if_exists='replace',
        index=False,
    )

#Queries to answer questions
def run_queries(cursor, queries):
    """Execute each SQL statement in order and print its outcome.

    For a statement that produces a result set, the list of column names is
    printed followed by every fetched row. A statement without a result set
    (for example UPDATE or DELETE) leaves ``cursor.description`` as ``None``;
    for those the affected-row count is printed instead, so data-modifying
    statements no longer fail while the header is being built.

    Args:
        cursor: DB-API cursor (e.g. ``sqlite3.Cursor``) used to run the
            statements.
        queries: Iterable of SQL statement strings.
    """
    for query in queries:
        cursor.execute(query)
        rows = cursor.fetchall()

        print(f"executed {query}")
        if cursor.description is None:
            # No result set: there are no column names or rows to show.
            print(f"{cursor.rowcount} row(s) affected")
        else:
            col_names = [column[0] for column in cursor.description]
            print(col_names)
            for row in rows:
                print(row)
        print("\n")


#The main function that runs the whole job.
def main():
    """Load the tips CSV into SQLite, run the exercise queries, print results.

    The CSV at ``data/tips.csv`` is written to table ``tip1`` in
    ``database2.db`` (replacing any existing table). Every statement in the
    query list is then executed and printed via ``run_queries``. The last two
    statements modify the table (an UPDATE, then a DELETE).

    The connection is always closed, even when a step fails. A connection
    left open with an uncommitted write can keep the database file locked,
    so that a later run fails with "OperationalError: database is locked".
    Changes are committed only when every step succeeded.
    """
    CSVfile = 'data/tips.csv'
    DBname = 'database2.db'
    Tablename = 'tip1'

    df = load_csv(CSVfile)

    # SQLite queries for all the questions. The "Answer" comments are the
    # outputs recorded from an earlier run.
    queries = [

        # Retrieve the average tip percentage for each day of the week
        f'select day, round(avg(tip/total_bill)*100,2) as avg_tip_percentage from {Tablename} group by day',

        # Answer:
        # ['day', 'avg_tip_percentage']
        # ('Thur', 16.13)
        # ('Fri', 16.99)
        # ('Sat', 15.32)
        # ('Sun', 16.69)


        # Find the maximum and minimum total bill amounts
        f'select max(total_bill) as max_total_bill, min(total_bill) as min_total_bill from {Tablename}',

        # Answer:
        # ['max_total_bill', 'min_total_bill']
        # (50.81, 3.07)


        # Count the number of parties for each size
        f'select size, count(*) as number_of_parties from {Tablename} group by size',

        # Answer:
        # ['size', 'number_of_parties']
        # (1, 4)
        # (2, 156)
        # (3, 38)
        # (4, 37)
        # (5, 5)
        # (6, 4)


        # Retrieve the total bill and tip for parties of size 4 or more,
        # where the tip percentage is greater than 15%
        f'''select total_bill, tip, round((tip/total_bill)*100,2) as tip_percentage 
            from {Tablename} 
            where size >=4 and (tip/total_bill)*100 >15''',

        # Answer:
        # ['total_bill', 'tip', 'tip_percentage']
        # (25.29, 4.71, 18.62)
        # (18.43, 3.0, 16.28)
        # (39.42, 7.58, 19.23)
        # (30.4, 5.6, 18.42)
        # (32.4, 6.0, 18.52)
        # (25.56, 4.34, 16.98)
        # (18.29, 3.76, 20.56)
        # (29.93, 5.07, 16.94)
        # (34.3, 6.7, 19.53)
        # (27.05, 5.0, 18.48)
        # (29.85, 5.14, 17.22)
        # (21.5, 3.5, 16.28)
        # (23.17, 6.5, 28.05)
        # (20.69, 5.0, 24.17)
        # (20.53, 4.0, 19.48)
        # (25.89, 5.16, 19.93)
        # (48.33, 9.0, 18.62)


        # Retrieve the total bill, tip amount, and tip percentage for each
        # combination of day and time, sorted by tip percentage in descending
        # order
        f'''select day, time, round(sum(total_bill),2) as sum_total_bill, round(sum(tip),2) as sum_total_tip, 
            round((sum(tip)/sum(total_bill))*100,2) as tip_percentage 
            from {Tablename} 
            group by day, time 
            order by tip_percentage desc''',

        # Answer:
        # ['day', 'time', 'sum_total_bill', 'sum_total_tip', 'tip_percentage']
        # ('Fri', 'Lunch', 89.92, 16.68, 18.09)
        # ('Sat', 'Dinner', 1778.4, 260.4, 16.22)
        # ('Thur', 'Dinner', 18.78, 3.0, 15.97)
        # ('Thur', 'Lunch', 1077.55, 168.83, 14.71)
        # ('Fri', 'Dinner', 235.96, 35.28, 10.36)
        # ('Sun', 'Dinner', 1627.16, 247.39, 5.94)
        # NOTE(review): the recorded tip_percentage values do not equal
        # sum_total_tip / sum_total_bill for the printed sums (e.g.
        # 35.28 / 235.96 is about 14.95, not 10.36), so this answer looks
        # like it was captured from an earlier version of the query -
        # re-run and refresh it.


        # Find the average tip percentage for each combination of day,
        # time, and smoker status
        f'''select day, time, smoker, round(avg(tip/total_bill)*100,2) as avg_tip_percentage 
            from {Tablename} 
            group by day, time, smoker''',

        # Answer:
        # ['day', 'time', 'smoker', 'avg_tip_percentage']
        # ('Fri', 'Dinner', 'No', 13.96)
        # ('Fri', 'Dinner', 'Yes', 16.53)
        # ('Fri', 'Lunch', 'No', 18.77)
        # ('Fri', 'Lunch', 'Yes', 18.89)
        # ('Sat', 'Dinner', 'No', 15.8)
        # ('Sat', 'Dinner', 'Yes', 14.79)
        # ('Sun', 'Dinner', 'No', 16.01)
        # ('Sun', 'Dinner', 'Yes', 18.73)
        # ('Thur', 'Dinner', 'No', 15.97)
        # ('Thur', 'Lunch', 'No', 16.03)
        # ('Thur', 'Lunch', 'Yes', 16.39)


        # Retrieve the total bill, tip amount, and tip percentage for each
        # sex, sorted by total bill in descending order, and limit the
        # results to the top 5 records (one query per sex)
        f'''select sex, total_bill , tip ,round((tip/total_bill)*100,2) as tip_percentage 
            from {Tablename} 
            where sex = 'Male'
            order by total_bill desc 
            limit 5
          ''',

        # Answer for Male:
        # ['sex', 'total_bill', 'tip', 'tip_percentage']
        # ('Male', 50.81, 10.0, 19.68)
        # ('Male', 48.33, 9.0, 18.62)
        # ('Male', 48.27, 6.73, 13.94)
        # ('Male', 48.17, 5.0, 10.38)
        # ('Male', 45.35, 3.5, 7.72)


        f'''select sex, total_bill , tip ,round((tip/total_bill)*100,2) as tip_percentage 
            from {Tablename} 
            where sex = 'Female'
            order by total_bill desc 
            limit 5
        ''',

        # Answer for Female:
        # ['sex', 'total_bill', 'tip', 'tip_percentage']
        # ('Female', 44.3, 2.5, 5.64)
        # ('Female', 43.11, 5.0, 11.6)
        # ('Female', 35.83, 4.67, 13.03)
        # ('Female', 35.26, 5.0, 14.18)
        # ('Female', 34.83, 5.17, 14.84)


        # Find the maximum and minimum tip percentage for each day and time
        # combination, along with the corresponding total bill and tip amount
        # (one query for the maximum, one for the minimum)
        f''' select A.day, A.time, A.total_bill, A.tip, round((tip/total_bill)*100,2) as tip_percentage  
            from {Tablename} as A
            join (select day, time, max(tip/total_bill)*100 as max_tip_percentage
            from {Tablename} 
            group by day, time ) as B 
            on A.day = B.day and A.time=B.time and (A.tip/A.total_bill)*100 = B.max_tip_percentage
            ''',

        # Answer:
        # ['day', 'time', 'total_bill', 'tip', 'tip_percentage']
        # ('Sat', 'Dinner', 3.07, 1.0, 32.57)
        # ('Fri', 'Dinner', 16.32, 4.3, 26.35)
        # ('Thur', 'Lunch', 7.51, 2.0, 26.63)
        # ('Sun', 'Dinner', 7.25, 5.15, 71.03)
        # ('Fri', 'Lunch', 13.42, 3.48, 25.93)
        # ('Thur', 'Dinner', 18.78, 3.0, 15.97)


        f''' select A.day, A.time, A.total_bill, A.tip, round((tip/total_bill)*100,2) as tip_percentage  
            from {Tablename} as A
            join (select day, time, min(tip/total_bill)*100 as min_tip_percentage
            from {Tablename} 
            group by day, time ) as B 
            on A.day = B.day and A.time=B.time and (A.tip/A.total_bill)*100 = B.min_tip_percentage
            ''',

        # Answer:
        # ['day', 'time', 'total_bill', 'tip', 'tip_percentage']
        # ('Sun', 'Dinner', 16.99, 1.01, 5.94)
        # ('Fri', 'Dinner', 28.97, 3.0, 10.36)
        # ('Thur', 'Lunch', 18.64, 1.36, 7.3)
        # ('Fri', 'Lunch', 13.42, 1.58, 11.77)
        # ('Sat', 'Dinner', 32.83, 1.17, 3.56)
        # ('Thur', 'Dinner', 18.78, 3.0, 15.97)


        # Retrieve the total bill, tip amount, and tip percentage for parties
        # of size 4 or more, where the tip percentage is greater than 15%,
        # and the total bill is between $50 and $100
        f'''select total_bill, tip, (tip/total_bill)*100 as tip_percentage  
        from {Tablename}
        where size>=4 and ((tip/total_bill)*100)>15 and total_bill between 50 and 100
            ''',

        # Answer:
        # There is no row that fulfills this condition.


        # Find the average tip percentage for each combination of day, time,
        # and smoker status, but only include combinations with more than
        # 5 records
        f'''select A.day, A.time, a.smoker, round(avg((A.tip/A.total_bill)*100),2) as avg_tip_percentage   from {Tablename} as A join
        (select day, time, smoker, count(*) as counts
        from {Tablename}
        group by day, time, smoker having count(*)>5) as B on A.day = B.day and A.time = B.time and A.smoker = B.smoker
        group by A.day, A.time, A.smoker''',

        # Answer:
        # ['day', 'time', 'smoker', 'avg_tip_percentage']
        # ('Fri', 'Dinner', 'Yes', 16.53)
        # ('Fri', 'Lunch', 'Yes', 18.89)
        # ('Sat', 'Dinner', 'No', 15.8)
        # ('Sat', 'Dinner', 'Yes', 14.79)
        # ('Sun', 'Dinner', 'No', 16.01)
        # ('Sun', 'Dinner', 'Yes', 18.73)
        # ('Thur', 'Lunch', 'No', 16.03)
        # ('Thur', 'Lunch', 'Yes', 16.39)


        # It was determined that there was an error in the database. Please
        # update the record that corresponds to id=10 and set smoker to Yes.
        # NOTE(review): the row is matched by its total_bill/tip values
        # rather than by an id - confirm these values identify record id=10
        # and only that record.
        f'''Update {Tablename} Set smoker = 'Yes' 
        Where total_bill = 15.04 and tip = 1.96''',

        # Answer:

        # Delete records from the database that have a total bill that is
        # less than $10.
        f'Delete from {Tablename} where total_bill <10'

        # Answer:

        ]

    database = create_sqliteDB(DBname)
    try:
        cursor = database.cursor()
        try:
            delete_ifexist(database, Tablename)
            write_to_sql(df, database, Tablename)
            run_queries(cursor, queries)
        finally:
            cursor.close()

        # Persist the UPDATE/DELETE only when every step above succeeded.
        database.commit()
    finally:
        # Always release the connection so a failed run cannot leave the
        # database file locked for the next one.
        database.close()

    
# Run the job only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


# Output of the run above:
# OperationalError: database is locked