# In this codebase, Stackholder related Business questions are answered

In [1]:
import pandas as pd
import sqlite3

In [2]:
# creating in-memory SQL database
conn = sqlite3.connect(':memory:') 

In [3]:
# reading the CSV files 
users = pd.read_csv("../cleansed_file/users_cleansed_data.csv")
brands = pd.read_csv("../cleansed_file/brands_cleansed_data.csv")
receipts = pd.read_csv("../cleansed_file/cleansed_receipts.csv")
receipts_items = pd.read_csv("../cleansed_file/cleansed_receipt_items.csv")

In [4]:
# creating tables in the in-memory SQLite database using the dataframe
users.to_sql('users', conn, index=False, if_exists='replace')
brands.to_sql('brands', conn, index=False, if_exists='replace')
receipts.to_sql('receipts', conn, index=False, if_exists='replace')
receipts_items.to_sql('receipts_items', conn, index=False, if_exists='replace')

6941

## Q. When considering average spend from receipts with 'rewardsReceiptStatus’ of ‘Accepted’ or ‘Rejected’, which is greater?


In [5]:
q1_avgSpend = """with cte as(
                    select 
                        avg(case 
                                when rewardsReceiptStatus = 'FINISHED' or rewardsReceiptStatus = 'ACCEPTED' 
                                then totalSpent 
                        end) Accepted_avgTotalSpend,
                        avg(case when rewardsReceiptStatus = 'REJECTED' 
                                then totalSpent 
                        end) Rejected_avgTotalSpend
                    from receipts
                )
                select 
                case 
                    when Accepted_avgTotalSpend>Rejected_avgTotalSpend 
                    then 'Accepted' 
                    else 'Rejected'
                end  Q1_ans
                from cte;"""

In [6]:
q1_avgSpend_df = pd.read_sql(q1_avgSpend, conn)
print(q1_avgSpend_df.to_string(index=False))

  Q1_ans
Accepted


## Q. When considering total number of items purchased from receipts with 'rewardsReceiptStatus’ of ‘Accepted’ or ‘Rejected’, which is greater?


In [7]:
q2_sumCount = """with cte as(
                select 
                    sum(case 
                            when rewardsReceiptStatus = 'FINISHED' or rewardsReceiptStatus = 'ACCEPTED'
                            then purchasedItemCount 
                    end) Accepted_sumTotalCount,
                    sum(case 
                        when rewardsReceiptStatus = 'REJECTED' 
                        then purchasedItemCount 
                    end) Rejected_sumTotalCount
                from receipts)
                select 
                case 
                    when Accepted_sumTotalCount>Rejected_sumTotalCount 
                    then 'Accepted' 
                    else 'Rejected'
                end Q2_ans
                from cte;"""

In [8]:
q2_sumCount_df = pd.read_sql(q2_sumCount, conn)
print(q2_sumCount_df.to_string(index=False))

  Q2_ans
Accepted


## ***Note:*** As per the documentation provided, I'm considering barcode from the brands table to join receipt_items/receipts table with the brands table, (as barcode = the barcode on the item)

## Q. Which brand has the most spend among users who were created within the past 6 months?


In [9]:
q3_brandwmostspend = """with cte as( 
                        SELECT userID
                        FROM users 
                        WHERE createdDate >= (select DATE(max(createdDate), '-6 months') from users)
                        )
                        select C.brandName, sum(b.itemPrice) total_price
                        from receipts a
                        inner join receipts_items b
                        on a.receiptID = b.receiptID
                        inner join brands c
                        on b.barcode = c.barcode
                        where b.barcode is not null
                        and a.userID in (select userID from cte)
                        group by b.barcode
                        order by total_price desc
                     """

In [10]:
q3_brandwmostspend_df = pd.read_sql(q3_brandwmostspend, conn)
print(q3_brandwmostspend_df.to_string(index=False))

            brandName  total_price
Cracker Barrel Cheese       196.98
             Tostitos        80.66
              Swanson        61.38
              Cheetos        22.00
                   V8        13.49
         Kettle Brand        11.07
      Diet Chris Cola        10.76
      Pepperidge Farm         9.00
               Jell-O         4.99
               Quaker         3.99
          Grey Poupon         3.29


## Q. Which brand has the most transactions among users who were created within the past 6 months?


In [11]:
q3_brandmosttransaction = """with cte as( 
                        SELECT userID
                        FROM users 
                        WHERE createdDate >= (select DATE(max(createdDate), '-6 months') from users)
                        )
                        select C.brandName, count(C.brandName) total_transaction
                        from receipts a
                        inner join receipts_items b
                        on a.receiptID = b.receiptID
                        inner join brands c
                        on b.barcode = c.barcode
                        where b.barcode is not null
                        and a.userID in (select userID from cte)
                        group by C.brandName
                        order by total_transaction desc
                     """

In [12]:
q3_brandmosttransaction_df = pd.read_sql(q3_brandmosttransaction, conn)
print(q3_brandmosttransaction_df.to_string(index=False))

            brandName  total_transaction
             Tostitos                 23
              Swanson                 11
      Pepperidge Farm                  3
         Kettle Brand                  3
                   V8                  2
                Prego                  2
               Jell-O                  2
      Diet Chris Cola                  2
Cracker Barrel Cheese                  2
               Quaker                  1
          Grey Poupon                  1
              Cheetos                  1
