In [6]:
import redshift_connector
import pandas as pd
import os

In [15]:
host = os.environ['host']
database = os.environ['database']
user = os.environ['user']
password = os.environ['password']
schema = os.environ['schema']

In [86]:
def redshift_query(host : str, database : str, user : str, password : str, query: str ) -> pd.DataFrame:
    conn = redshift_connector.connect(
     host=host,
     database=database,
     user=user,
     password=password
    )
    cursor = conn.cursor()
    cursor.execute(query)
    result = cursor.fetch_dataframe()
    
    return result

### 1.	Our client Barilla would like to know how many customers purchased their pasta products during February 2020. 

In [125]:
query = f"""
SELECT
C.client,
C.brand,
COUNT(DISTINCT G.member_id) 
FROM {schema}.groceryorders AS G
LEFT JOIN {schema}.clientbrand AS C
ON G.brand = C.brand
WHERE
UPPER(C.client) = 'BARILLA'
AND
order_date BETWEEN '2020-02-01' AND '2020-02-29'
AND
UPPER(product_category) = 'PASTA'
GROUP BY
C.client,
C.brand
"""

In [126]:
result = redshift_query(host, database, user, password, query)
result

Unnamed: 0,client,brand,count
0,Barilla,Barilla,429


### 2. We would like to present the monthly grocery sales for each vendor and provide the sales in dollars and also as percentages. In addition, the following vendors should be transformed as kroger: kingSoopers, smiths, fredMeyer, frys, picknsave & ralphs. The following vendor should be considered as albertsons: safeway. 

In [89]:
query = f"""
SELECT
CASE
    WHEN vendor = 'kingSoopers' THEN 'kroger'
    WHEN vendor = 'smiths' THEN 'kroger'
    WHEN vendor = 'fredMeyer' THEN 'kroger'
    WHEN vendor = 'frys' THEN 'kroger'
    WHEN vendor = 'picknsave' THEN 'kroger'
    WHEN vendor = 'ralphs' THEN 'kroger'
    WHEN vendor = 'safeway' THEN 'albertsons'
    ELSE vendor
END AS group_vendors,
to_char(order_date, 'MM') as month_order,
to_char(order_date, 'YYYY') as year_order,
SUM(cost) as total_cost,
SUM(cost) * 100 / SUM(SUM(cost)) OVER(PARTITION BY month_order,year_order ) AS percentage_cost
FROM {schema}.groceryorders
GROUP BY
group_vendors,
month_order,
year_order
"""

In [90]:
result = redshift_query(host, database, user, password, query)
result

Unnamed: 0,group_vendors,month_order,year_date,total_cost,percentage_cost
0,instacart,02,2020,,
1,walmartGrocery,02,2020,8347.59,81.858780
2,kroger,02,2020,1164.14,11.415879
3,shipt,02,2020,563.12,5.522111
4,amazonFresh,02,2020,122.70,1.203230
...,...,...,...,...,...
75,amazonFresh,07,2020,33.74,0.527143
76,walmartGrocery,07,2020,4676.81,73.068991
77,instacart,07,2020,273.63,4.275108
78,kroger,07,2020,1239.30,19.362429


### 3. Our meal team is looking to create a budget friendly meal plan and would like to avoid using expensive recipe ingredients. They would like to know the top 10 least expensive products our customers have purchased. 

In [110]:
query = f"""
WITH temp_table AS(
    SELECT
    brand,
    product_category,
    product_name,
    cost / quantity as unit_cost
    FROM {schema}.groceryorders
    WHERE
    quantity > 0
    AND
    cost > 0
    order by
    unit_cost ASC
),
avg_table AS (
    SELECT
    brand,
    product_category,
    product_name,
    AVG(unit_cost) as avg_unit_cost,
    RANK () OVER (ORDER BY avg_unit_cost ASC) AS rnk_avg_cost
    FROM temp_table
    GROUP BY
    brand,
    product_category,
    product_name
    ORDER BY
    avg_unit_cost ASC
)
SELECT
brand,
product_category,
product_name,
avg_unit_cost
FROM avg_table
WHERE
rnk_avg_cost <= 10
"""

In [111]:
result = redshift_query(host, database, user, password, query)
result

Unnamed: 0,brand,product_category,product_name,avg_unit_cost
0,Swanson,broth,swanson natural goodness chicken broth 14.5 oz...,0.57
1,Hillshire,ham,Carolina Pride Honey Ham,0.62
2,Swanson,broth,swansons swanson natural goodness chicken brot...,0.675
3,Swanson,broth,swanson 50% less sodium beef broth 14.5 oz.,0.756667
4,Swanson,broth,swanson chicken broth 14.5 oz. can,0.782941
5,Swanson,broth,swanson vegetable broth 14.5 oz. can,0.79
6,Swanson,broth,swanson 50% less sodium beef broth 14.5 oz. can,0.815
7,Swanson,broth,Swanson50% Less Sodium Beef Broth,0.830356
8,Swanson,broth,Swanson Clear Beef Broth,0.840866
9,Red Cap,seasoning,mccormick brown gravy mix 0.87 oz,0.85


### 4.The brand Tyson is considering partnering with eMeals to promote chicken products. Tyson would like to know what their current market share of online chicken sales and who are their competitors.

In [129]:
query = f"""
SELECT
brand,
product_category,
UPPER(product_name)
FROM {schema}.groceryorders
"""

In [130]:
result = redshift_query(host, database, user, password, query)
result

Unnamed: 0,brand,product_category,upper
0,Hillshire,kielbasa,JENNIE-O TURKEY HARDWOOD SMOKED TURKEY KIELBASA
1,Hillshire,kielbasa,HILLSHIRE FARM TURKEY POLSKA KIELBASA SMOKED S...
2,Hillshire,andouille,HILLSHIRE FARM CAJUN STYLE ANDOUILLE SMOKED SA...
3,Hillshire,kielbasa,HILLSHIRE FARM POLSKA KIELBASA SMOKED SAUSAGE ...
4,Hillshire,andouille,HILLSHIRE FARM CAJUN STYLE ANDOUILLE SMOKED SA...
...,...,...,...
31245,,chicken,TYSON BONELESS SKINLESS CHICKEN BREASTS
31246,,chicken,TYSON CHICKEN THIGHS
31247,,chicken,OSCAR MAYER DELI FRESH ROTISSERIE SEASONED CHI...
31248,,chicken,OSCAR MAYER DELI FRESH ROTISSERIE SEASONED CHI...


In [134]:
result[(result['upper'].str.contains('CHICKEN') == True) & (result['product_category'] == 'chicken')]

Unnamed: 0,brand,product_category,upper
22672,,chicken,FRESHNESS GUARANTEED BONELESS CHICKEN THIGHS
22673,,chicken,TYSON ALL NATURAL* BONELESS SKINLESS CHICKEN T...
22674,,chicken,PUBLIX GREENWISE BONE IN CHICKEN BREASTS WITH ...
22675,,chicken,TYSON TRIMMED & READY FRESH BONELESS SKINLESS ...
22676,,chicken,TYSON BONELESS SKINLESS CHICKEN BREAST TENDERL...
...,...,...,...
31245,,chicken,TYSON BONELESS SKINLESS CHICKEN BREASTS
31246,,chicken,TYSON CHICKEN THIGHS
31247,,chicken,OSCAR MAYER DELI FRESH ROTISSERIE SEASONED CHI...
31248,,chicken,OSCAR MAYER DELI FRESH ROTISSERIE SEASONED CHI...


In [119]:
result['product_category'].unique()

array(['kielbasa', 'andouille', 'smoked sausage', 'ham', 'salami',
       'roast beef', 'turkey breast', 'hot dog', 'sausage', 'cereal',
       'eggo', 'morningstar', 'beer', 'pastrami', 'wine', 'coke',
       'dasani', 'milk', 'sprite', 'pork', 'bacon', 'pepperoni', 'chips',
       'tea', 'smart water', 'pop-tarts', 'tortillas', 'condiment',
       'seasoning', 'asian', 'beef', 'soup', 'broth', 'pasta', 'sauce',
       'crackers', 'cheese', 'yogurt', 'butter', 'guacamole', 'hummus',
       'spices', 'seafood', 'plant based', 'spirits', None, 'Pasta',
       'chicken', 'champagne'], dtype=object)

### 5. A potential client would like to see the month-over-month change in wine sales as a percentage.

In [185]:
query = f"""
WITH lag_table AS(
SELECT
UPPER(product_category) AS product_category,
CAST(to_char(order_date, 'MM') AS int) month_order,
CAST(to_char(order_date, 'YYYY') AS INT) as year_order,
SUM(cost) as current_month,
LAG(current_month) OVER (PARTITION BY year_order ORDER BY year_order, month_order ASC ) AS previous_month,
current_month / previous_month - 1 AS diff
FROM {schema}.groceryorders
WHERE
UPPER(product_category) = 'WINE'
GROUP BY
product_category,
month_order,
year_order
)
SELECT
product_category,
month_order,
year_order,
diff
FROM
lag_table
"""

In [186]:
result = redshift_query(host, database, user, password, query)
result

Unnamed: 0,product_category,month_order,year_order,diff
0,WINE,12,2020,0.260522
1,WINE,11,2020,0.253437
2,WINE,10,2020,-0.064586
3,WINE,9,2020,0.086648
4,WINE,8,2020,-0.303858
5,WINE,7,2020,0.318119
6,WINE,6,2020,-0.087548
7,WINE,5,2020,0.749365
8,WINE,4,2020,-0.508381
9,WINE,3,2020,-0.172375


### 6. Our meal team is looking to use wine as recipe ingredient in order to help promote a brand and increase brand sales. They would like to know which brand to use in future recipes.

In [218]:
query = f"""
WITH participation_table AS (
SELECT
brand,
to_char(order_date, 'MM') as month_order,
to_char(order_date, 'YYYY') as year_order,
SUM(cost) as total_cost,
SUM(cost) * 100 / SUM(SUM(cost)) OVER(PARTITION BY month_order,year_order ) AS current_participation
FROM {schema}.groceryorders
WHERE
UPPER(product_category) = 'WINE'
GROUP BY
brand,
month_order,
year_order
),
lag_table AS(
    SELECT
    brand,
    month_order,
    year_order,
    total_cost,
    current_participation,    
    LAG(current_participation) OVER (PARTITION BY brand, year_order ORDER BY brand, year_order, month_order ASC ) AS previous_participation
    FROM participation_table
    GROUP BY
    brand,
    month_order,
    year_order,
    total_cost,
    current_participation
)

SELECT
brand,
month_order,
year_order,
current_participation / previous_participation - 1 as diff
FROM lag_table
WHERE
diff IS NOT NULL
ORDER BY
year_order ASC,
month_order ASC,
diff DESC
"""

In [219]:
result = redshift_query(host, database, user, password, query)
result

Unnamed: 0,brand,month_order,year_order,diff
0,Black Box,02,2020,1.842563
1,Barefoot,02,2020,0.195752
2,Woodbridge,02,2020,0.046746
3,Robert Mondavi Private Selection,02,2020,-0.071567
4,Kendall-Jackson,02,2020,-0.207881
...,...,...,...,...
98,Barefoot,02,2021,-0.336260
99,Apothic,02,2021,-0.339460
100,19 Crimes,02,2021,-0.413935
101,Mirassou,02,2021,-0.433941
