In [1]:
import pandas as pd
import numpy as np
import os

import datetime
from prettytable import PrettyTable
import pymysql
import yaml
from sqlalchemy import create_engine
from sqlalchemy import text


In [2]:
root_dir = os.getcwd()

In [3]:
# Function to read YAML file
def read_yaml(file_path):
    with open(file_path, 'r') as file:
        try:
            # Load the YAML content into a Python dictionary
            data = yaml.safe_load(file)
            return data
        except yaml.YAMLError as exc:
            print(f"Error reading YAML file: {exc}")
            return None

In [4]:
file_path = os.path.join(root_dir,'MYSQL_user_config.yaml')
config = read_yaml(file_path)

user = config.get('MYSQL_credentials')['user']
password = config.get('MYSQL_credentials')['password']
host = config.get('MYSQL_credentials')['host']
port = config.get('MYSQL_credentials')['port']

In [5]:
def run_query(engine,query):
    try:
        with engine.connect() as connection:
            result = connection.execute(text(query))
            return result.fetchall()
    except Exception as e:
        print(f"Error in the query -> {e}")

In [6]:
db_url = f'mysql+pymysql://{user}:{password}@{host}:{port}'
db_name = "FETCH_DB"
engine = create_engine(f'{db_url}'+'/'+f'{db_name}')
run_query(engine, f'USE {db_name}')

Error in the query -> This result object does not return rows. It has been closed automatically.


### When considering average spend from receipts with 'rewardsReceiptStatus’ of ‘Accepted’ or ‘Rejected’, which is greater?

Irrespective of reward points earned or not, here is the query, to know the brand with the most spending from the users

In [21]:
query_5 = f"""
SELECT COALESCE(RR.brandCode,RR.description_first_word) AS Brand, SUM(RR.finalPrice) AS total_money_spent
FROM rewards_receipts RR
INNER JOIN receipts R
ON RR.receipt_id = R.receipt_id
INNER JOIN users U
ON R.userId = U.user_id
WHERE DATE_FORMAT(U.created_date_time,'%Y%m') <= (SELECT DATE_FORMAT(MAX(created_date_time),'%Y%m') FROM users)
AND DATE_FORMAT(U.created_date_time,'%Y%m') > (SELECT DATE_FORMAT(DATE_SUB(MAX(created_date_time), INTERVAL 6 MONTH), '%Y%m') FROM users)
GROUP BY COALESCE(RR.brandCode,RR.description_first_word)
ORDER BY total_money_spent DESC
LIMIT 1;
"""
results=run_query(engine,query_5)

columns = ["brand", "total_money_spent"]
table = PrettyTable()
table.field_names = columns


for row in results:
    table.add_row(row)

print(table)

+---------+-------------------+
|  brand  | total_money_spent |
+---------+-------------------+
| HUGGIES |      1931.92      |
+---------+-------------------+


In [27]:
query_6 = f"""
SELECT COALESCE(RR.brandCode,RR.description_first_word) AS Brand, COUNT(*) AS total_transactions
FROM rewards_receipts RR
INNER JOIN receipts R
ON RR.receipt_id = R.receipt_id
INNER JOIN users U
ON R.userId = U.user_id
WHERE DATE_FORMAT(U.created_date_time,'%Y%m') <= (SELECT DATE_FORMAT(MAX(created_date_time),'%Y%m') FROM users)
AND DATE_FORMAT(U.created_date_time,'%Y%m') > (SELECT DATE_FORMAT(DATE_SUB(MAX(created_date_time), INTERVAL 6 MONTH), '%Y%m') FROM users)
GROUP BY COALESCE(RR.brandCode,RR.description_first_word)
ORDER BY total_transactions DESC
LIMIT 1;
"""
results=run_query(engine,query_6)

columns = ["brand", "total_transactions"]
table = PrettyTable()
table.field_names = columns


for row in results:
    table.add_row(row)

print(table)

+--------+--------------------+
| brand  | total_transactions |
+--------+--------------------+
| HY-VEE |        296         |
+--------+--------------------+
