In [32]:
## Importing necessary libraries
import pandas as pd
import numpy as np
import os

import datetime
from prettytable import PrettyTable
import pymysql
import yaml
from sqlalchemy import create_engine
from sqlalchemy import text


In [33]:
## Root_directory
root_dir = os.getcwd()

In [34]:
## Functions

# Function to read YAML file
def read_yaml(file_path):
    with open(file_path, 'r') as file:
        try:
            # Load the YAML content into a Python dictionary
            data = yaml.safe_load(file)
            return data
        except yaml.YAMLError as exc:
            print(f"Error reading YAML file: {exc}")
            return None

# Run Query function
def run_query(engine,query):
    try:
        with engine.connect() as connection:
            result = connection.execute(text(query))
            return result.fetchall()
    except Exception as e:
        print(f"Error in the query -> {e}")

In [35]:
## Reading MYSQL config file

file_path = os.path.join(root_dir,'MYSQL_user_config.yaml')
config = read_yaml(file_path)

user = config.get('MYSQL_credentials')['user']
password = config.get('MYSQL_credentials')['password']
host = config.get('MYSQL_credentials')['host']
port = config.get('MYSQL_credentials')['port']

In [36]:
## Engine configuration

db_url = f'mysql+pymysql://{user}:{password}@{host}:{port}'
db_name = "FETCH_DB_Amarthya"
engine = create_engine(f'{db_url}'+'/'+f'{db_name}')
run_query(engine, f'USE {db_name}')

Error in the query -> This result object does not return rows. It has been closed automatically.


### When considering average spend from receipts with 'rewardsReceiptStatus’ of ‘Accepted’ or ‘Rejected’, which is greater?

In [37]:
## Query 3

query_3 = f"""
SELECT R.rewardsReceiptStatus As RewardStatus,
ROUND(AVG(COALESCE(R.totalSpent,0)),4) AS Avg_total_spent
FROM receipts R
GROUP BY R.rewardsReceiptStatus
HAVING R.rewardsReceiptStatus = 'FINISHED' or R.rewardsReceiptStatus = 'REJECTED'
ORDER BY Avg_total_spent DESC;
"""
results=run_query(engine,query_3)

columns = ["RewardsStatus", "Avg_total_spent"]
table = PrettyTable()
table.field_names = columns


for row in results:
    table.add_row(row)

print(table)

+---------------+-----------------+
| RewardsStatus | Avg_total_spent |
+---------------+-----------------+
|    FINISHED   |     80.8543     |
|    REJECTED   |     23.3261     |
+---------------+-----------------+


In [38]:
## Query 4

query_4 = f"""
SELECT R.rewardsReceiptStatus As RewardStatus,
ROUND(SUM(COALESCE(R.purchasedItemCount,0)),4) AS Total_number_of_items_purchased
FROM receipts R
GROUP BY R.rewardsReceiptStatus
HAVING R.rewardsReceiptStatus = 'FINISHED' or R.rewardsReceiptStatus = 'REJECTED'
ORDER BY Total_number_of_items_purchased DESC;
"""
results=run_query(engine,query_4)

columns = ["RewardsStatus", "Total_number_of_items_purchased"]
table = PrettyTable()
table.field_names = columns


for row in results:
    table.add_row(row)

print(table)

+---------------+---------------------------------+
| RewardsStatus | Total_number_of_items_purchased |
+---------------+---------------------------------+
|    FINISHED   |              8184.0             |
|    REJECTED   |              173.0              |
+---------------+---------------------------------+
