#Create Date Ranges

In [0]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
import pandas as pd

# Function to generate date ranges for rolling 3-month windows
def generate_date_ranges(start_date, end_date):
    """Build rolling 3-month windows, one per calendar month, oldest first.

    One window is produced for every calendar month from the month containing
    ``start_date`` through the month containing ``end_date`` (both inclusive).
    Only the year and month of the arguments are used. Iterating on whole
    months avoids the day-of-month drift of stepping a raw date back one month
    at a time (e.g. the 31st becomes the 29th after passing February), which
    could otherwise drop the earliest month from the result.

    Args:
        start_date: Earliest anchor month, as a "YYYY-MM-DD" string.
        end_date: Latest anchor month, as a "YYYY-MM-DD" string.

    Returns:
        A chronologically ordered list of dicts with string values:
            start_window      first day of the month two months before the
                              anchor month ("YYYY-MM-DD")
            end_window        last day of the anchor month ("YYYY-MM-DD")
            next_month_start  first day of the month after the anchor month
            next_month_end    last day of the month after the anchor month
            retention_month   month after the anchor month ("YYYYMM")
            time_window_start month of start_window ("YYYYMM")
            time_window_end   month of end_window ("YYYYMM")
        The list is empty when ``start_date`` lies in a later month than
        ``end_date``.

    Raises:
        ValueError: If either argument does not match "%Y-%m-%d".
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timedelta

    def month_start(month_index):
        # month_index counts months since year 0 (year * 12 + month - 1).
        year, month_zero_based = divmod(month_index, 12)
        return datetime(year, month_zero_based + 1, 1)

    first = datetime.strptime(start_date, "%Y-%m-%d")
    last = datetime.strptime(end_date, "%Y-%m-%d")
    first_index = first.year * 12 + first.month - 1
    last_index = last.year * 12 + last.month - 1
    one_day = timedelta(days=1)

    date_ranges = []
    for anchor in range(first_index, last_index + 1):
        start_window = month_start(anchor - 2)
        # Last day of a month = first day of the following month minus one day.
        end_window = month_start(anchor + 1) - one_day
        next_month_start = month_start(anchor + 1)
        next_month_end = month_start(anchor + 2) - one_day

        date_ranges.append({
            "start_window": start_window.strftime("%Y-%m-%d"),
            "end_window": end_window.strftime("%Y-%m-%d"),
            "next_month_start": next_month_start.strftime("%Y-%m-%d"),
            "next_month_end": next_month_end.strftime("%Y-%m-%d"),
            "retention_month": next_month_start.strftime("%Y%m"),
            "time_window_start": start_window.strftime("%Y%m"),
            "time_window_end": end_window.strftime("%Y%m"),
        })

    return date_ranges

# Generate one rolling 3-month window per anchor month from 2022-12 through
# 2024-12 inclusive (25 windows). Each window is paired with the month that
# follows it, so the last retention month is 2025-01.
date_ranges = generate_date_ranges("2022-12-01", "2024-12-31")

#UAE Regions

In [0]:
# UAE per-region retention: for each region and each rolling window in
# `date_ranges`, count the customers active in the 3-month window and how many
# of them purchased again in the following month, then append the rows to
# dev.sandbox.pj_uae_loyalty_customers_retention.
regions = ['ABU DHABI', 'AL AIN', 'DUBAI', 'SHARJAH']
for region in regions:
    results = []  # reset per region; each region's rows are written separately below
    for dr in date_ranges:
        # Query structure (returns one row per region/window):
        #   all_customers    - distinct customer_ids with a qualifying sale in
        #                      this region during [start_window, end_window],
        #                      limited to profiles with loyalty_program_id = 1
        #                      and non-null LHRDate_utc / lhrdate.
        #   recent_customers - how many of those customers have a qualifying
        #                      sale in the same region during the next month.
        #   final SELECT     - cross join of the two CTEs producing
        #                      total_customers, recent_customers and
        #                      retention_perc (rounded to 1 decimal).
        # A qualifying sale has amount > 0, quantity > 0, a transaction type
        # other than RR/RT, and a store with tayeb_flag = 0.
        # NOTE(review): retention_perc reuses the total_customers alias defined
        # in the same SELECT list; this presumably relies on lateral column
        # alias support in the SQL engine - confirm for the target runtime.
        sql_query = f"""
        WITH all_customers AS (
            SELECT
                t1.customer_id
            FROM gold.transaction.uae_pos_transactions AS t1
            JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_id = t2.maxxing_account_key
            JOIN gold.store.store_master AS t3 ON t1.store_id = t3.store_id
            WHERE
                t1.business_day BETWEEN '{dr['start_window']}' AND '{dr['end_window']}'
                AND t2.LHRDate_utc IS NOT NULL
                AND t2.lhrdate IS NOT NULL
                AND t2.loyalty_program_id = 1
                AND t3.tayeb_flag = 0
                AND t3.region_name = "{region}"
                AND t1.transaction_type_id NOT IN ("RR", "RT")
                AND t1.amount > 0
                AND t1.quantity > 0
            GROUP BY 1
        ),

        recent_customers AS (
            SELECT
                COUNT(DISTINCT t1.customer_id) AS recent_customers
            FROM gold.transaction.uae_pos_transactions AS t1
            JOIN all_customers AS t2 ON t1.customer_id = t2.customer_id
            JOIN gold.store.store_master AS t3 ON t1.store_id = t3.store_id
            WHERE
                t1.business_day BETWEEN '{dr['next_month_start']}' AND '{dr['next_month_end']}'
                AND t1.transaction_type_id NOT IN ("RR", "RT")
                AND t3.tayeb_flag = 0
                AND t3.region_name = "{region}"
                AND t1.amount > 0
                AND t1.quantity > 0
        )

        SELECT
            "{region}" AS region_name,
            COUNT(DISTINCT t1.customer_id) AS total_customers,
            t2.recent_customers,
            ROUND(t2.recent_customers / total_customers * 100, 1) AS retention_perc
        FROM all_customers AS t1, recent_customers AS t2
        GROUP BY t2.recent_customers
        """
        
        # Run the query through the Spark session and pull the result to the
        # driver as a pandas DataFrame.
        result = spark.sql(sql_query).toPandas()
        # Label the row with its window ("YYYYMM to YYYYMM") and retention month (YYYYMM).
        result['time_window_year_month'] = dr['time_window_start'] + ' to ' + dr['time_window_end']
        result['retention_year_month'] = dr['retention_month']
        results.append(result)

    # Combine all window results for this region into a single DataFrame
    final_results = pd.concat(results, ignore_index=True)

    final_results = spark.createDataFrame(final_results)
    # mode("append") adds rows on every run, so re-running this cell stores the
    # same region/window rows again.
    # NOTE(review): overwriteSchema is normally paired with overwrite mode; it
    # likely has no effect on an append - confirm.
    final_results.write.option("overwriteSchema", "true").mode("append").saveAsTable("dev.sandbox.pj_uae_loyalty_customers_retention")

#UAE

In [0]:
# UAE country-level retention: one query per rolling window in `date_ranges`,
# appended to dev.sandbox.pj_gcc_loyalty_customers_retention with country "UAE".
results = []
for dr in date_ranges:
    # Query structure (returns one row per window):
    #   all_customers    - distinct customer_ids with a qualifying sale during
    #                      [start_window, end_window], limited to profiles with
    #                      loyalty_program_id = 1 and non-null LHRDate_utc /
    #                      lhrdate (profile joined on maxxing_account_key).
    #   recent_customers - how many of those customers have a qualifying sale
    #                      during the next month.
    #   final SELECT     - cross join of the two CTEs producing total_customers,
    #                      recent_customers and retention_perc (1 decimal).
    # A qualifying sale has amount > 0, quantity > 0 and a transaction type
    # other than RR/RT. No store or region filter is applied here.
    sql_query = f"""
    WITH all_customers AS (
        SELECT t1.customer_id
        FROM gold.transaction.uae_pos_transactions AS t1
        JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_id = t2.maxxing_account_key
        WHERE
            t1.business_day BETWEEN '{dr['start_window']}' AND '{dr['end_window']}'
            AND t2.LHRDate_utc IS NOT NULL
            AND t2.lhrdate IS NOT NULL
            AND t2.loyalty_program_id = 1
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
        GROUP BY 1
    ),

    recent_customers AS (
        SELECT COUNT(DISTINCT t1.customer_id) AS recent_customers
        FROM gold.transaction.uae_pos_transactions AS t1
        JOIN all_customers AS t2 ON t1.customer_id = t2.customer_id
        WHERE
            t1.business_day BETWEEN '{dr['next_month_start']}' AND '{dr['next_month_end']}'
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
    )

    SELECT
        "UAE" AS country,
        COUNT(DISTINCT t1.customer_id) AS total_customers,
        t2.recent_customers,
        ROUND(t2.recent_customers / total_customers * 100, 1) AS retention_perc
    FROM all_customers AS t1, recent_customers AS t2
    GROUP BY t2.recent_customers
    """
    
    # Run the query through the Spark session and pull the result to the
    # driver as a pandas DataFrame.
    result = spark.sql(sql_query).toPandas()
    # Label the row with its window ("YYYYMM to YYYYMM") and retention month (YYYYMM).
    result['time_window_year_month'] = dr['time_window_start'] + ' to ' + dr['time_window_end']
    result['retention_year_month'] = dr['retention_month']
    results.append(result)

# Combine all window results into a single DataFrame
final_results = pd.concat(results, ignore_index=True)

final_results = spark.createDataFrame(final_results)
# mode("append") adds rows on every run, so re-running this cell stores the
# same UAE rows again in the shared GCC table.
# NOTE(review): overwriteSchema is normally paired with overwrite mode; it
# likely has no effect on an append - confirm.
final_results.write.option("overwriteSchema", "true").mode("append").saveAsTable("dev.sandbox.pj_gcc_loyalty_customers_retention")

In [0]:
%sql
-- Ad-hoc UAE retention check with hard-coded dates.
-- all_customers: distinct customer_ids (loyalty_program_id = 1, non-null
-- LHRDate_utc / lhrdate) with a qualifying sale between 2024-01-01 and 2024-03-31.
-- recent_customers: how many of them have a qualifying sale between
-- 2024-04-01 and 2024-04-10. This covers only the first 10 days of April, so
-- the result is not comparable with a full-month retention figure.
-- A qualifying sale has amount > 0, quantity > 0 and a transaction type other
-- than RR/RT.
WITH all_customers AS (
    SELECT t1.customer_id
    FROM gold.transaction.uae_pos_transactions AS t1
    JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_id = t2.maxxing_account_key
    WHERE
        t1.business_day BETWEEN '2024-01-01' AND '2024-03-31'
        AND t2.LHRDate_utc IS NOT NULL
        AND t2.lhrdate IS NOT NULL
        AND t2.loyalty_program_id = 1
        AND t1.transaction_type_id NOT IN ("RR", "RT")
        AND t1.amount > 0
        AND t1.quantity > 0
    GROUP BY 1
),

recent_customers AS (
    SELECT COUNT(DISTINCT t1.customer_id) AS recent_customers
    FROM gold.transaction.uae_pos_transactions AS t1
    JOIN all_customers AS t2 ON t1.customer_id = t2.customer_id
    WHERE
        t1.business_day BETWEEN '2024-04-01' AND '2024-04-10'
        AND t1.transaction_type_id NOT IN ("RR", "RT")
        AND t1.amount > 0
        AND t1.quantity > 0
)

SELECT
    "UAE" AS country,
    COUNT(DISTINCT t1.customer_id) AS total_customers,
    t2.recent_customers,
    ROUND(t2.recent_customers / total_customers * 100, 1) AS retention_perc
FROM all_customers AS t1, recent_customers AS t2
GROUP BY t2.recent_customers

In [0]:
%sql
-- Department-level sales growth for UAE loyalty_program_id = 1 customers,
-- restricted to 2024-01-01 .. 2024-04-30.
--   q1_sales    : Jan-Mar 2024 sales divided by 3, i.e. a monthly average, not the quarter total.
--   april_sales : sales after 2024-03-31, which is April 2024 given the WHERE range.
--   growth      : percentage change of april_sales versus q1_sales, computed
--                 from the already-rounded alias values.
-- Only rows with amount > 0, quantity > 0 and a transaction type other than
-- RR/RT are included.
SELECT
    t3.department_name,
    ROUND(SUM(CASE WHEN t1.business_day <= '2024-03-31' THEN amount END) / 3) AS q1_sales,
    ROUND(SUM(CASE WHEN t1.business_day > '2024-03-31' THEN amount END)) AS april_sales,
    ROUND((april_sales - q1_sales) / q1_sales * 100, 2) AS growth
FROM gold.transaction.uae_pos_transactions AS t1
JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_id = t2.maxxing_account_key
JOIN gold.material.material_master AS t3 ON t1.product_id = t3.material_id
WHERE
    t1.business_day BETWEEN '2024-01-01' AND '2024-04-30'
    AND t2.LHRDate_utc IS NOT NULL
    AND t2.lhrdate IS NOT NULL
    AND t2.loyalty_program_id = 1
    AND t1.transaction_type_id NOT IN ("RR", "RT")
    AND t1.amount > 0
    AND t1.quantity > 0
GROUP BY 1
ORDER BY growth DESC

In [0]:
%sql
-- Department-level sales growth for UAE loyalty_program_id = 1 customers,
-- restricted to 2024-04-01 .. 2024-07-31.
-- NOTE(review): the column aliases do not match the dates used here:
--   q1_sales    : actually Apr-Jun 2024 sales divided by 3 (monthly average).
--   april_sales : actually sales after 2024-06-30, which is July 2024 given the WHERE range.
--   growth      : percentage change of the July figure versus the Apr-Jun
--                 monthly average, computed from the already-rounded alias values.
-- Only rows with amount > 0, quantity > 0 and a transaction type other than
-- RR/RT are included.
SELECT
    t3.department_name,
    ROUND(SUM(CASE WHEN t1.business_day <= '2024-06-30' THEN amount END) / 3) AS q1_sales,
    ROUND(SUM(CASE WHEN t1.business_day > '2024-06-30' THEN amount END)) AS april_sales,
    ROUND((april_sales - q1_sales) / q1_sales * 100, 2) AS growth
FROM gold.transaction.uae_pos_transactions AS t1
JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_id = t2.maxxing_account_key
JOIN gold.material.material_master AS t3 ON t1.product_id = t3.material_id
WHERE
    t1.business_day BETWEEN '2024-04-01' AND '2024-07-31'
    AND t2.LHRDate_utc IS NOT NULL
    AND t2.lhrdate IS NOT NULL
    AND t2.loyalty_program_id = 1
    AND t1.transaction_type_id NOT IN ("RR", "RT")
    AND t1.amount > 0
    AND t1.quantity > 0
GROUP BY 1
ORDER BY growth DESC

#Qatar

In [0]:
# Qatar retention: one query per rolling window in `date_ranges`, appended to
# dev.sandbox.pj_gcc_loyalty_customers_retention with country "Qatar".
results = []
for dr in date_ranges:
    # Query structure (returns one row per window):
    #   all_customers    - distinct customer_keys from qatar_pos_transactions
    #                      with a qualifying sale during [start_window,
    #                      end_window], limited to profiles with
    #                      loyalty_program_id = 2 and non-null LHRDate_utc /
    #                      lhrdate (profile joined on customer_key).
    #   recent_customers - how many of those customers have a qualifying sale
    #                      during the next month.
    #   final SELECT     - cross join of the two CTEs producing total_customers,
    #                      recent_customers and retention_perc (1 decimal).
    # A qualifying sale has amount > 0, quantity > 0 and a transaction type
    # other than RR/RT.
    sql_query = f"""
    WITH all_customers AS (
        SELECT t1.customer_key
        FROM gold.transaction.qatar_pos_transactions AS t1
        JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['start_window']}' AND '{dr['end_window']}'
            AND t2.LHRDate_utc IS NOT NULL
            AND t2.lhrdate IS NOT NULL
            AND t2.loyalty_program_id = 2
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
        GROUP BY 1
    ),

    recent_customers AS (
        SELECT COUNT(DISTINCT t1.customer_key) AS recent_customers
        FROM gold.transaction.qatar_pos_transactions AS t1
        JOIN all_customers AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['next_month_start']}' AND '{dr['next_month_end']}'
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
    )

    SELECT
        "Qatar" AS country,
        COUNT(DISTINCT t1.customer_key) AS total_customers,
        t2.recent_customers,
        ROUND(t2.recent_customers / total_customers * 100, 1) AS retention_perc
    FROM all_customers AS t1, recent_customers AS t2
    GROUP BY t2.recent_customers
    """
    
    # Run the query through the Spark session and pull the result to the
    # driver as a pandas DataFrame.
    result = spark.sql(sql_query).toPandas()
    # Label the row with its window ("YYYYMM to YYYYMM") and retention month (YYYYMM).
    result['time_window_year_month'] = dr['time_window_start'] + ' to ' + dr['time_window_end']
    result['retention_year_month'] = dr['retention_month']
    results.append(result)

# Combine all window results into a single DataFrame
final_results = pd.concat(results, ignore_index=True)

final_results = spark.createDataFrame(final_results)
# mode("append") adds rows on every run, so re-running this cell stores the
# same Qatar rows again in the shared GCC table.
# NOTE(review): overwriteSchema is normally paired with overwrite mode; it
# likely has no effect on an append - confirm.
final_results.write.option("overwriteSchema", "true").mode("append").saveAsTable("dev.sandbox.pj_gcc_loyalty_customers_retention")

#Saudi Arabia

In [0]:
# Saudi Arabia retention: one query per rolling window in `date_ranges`,
# appended to dev.sandbox.pj_gcc_loyalty_customers_retention with country
# "Saudi Arabia".
results = []
for dr in date_ranges:
    # Query structure (returns one row per window):
    #   all_customers    - distinct customer_keys from saudi_pos_transactions
    #                      with a qualifying sale during [start_window,
    #                      end_window], limited to profiles with
    #                      loyalty_program_id = 3 and non-null LHRDate_utc /
    #                      lhrdate (profile joined on customer_key).
    #   recent_customers - how many of those customers have a qualifying sale
    #                      during the next month.
    #   final SELECT     - cross join of the two CTEs producing total_customers,
    #                      recent_customers and retention_perc (1 decimal).
    # A qualifying sale has amount > 0, quantity > 0 and a transaction type
    # other than RR/RT.
    sql_query = f"""
    WITH all_customers AS (
        SELECT t1.customer_key
        FROM gold.transaction.saudi_pos_transactions AS t1
        JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['start_window']}' AND '{dr['end_window']}'
            AND t2.LHRDate_utc IS NOT NULL
            AND t2.lhrdate IS NOT NULL
            AND t2.loyalty_program_id = 3
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
        GROUP BY 1
    ),

    recent_customers AS (
        SELECT COUNT(DISTINCT t1.customer_key) AS recent_customers
        FROM gold.transaction.saudi_pos_transactions AS t1
        JOIN all_customers AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['next_month_start']}' AND '{dr['next_month_end']}'
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
    )

    SELECT
        "Saudi Arabia" AS country,
        COUNT(DISTINCT t1.customer_key) AS total_customers,
        t2.recent_customers,
        ROUND(t2.recent_customers / total_customers * 100, 1) AS retention_perc
    FROM all_customers AS t1, recent_customers AS t2
    GROUP BY t2.recent_customers
    """
    
    # Run the query through the Spark session and pull the result to the
    # driver as a pandas DataFrame.
    result = spark.sql(sql_query).toPandas()
    # Label the row with its window ("YYYYMM to YYYYMM") and retention month (YYYYMM).
    result['time_window_year_month'] = dr['time_window_start'] + ' to ' + dr['time_window_end']
    result['retention_year_month'] = dr['retention_month']
    results.append(result)

# Combine all window results into a single DataFrame
final_results = pd.concat(results, ignore_index=True)

final_results = spark.createDataFrame(final_results)
# mode("append") adds rows on every run, so re-running this cell stores the
# same Saudi Arabia rows again in the shared GCC table.
# NOTE(review): overwriteSchema is normally paired with overwrite mode; it
# likely has no effect on an append - confirm.
final_results.write.option("overwriteSchema", "true").mode("append").saveAsTable("dev.sandbox.pj_gcc_loyalty_customers_retention")

#Bahrain

In [0]:
# Bahrain retention: one query per rolling window in `date_ranges`, appended to
# dev.sandbox.pj_gcc_loyalty_customers_retention with country "Bahrain".
results = []
for dr in date_ranges:
    # Query structure (returns one row per window):
    #   all_customers    - distinct customer_keys from bahrain_pos_transactions
    #                      with a qualifying sale during [start_window,
    #                      end_window], limited to profiles with
    #                      loyalty_program_id = 4 and non-null LHRDate_utc /
    #                      lhrdate (profile joined on customer_key).
    #   recent_customers - how many of those customers have a qualifying sale
    #                      during the next month.
    #   final SELECT     - cross join of the two CTEs producing total_customers,
    #                      recent_customers and retention_perc (1 decimal).
    # A qualifying sale has amount > 0, quantity > 0 and a transaction type
    # other than RR/RT.
    sql_query = f"""
    WITH all_customers AS (
        SELECT t1.customer_key
        FROM gold.transaction.bahrain_pos_transactions AS t1
        JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['start_window']}' AND '{dr['end_window']}'
            AND t2.LHRDate_utc IS NOT NULL
            AND t2.lhrdate IS NOT NULL
            AND t2.loyalty_program_id = 4
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
        GROUP BY 1
    ),

    recent_customers AS (
        SELECT COUNT(DISTINCT t1.customer_key) AS recent_customers
        FROM gold.transaction.bahrain_pos_transactions AS t1
        JOIN all_customers AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['next_month_start']}' AND '{dr['next_month_end']}'
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
    )

    SELECT
        "Bahrain" AS country,
        COUNT(DISTINCT t1.customer_key) AS total_customers,
        t2.recent_customers,
        ROUND(t2.recent_customers / total_customers * 100, 1) AS retention_perc
    FROM all_customers AS t1, recent_customers AS t2
    GROUP BY t2.recent_customers
    """
    
    # Run the query through the Spark session and pull the result to the
    # driver as a pandas DataFrame.
    result = spark.sql(sql_query).toPandas()
    # Label the row with its window ("YYYYMM to YYYYMM") and retention month (YYYYMM).
    result['time_window_year_month'] = dr['time_window_start'] + ' to ' + dr['time_window_end']
    result['retention_year_month'] = dr['retention_month']
    results.append(result)

# Combine all window results into a single DataFrame
final_results = pd.concat(results, ignore_index=True)

final_results = spark.createDataFrame(final_results)
# mode("append") adds rows on every run, so re-running this cell stores the
# same Bahrain rows again in the shared GCC table.
# NOTE(review): overwriteSchema is normally paired with overwrite mode; it
# likely has no effect on an append - confirm.
final_results.write.option("overwriteSchema", "true").mode("append").saveAsTable("dev.sandbox.pj_gcc_loyalty_customers_retention")

#Oman

In [0]:
# Oman retention: one query per rolling window in `date_ranges`, appended to
# dev.sandbox.pj_gcc_loyalty_customers_retention with country "Oman".
results = []
for dr in date_ranges:
    # Query structure (returns one row per window):
    #   all_customers    - distinct customer_keys from oman_pos_transactions
    #                      with a qualifying sale during [start_window,
    #                      end_window], limited to profiles with
    #                      loyalty_program_id = 5 and non-null LHRDate_utc /
    #                      lhrdate (profile joined on customer_key).
    #   recent_customers - how many of those customers have a qualifying sale
    #                      during the next month.
    #   final SELECT     - cross join of the two CTEs producing total_customers,
    #                      recent_customers and retention_perc (1 decimal).
    # A qualifying sale has amount > 0, quantity > 0 and a transaction type
    # other than RR/RT.
    sql_query = f"""
    WITH all_customers AS (
        SELECT t1.customer_key
        FROM gold.transaction.oman_pos_transactions AS t1
        JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['start_window']}' AND '{dr['end_window']}'
            AND t2.LHRDate_utc IS NOT NULL
            AND t2.lhrdate IS NOT NULL
            AND t2.loyalty_program_id = 5
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
        GROUP BY 1
    ),

    recent_customers AS (
        SELECT COUNT(DISTINCT t1.customer_key) AS recent_customers
        FROM gold.transaction.oman_pos_transactions AS t1
        JOIN all_customers AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['next_month_start']}' AND '{dr['next_month_end']}'
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
    )

    SELECT
        "Oman" AS country,
        COUNT(DISTINCT t1.customer_key) AS total_customers,
        t2.recent_customers,
        ROUND(t2.recent_customers / total_customers * 100, 1) AS retention_perc
    FROM all_customers AS t1, recent_customers AS t2
    GROUP BY t2.recent_customers
    """
    
    # Run the query through the Spark session and pull the result to the
    # driver as a pandas DataFrame.
    result = spark.sql(sql_query).toPandas()
    # Label the row with its window ("YYYYMM to YYYYMM") and retention month (YYYYMM).
    result['time_window_year_month'] = dr['time_window_start'] + ' to ' + dr['time_window_end']
    result['retention_year_month'] = dr['retention_month']
    results.append(result)

# Combine all window results into a single DataFrame
final_results = pd.concat(results, ignore_index=True)

final_results = spark.createDataFrame(final_results)
# mode("append") adds rows on every run, so re-running this cell stores the
# same Oman rows again in the shared GCC table.
# NOTE(review): overwriteSchema is normally paired with overwrite mode; it
# likely has no effect on an append - confirm.
final_results.write.option("overwriteSchema", "true").mode("append").saveAsTable("dev.sandbox.pj_gcc_loyalty_customers_retention")

#Kuwait

In [0]:
# Kuwait retention: one query per rolling window in `date_ranges`, appended to
# dev.sandbox.pj_gcc_loyalty_customers_retention with country "Kuwait".
results = []
for dr in date_ranges:
    # Query structure (returns one row per window):
    #   all_customers    - distinct customer_keys from kuwait_pos_transactions
    #                      with a qualifying sale during [start_window,
    #                      end_window], limited to profiles with
    #                      loyalty_program_id = 6 and non-null LHRDate_utc /
    #                      lhrdate (profile joined on customer_key).
    #   recent_customers - how many of those customers have a qualifying sale
    #                      during the next month.
    #   final SELECT     - cross join of the two CTEs producing total_customers,
    #                      recent_customers and retention_perc (1 decimal).
    # A qualifying sale has amount > 0, quantity > 0 and a transaction type
    # other than RR/RT.
    sql_query = f"""
    WITH all_customers AS (
        SELECT t1.customer_key
        FROM gold.transaction.kuwait_pos_transactions AS t1
        JOIN gold.customer.vynamic_customer_profile AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['start_window']}' AND '{dr['end_window']}'
            AND t2.LHRDate_utc IS NOT NULL
            AND t2.lhrdate IS NOT NULL
            AND t2.loyalty_program_id = 6
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
        GROUP BY 1
    ),

    recent_customers AS (
        SELECT COUNT(DISTINCT t1.customer_key) AS recent_customers
        FROM gold.transaction.kuwait_pos_transactions AS t1
        JOIN all_customers AS t2 ON t1.customer_key = t2.customer_key
        WHERE
            t1.business_day BETWEEN '{dr['next_month_start']}' AND '{dr['next_month_end']}'
            AND t1.transaction_type_id NOT IN ("RR", "RT")
            AND t1.amount > 0
            AND t1.quantity > 0
    )

    SELECT
        "Kuwait" AS country,
        COUNT(DISTINCT t1.customer_key) AS total_customers,
        t2.recent_customers,
        ROUND(t2.recent_customers / total_customers * 100, 1) AS retention_perc
    FROM all_customers AS t1, recent_customers AS t2
    GROUP BY t2.recent_customers
    """
    
    # Run the query through the Spark session and pull the result to the
    # driver as a pandas DataFrame.
    result = spark.sql(sql_query).toPandas()
    # Label the row with its window ("YYYYMM to YYYYMM") and retention month (YYYYMM).
    result['time_window_year_month'] = dr['time_window_start'] + ' to ' + dr['time_window_end']
    result['retention_year_month'] = dr['retention_month']
    results.append(result)

# Combine all window results into a single DataFrame
final_results = pd.concat(results, ignore_index=True)

final_results = spark.createDataFrame(final_results)
# mode("append") adds rows on every run, so re-running this cell stores the
# same Kuwait rows again in the shared GCC table.
# NOTE(review): overwriteSchema is normally paired with overwrite mode; it
# likely has no effect on an append - confirm.
final_results.write.option("overwriteSchema", "true").mode("append").saveAsTable("dev.sandbox.pj_gcc_loyalty_customers_retention")

#Export CSV

In [0]:
# Export the country-level retention table: load every row to the driver as a
# pandas DataFrame and write it to a workspace CSV without the index column.
df = spark.sql("SELECT * FROM dev.sandbox.pj_gcc_loyalty_customers_retention").toPandas()
df.to_csv("/Workspace/Users/prem@loyalytics.in/Ad-hoc/gcc_loyalty_customers_retention.csv", index = False)

In [0]:
# Export the UAE per-region retention table: load every row to the driver as a
# pandas DataFrame and write it to a workspace CSV without the index column.
df = spark.sql("SELECT * FROM dev.sandbox.pj_uae_loyalty_customers_retention").toPandas()
df.to_csv("/Workspace/Users/prem@loyalytics.in/Ad-hoc/uae_loyalty_customers_retention.csv", index = False)