### Connecting to Postgresql

In [None]:
pip install sqlalchemy #helps python program to talk to databases

In [None]:
pip install psycopg2-binary #programs to connect to and work with PostgreSQL databases

In [None]:
%load_ext sql
from sqlalchemy import create_engine
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [None]:
pip install ipython-sql

In [None]:
%sql postgresql://postgres:@localhost:5432/customerdb #connection string

### 3. Periodic Analysis

## Overview

### 3.1.1 How does the transaction activity of customers from different income-groups vary over time?

In [26]:

%%sql
SELECT
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
    i.income_group_desc as income_group,
    COUNT(*) AS transaction_count,
    ROUND(SUM(amt)) AS total_transaction_value,
    ROUND(AVG(amt)) AS avg_transaction_value
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier
JOIN income_group i ON c.income_group_code = i.income_group_code
WHERE i.income_group_desc != 'NOT SUPPLIED'
GROUP BY year_month, i.income_group_desc
ORDER BY year_month, i.income_group_desc
Limit 5;


 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


year_month,income_group,transaction_count,total_transaction_value,avg_transaction_value
2021-10,R 0 - 499,614,11240.0,18.0
2021-10,"R1,000 - 1,999",186,3419.0,18.0
2021-10,"R10,000 - 11,999",2423,-102420.0,-42.0
2021-10,"R12,000 - 14,999",2655,-53336.0,-20.0
2021-10,"R15,000 - 19,999",4957,31961.0,6.0



### 3.1.2 What is the correlation between transaction counts and total transaction amounts for each product code?

In [27]:
%%sql

SELECT
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
    t.product_code,
    COUNT(customer_identifier) AS transaction_count, 
    ROUND(SUM(amt)) AS total_amt
FROM transactions t
GROUP BY year_month, t.product_code
ORDER BY year_month, t.product_code
Limit 5;


 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


year_month,product_code,transaction_count,total_amt
2021-10,CHEQ,18514,-371630.0
2021-10,SAVE,13099,19936.0
2021-11,CHEQ,27513,302617.0
2021-11,SAVE,20473,269743.0
2021-12,CHEQ,43471,-839668.0


### 3.2.1 How does the frequency of transactions by loyal and customers change over time?

In [28]:
%%sql
WITH customer_transactions AS (
    SELECT
        CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
        CASE
            WHEN c.number_of_accounts > (
                SELECT AVG(number_of_accounts) FROM customers
            )
            AND (
                SELECT COUNT(*) / COUNT(DISTINCT customer_identifier) FROM transactions
            ) < (
                SELECT COUNT(*) FROM transactions
            )
            THEN 'Loyal Customers'
            ELSE 'Regular Customers'
        END AS customer_type,
        COUNT(*) AS transaction_count,
        ROUND(SUM(t.amt)) AS total_amount,
        ROUND(AVG(t.amt)) AS avg_amount
    FROM transactions t
    JOIN customers c 
    ON t.customer_identifier = c.customer_identifier
    GROUP BY 
        year_month,
        customer_type
)
SELECT *
FROM customer_transactions
WHERE customer_type = 'Loyal Customers'
ORDER BY 
    year_month
    Limit 5;


 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


year_month,customer_type,transaction_count,total_amount,avg_amount
2021-10,Loyal Customers,17272,-158635.0,-9.0
2021-11,Loyal Customers,25948,295537.0,11.0
2021-12,Loyal Customers,44517,-608044.0,-14.0
2021-7,Loyal Customers,1409,-2811.0,-2.0
2021-8,Loyal Customers,4285,-82960.0,-19.0


### 3.2.2 How does the frequency of transactions by regular customers change over time?

In [29]:
%%sql
WITH customer_transactions AS (
    SELECT
        CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
        CASE
            WHEN c.number_of_accounts > (
                SELECT AVG(number_of_accounts) FROM customers
            )
            AND (
                SELECT COUNT(*) / COUNT(DISTINCT customer_identifier) FROM transactions
            ) < (
                SELECT COUNT(*) FROM transactions
            )
            THEN 'Loyal Customers'
            ELSE 'Regular Customers'
        END AS customer_type,
        COUNT(*) AS transaction_count,
        ROUND(SUM(t.amt)) AS total_amount,
        ROUND(AVG(t.amt)) AS avg_amount
    FROM transactions t
    JOIN customers c 
    ON t.customer_identifier = c.customer_identifier
    GROUP BY 
        year_month,
        customer_type
)
SELECT *
FROM customer_transactions
WHERE customer_type = 'Regular Customers'
ORDER BY 
    year_month
    Limit 5;


 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


year_month,customer_type,transaction_count,total_amount,avg_amount
2021-10,Regular Customers,14341,-193059.0,-13.0
2021-11,Regular Customers,22038,276822.0,13.0
2021-12,Regular Customers,35755,-205440.0,-6.0
2021-7,Regular Customers,854,-10081.0,-12.0
2021-8,Regular Customers,3165,-14210.0,-4.0


### 3.3 Monthly Unpaid Transactions and Customer Count over the period of 14 months

In [30]:
%%sql
SELECT 
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
    ROUND(SUM(t.amt)) AS total_unpaid,
    COUNT(DISTINCT c.customer_identifier) AS bad_count
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier

WHERE transaction_description IN ('BAD DEBT W/OFF', 'DC UNPAID', 'NAEDO DO UNPD', 'ACB DEBIT REVERSAL', 'UNPAID DEBIT')
GROUP BY year_month
ORDER BY year_month ASC
Limit 5;

 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


year_month,total_unpaid,bad_count
2021-10,65590.0,60
2021-11,95085.0,85
2021-12,139903.0,148
2021-7,9680.0,9
2021-8,19784.0,21
