### Connecting to Postgresql

In [2]:
pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install psycopg2-binary

Note: you may need to restart the kernel to use updated packages.


In [4]:
%load_ext sql
from sqlalchemy import create_engine
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [5]:
pip install ipython-sql

Note: you may need to restart the kernel to use updated packages.


In [6]:
%sql postgresql://postgres:@localhost:5432/customerdb

### 3. Periodic Analysis

3.1 How does the transaction activity of customers from different income groups vary over time?

In [7]:
%%sql
SELECT
    EXTRACT(YEAR FROM record_date) AS year,
    TO_CHAR(record_date, 'Mon') AS month,
    i.income_group_desc as income_group,
    COUNT(*) AS transaction_count,
    ROUND(SUM(amt)) AS total_transaction_value,
    ROUND(AVG(amt)) AS avg_transaction_value
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier
JOIN income_group i ON c.income_group_code = i.income_group_code
GROUP BY year, month, i.income_group_desc
ORDER BY year, month, i.income_group_desc;

 * postgresql://postgres:***@localhost:5432/customerdb
288 rows affected.


year,month,income_group,transaction_count,total_transaction_value,avg_transaction_value
2021,Aug,R 0 - 499,34,67.0,2.0
2021,Aug,"R10,000 - 11,999",561,-32388.0,-58.0
2021,Aug,"R12,000 - 14,999",559,11314.0,20.0
2021,Aug,"R15,000 - 19,999",1102,-8175.0,-7.0
2021,Aug,"R20,000 - 24,999",848,-13685.0,-16.0
2021,Aug,"R25,000 - 33,999",1332,3788.0,3.0
2021,Aug,"R3,000 - 3,999",35,535.0,15.0
2021,Aug,"R34,000 - 41,999",530,-28345.0,-53.0
2021,Aug,"R4,000 - 4,999",95,17452.0,184.0
2021,Aug,"R42,000 - 62,999",433,-22542.0,-52.0


3.2 How do transaction counts and values for specific product codes vary across different channels over time?

In [8]:
%%sql

SELECT
    EXTRACT(YEAR FROM record_date) AS year,
    TO_CHAR(record_date, 'Mon') AS month,
    t.channel,
    t.product_code,
    COUNT(*) AS transaction_count,
    ROUND(SUM(amt)) AS total_transaction_value,
    ROUND(AVG(amt)) AS total_transaction_value
FROM transactions t
GROUP BY year, month, t.channel, t.product_code
ORDER BY year, month, t.channel, t.product_code
Limit 10;

 * postgresql://postgres:***@localhost:5432/customerdb


10 rows affected.


year,month,channel,product_code,transaction_count,total_transaction_value,total_transaction_value_1
2021,Aug,atm,CHEQ,1158,-449637.0,-388.0
2021,Aug,atm,SAVE,874,-483370.0,-553.0
2021,Aug,internet,CHEQ,626,-499756.0,-798.0
2021,Aug,internet,SAVE,246,-23652.0,-96.0
2021,Aug,system,CHEQ,2953,599681.0,203.0
2021,Aug,system,SAVE,1445,398726.0,276.0
2021,Aug,teller,CHEQ,84,245658.0,2925.0
2021,Aug,teller,SAVE,64,115181.0,1800.0
2021,Dec,atm,CHEQ,8527,-3353174.0,-393.0
2021,Dec,atm,SAVE,9522,-5946619.0,-625.0


3.3 How does the frequency of transactions by loyal customers change over time?

In [9]:
%%sql

SELECT
    EXTRACT(YEAR FROM t.record_date) AS year,
    TO_CHAR(t.record_date, 'Mon') AS month,
    CASE
        WHEN c.number_of_accounts > (
            SELECT AVG(number_of_accounts) FROM customers
        )
        AND (
            SELECT COUNT(*) / COUNT(DISTINCT customer_identifier) FROM transactions
        ) < (
            SELECT COUNT(*) FROM transactions
        )
        THEN 'Loyal Customers'
        ELSE 'Regular Customers'
    END AS customer_type,
    COUNT(*) AS transaction_count,
    ROUND(SUM(t.amt)) AS total_amount
FROM transactions t
JOIN customers c 
ON t.customer_identifier = c.customer_identifier
GROUP BY 
    year, 
    TO_CHAR(t.record_date, 'Mon'), 
    EXTRACT(MONTH FROM t.record_date), 
    customer_type
ORDER BY 
    year, 
    EXTRACT(MONTH FROM t.record_date), 
    customer_type;




 * postgresql://postgres:***@localhost:5432/customerdb
28 rows affected.


year,month,customer_type,transaction_count,total_amount
2021,Jul,Loyal Customers,1409,-2811.0
2021,Jul,Regular Customers,854,-10081.0
2021,Aug,Loyal Customers,4285,-82960.0
2021,Aug,Regular Customers,3165,-14210.0
2021,Sep,Loyal Customers,10468,98382.0
2021,Sep,Regular Customers,7110,126878.0
2021,Oct,Loyal Customers,17272,-158635.0
2021,Oct,Regular Customers,14341,-193059.0
2021,Nov,Loyal Customers,25948,295537.0
2021,Nov,Regular Customers,22038,276822.0


### Monthly Unpaid Transactions and Customer Count over the period of 14 months

In [10]:
%%sql
SELECT 
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
    ROUND(SUM(t.amt)) AS total_unpaid,
    COUNT(DISTINCT c.customer_identifier) AS bad_count
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier

WHERE transaction_description IN ('BAD DEBT W/OFF', 'DC UNPAID', 'NAEDO DO UNPD', 'ACB DEBIT REVERSAL', 'UNPAID DEBIT')
GROUP BY year_month
ORDER BY year_month ASC

 * postgresql://postgres:***@localhost:5432/customerdb
14 rows affected.


year_month,total_unpaid,bad_count
2021-10,65590.0,60
2021-11,95085.0,85
2021-12,139903.0,148
2021-7,9680.0,9
2021-8,19784.0,21
2021-9,32853.0,34
2022-1,178955.0,200
2022-2,168752.0,202
2022-3,236213.0,231
2022-4,211973.0,233
