### Connecting to Postgresql

In [34]:
pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.


In [35]:
pip install psycopg2-binary

Note: you may need to restart the kernel to use updated packages.


In [36]:
%load_ext sql
from sqlalchemy import create_engine
# %config SqlMagic.style = '_DEPRECATED_DEFAULT'

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [37]:
pip install ipython-sql

Note: you may need to restart the kernel to use updated packages.


In [38]:
%sql postgresql://postgres:@localhost:5432/customerdb

### 3. Periodic Analysis

## Overview

### 3.1.1 How does the transaction activity of customers from different vary over time?

In [39]:

%%sql
SELECT
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
    i.income_group_desc as income_group,
    COUNT(*) AS transaction_count,
    ROUND(SUM(amt)) AS total_transaction_value,
    ROUND(AVG(amt)) AS avg_transaction_value
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier
JOIN income_group i ON c.income_group_code = i.income_group_code
WHERE i.income_group_desc != 'NOT SUPPLIED'
GROUP BY year_month, i.income_group_desc
ORDER BY year_month, i.income_group_desc
limit 10;

 * postgresql://postgres:***@localhost:5432/customerdb
10 rows affected.


year_month,income_group,transaction_count,total_transaction_value,avg_transaction_value
2021-10,R 0 - 499,614,11240.0,18.0
2021-10,"R1,000 - 1,999",186,3419.0,18.0
2021-10,"R10,000 - 11,999",2423,-102420.0,-42.0
2021-10,"R12,000 - 14,999",2655,-53336.0,-20.0
2021-10,"R15,000 - 19,999",4957,31961.0,6.0
2021-10,"R2,000 - 2,999",156,12780.0,82.0
2021-10,"R20,000 - 24,999",4152,-56292.0,-14.0
2021-10,"R25,000 - 33,999",4613,-49917.0,-11.0
2021-10,"R3,000 - 3,999",371,18809.0,51.0
2021-10,"R34,000 - 41,999",2056,-30124.0,-15.0



### 3.1.2 What is the correlation between transaction counts and total transaction amounts for each product code?

In [40]:
%%sql

SELECT
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
    t.product_code,
    COUNT(customer_identifier) AS transaction_count, 
    ROUND(SUM(amt)) AS total_amt
FROM transactions t
GROUP BY year_month, t.product_code
ORDER BY year_month, t.product_code;


 * postgresql://postgres:***@localhost:5432/customerdb
28 rows affected.


year_month,product_code,transaction_count,total_amt
2021-10,CHEQ,18514,-371630.0
2021-10,SAVE,13099,19936.0
2021-11,CHEQ,27513,302617.0
2021-11,SAVE,20473,269743.0
2021-12,CHEQ,43471,-839668.0
2021-12,SAVE,36801,26185.0
2021-7,CHEQ,1436,-15401.0
2021-7,SAVE,827,2509.0
2021-8,CHEQ,4821,-104053.0
2021-8,SAVE,2629,6884.0


### 3.2.1 What is the correlation between transaction counts, total transaction amounts, and product code for system based channel?

In [41]:
%%sql

    SELECT
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
        t.product_code,
        COUNT(*) AS transaction_count,
        ROUND(SUM(t.amt)) AS Total_Amount,
        ROUND(AVG(t.amt)) AS Avg_Amount
    FROM transactions t
    JOIN customers c ON t.customer_identifier = c.customer_identifier
    WHERE channel = 'system'
    GROUP BY 
    year_month,
    t.product_code, t.transaction_description
    ORDER BY year_month,Total_Amount DESC
    limit 10;

 * postgresql://postgres:***@localhost:5432/customerdb
10 rows affected.


year_month,product_code,transaction_count,total_amount,avg_amount
2021-10,CHEQ,770,5670798.0,7365.0
2021-10,SAVE,7611,1385725.0,182.0
2021-10,CHEQ,21,116379.0,5542.0
2021-10,CHEQ,56,34054.0,608.0
2021-10,CHEQ,1,20000.0,20000.0
2021-10,CHEQ,15,17085.0,1139.0
2021-10,CHEQ,10,8297.0,830.0
2021-10,CHEQ,5,6360.0,1272.0
2021-10,CHEQ,135,3419.0,25.0
2021-10,CHEQ,3,3151.0,1050.0


### 3.2.2 What is the correlation between transaction counts, total transaction amounts, and product code for internet based channel?

In [42]:
%%sql

    SELECT
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
        t.product_code,
        COUNT(*) AS transaction_count,
        ROUND(SUM(t.amt)) AS Total_Amount,
        ROUND(AVG(t.amt)) AS Avg_Amount
    FROM transactions t
    JOIN customers c ON t.customer_identifier = c.customer_identifier
    WHERE channel = 'internet'
    GROUP BY 
    year_month,
    t.product_code, t.transaction_description
    ORDER BY year_month,Total_Amount DESC
    limit 10;

 * postgresql://postgres:***@localhost:5432/customerdb
10 rows affected.


year_month,product_code,transaction_count,total_amount,avg_amount
2021-10,CHEQ,206,673650.0,3270.0
2021-10,CHEQ,503,440495.0,876.0
2021-10,SAVE,1528,201428.0,132.0
2021-10,CHEQ,26,19810.0,762.0
2021-10,CHEQ,2,5278.0,2639.0
2021-10,CHEQ,2,600.0,300.0
2021-10,CHEQ,11,-2980.0,-271.0
2021-10,CHEQ,21,-4760.0,-227.0
2021-10,CHEQ,76,-37120.0,-488.0
2021-10,CHEQ,25,-77847.0,-3114.0


### 3.2.3 What is the correlation between transaction counts, total transaction amounts, and product code for atm based channel?

In [43]:
%%sql

    SELECT
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
        t.product_code,
        COUNT(*) AS transaction_count,
        ROUND(SUM(t.amt)) AS Total_Amount,
        ROUND(AVG(t.amt)) AS Avg_Amount
    FROM transactions t
    JOIN customers c ON t.customer_identifier = c.customer_identifier
    WHERE channel = 'atm'
    GROUP BY 
    year_month,
    t.product_code, t.transaction_description
    ORDER BY year_month,Total_Amount DESC
    limit 10;

 * postgresql://postgres:***@localhost:5432/customerdb
10 rows affected.


year_month,product_code,transaction_count,total_amount,avg_amount
2021-10,CHEQ,52,83450.0,1605.0
2021-10,CHEQ,32,57600.0,1800.0
2021-10,CHEQ,25,51690.0,2068.0
2021-10,CHEQ,8,16500.0,2062.0
2021-10,CHEQ,3,3590.0,1197.0
2021-10,CHEQ,10,1454.0,145.0
2021-10,CHEQ,2,75.0,38.0
2021-10,CHEQ,2,-850.0,-425.0
2021-10,CHEQ,506,-14222.0,-28.0
2021-10,CHEQ,15,-16390.0,-1093.0


### 3.2.4 What is the correlation between transaction counts, total transaction amounts, and product code for teller based channel?

In [44]:
%%sql

    SELECT
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
        t.product_code,
        COUNT(*) AS transaction_count,
        ROUND(SUM(t.amt)) AS Total_Amount,
        ROUND(AVG(t.amt)) AS Avg_Amount
    FROM transactions t
    JOIN customers c ON t.customer_identifier = c.customer_identifier
    WHERE channel = 'teller'
    GROUP BY 
    year_month,
    t.product_code, t.transaction_description
    ORDER BY year_month,Total_Amount DESC
    limit 10;

 * postgresql://postgres:***@localhost:5432/customerdb


10 rows affected.


year_month,product_code,transaction_count,total_amount,avg_amount
2021-10,CHEQ,166,559899.0,3373.0
2021-10,SAVE,256,506947.0,1980.0
2021-10,CHEQ,33,472741.0,14325.0
2021-10,CHEQ,19,43377.0,2283.0
2021-10,CHEQ,3,21100.0,7033.0
2021-10,CHEQ,4,14392.0,3598.0
2021-10,CHEQ,22,12210.0,555.0
2021-10,CHEQ,4,8685.0,2171.0
2021-10,CHEQ,9,2546.0,283.0
2021-10,CHEQ,3,2391.0,797.0


### 3.3.1 How does the frequency of transactions by loyal and customers change over time?

In [45]:
%%sql
WITH customer_transactions AS (
    SELECT
        CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
        CASE
            WHEN c.number_of_accounts > (
                SELECT AVG(number_of_accounts) FROM customers
            )
            AND (
                SELECT COUNT(*) / COUNT(DISTINCT customer_identifier) FROM transactions
            ) < (
                SELECT COUNT(*) FROM transactions
            )
            THEN 'Loyal Customers'
            ELSE 'Regular Customers'
        END AS customer_type,
        COUNT(*) AS transaction_count,
        ROUND(SUM(t.amt)) AS total_amount,
        ROUND(AVG(t.amt)) AS avg_amount
    FROM transactions t
    JOIN customers c 
    ON t.customer_identifier = c.customer_identifier
    GROUP BY 
        year_month,
        customer_type
)
SELECT *
FROM customer_transactions
WHERE customer_type = 'Loyal Customers'
ORDER BY 
    year_month;


 * postgresql://postgres:***@localhost:5432/customerdb


14 rows affected.


year_month,customer_type,transaction_count,total_amount,avg_amount
2021-10,Loyal Customers,17272,-158635.0,-9.0
2021-11,Loyal Customers,25948,295537.0,11.0
2021-12,Loyal Customers,44517,-608044.0,-14.0
2021-7,Loyal Customers,1409,-2810.0,-2.0
2021-8,Loyal Customers,4285,-82960.0,-19.0
2021-9,Loyal Customers,10468,98382.0,9.0
2022-1,Loyal Customers,48983,376063.0,8.0
2022-2,Loyal Customers,56382,547304.0,10.0
2022-3,Loyal Customers,65811,-118010.0,-2.0
2022-4,Loyal Customers,57000,180697.0,3.0


### 3.3.2 How does the frequency of transactions by regular customers change over time?

In [46]:
%%sql
WITH customer_transactions AS (
    SELECT
        CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
        CASE
            WHEN c.number_of_accounts > (
                SELECT AVG(number_of_accounts) FROM customers
            )
            AND (
                SELECT COUNT(*) / COUNT(DISTINCT customer_identifier) FROM transactions
            ) < (
                SELECT COUNT(*) FROM transactions
            )
            THEN 'Loyal Customers'
            ELSE 'Regular Customers'
        END AS customer_type,
        COUNT(*) AS transaction_count,
        ROUND(SUM(t.amt)) AS total_amount,
        ROUND(AVG(t.amt)) AS avg_amount
    FROM transactions t
    JOIN customers c 
    ON t.customer_identifier = c.customer_identifier
    GROUP BY 
        year_month,
        customer_type
)
SELECT *
FROM customer_transactions
WHERE customer_type = 'Regular Customers'
ORDER BY 
    year_month;


 * postgresql://postgres:***@localhost:5432/customerdb
14 rows affected.


year_month,customer_type,transaction_count,total_amount,avg_amount
2021-10,Regular Customers,14341,-193059.0,-13.0
2021-11,Regular Customers,22038,276822.0,13.0
2021-12,Regular Customers,35755,-205440.0,-6.0
2021-7,Regular Customers,854,-10081.0,-12.0
2021-8,Regular Customers,3165,-14210.0,-4.0
2021-9,Regular Customers,7110,126878.0,18.0
2022-1,Regular Customers,37165,155740.0,4.0
2022-2,Regular Customers,42992,1330452.0,31.0
2022-3,Regular Customers,50425,301836.0,6.0
2022-4,Regular Customers,43561,1129868.0,26.0


### 3.4 Monthly Unpaid Transactions and Customer Count over the period of 14 months

In [47]:
%%sql
SELECT 
    CONCAT(EXTRACT(YEAR FROM t.record_date), '-', EXTRACT(MONTH FROM t.record_date)) AS year_month,
    ROUND(SUM(t.amt)) AS total_unpaid,
    COUNT(DISTINCT c.customer_identifier) AS bad_count
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier

WHERE transaction_description IN ('BAD DEBT W/OFF', 'DC UNPAID', 'NAEDO DO UNPD', 'ACB DEBIT REVERSAL', 'UNPAID DEBIT')
GROUP BY year_month
ORDER BY year_month ASC

 * postgresql://postgres:***@localhost:5432/customerdb
14 rows affected.


year_month,total_unpaid,bad_count
2021-10,65590.0,60
2021-11,95085.0,85
2021-12,139903.0,148
2021-7,9680.0,9
2021-8,19784.0,21
2021-9,32853.0,34
2022-1,178955.0,200
2022-2,168752.0,202
2022-3,236213.0,231
2022-4,211973.0,233
