### Connecting to Postgresql

In [45]:

pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.


In [46]:
pip install psycopg2-binary

Note: you may need to restart the kernel to use updated packages.


In [47]:
%load_ext sql
from sqlalchemy import create_engine
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [48]:
pip install ipython-sql

Note: you may need to restart the kernel to use updated packages.


In [49]:
%sql postgresql://postgres:@localhost:5432/customerdb

### 2. Transaction Analysis

## Overview

### 2.1 How do customers from different income groups and age brackets contribute to transaction activity across various channels?

In [50]:

%%sql
SELECT 
    i.income_group_desc AS income_group,
    CASE 
        WHEN c.age BETWEEN 16 AND 24 THEN '16-24: Youth'
        WHEN c.age BETWEEN 25 AND 34 THEN '25-34: Young Professionals'
        WHEN c.age BETWEEN 35 AND 44 THEN '35-44: Middle-Aged Adults'
        WHEN c.age BETWEEN 45 AND 54 THEN '45-54: Prime Working Years'
        WHEN c.age BETWEEN 55 AND 64 THEN '55-64: Pre-Retirement'
        WHEN c.age >= 65 THEN '65+: Seniors/Elderly'
        ELSE 'Under 16'
    END AS age_group,
    t.channel,
    COUNT(t.transaction_id) AS transaction_count,
    ROUND(SUM(t.amt)) AS total_transaction_value,
    ROUND(AVG(t.amt)) AS avg_transaction_value
FROM 
    transactions t
JOIN 
    customers c ON t.customer_identifier = c.customer_identifier
JOIN 
    income_group i ON c.income_group_code = i.income_group_code
GROUP BY 
    i.income_group_desc, age_group, t.channel
ORDER BY 
    i.income_group_desc, age_group, t.channel
    Limit 10;

 * postgresql://postgres:***@localhost:5432/customerdb


10 rows affected.


income_group,age_group,channel,transaction_count,total_transaction_value,avg_transaction_value
NOT SUPPLIED,16-24: Youth,atm,23,-3125.0,-136.0
NOT SUPPLIED,16-24: Youth,internet,23,-21886.0,-952.0
NOT SUPPLIED,16-24: Youth,system,77,18850.0,245.0
NOT SUPPLIED,16-24: Youth,teller,6,2170.0,362.0
NOT SUPPLIED,35-44: Middle-Aged Adults,system,7,0.0,0.0
R 0 - 499,16-24: Youth,atm,549,-120399.0,-219.0
R 0 - 499,16-24: Youth,internet,732,-68778.0,-94.0
R 0 - 499,16-24: Youth,system,1931,157562.0,82.0
R 0 - 499,16-24: Youth,teller,57,55729.0,978.0
R 0 - 499,25-34: Young Professionals,atm,1038,-296074.0,-285.0


### 2.3.1 Which transaction descriptions dominate system based channel and how much revenue/expense do they generate?

In [51]:
%%sql

SELECT
    transaction_description,
    channel,
    COUNT(*) AS Transaction_Count,
    ROUND(SUM(amt)) AS Total_Amount,
    ROUND(AVG(amt)) AS Avg_Amount
FROM transactions
WHERE transaction_description IS NOT NULL AND channel = 'system'
GROUP BY transaction_description, channel
ORDER BY transaction_count

 * postgresql://postgres:***@localhost:5432/customerdb
58 rows affected.


transaction_description,channel,transaction_count,total_amount,avg_amount
POS REFUND PUR,system,1,1265.0,1265.0
DO EXT DISP BRANCH,system,1,190.0,190.0
EXTERNAL CREDIT,system,1,7326.0,7326.0
INTEREST TO CLOSE,system,2,-12.0,-6.0
ACB INT DISP ONLNE,system,4,1168.0,292.0
ACB INT DISP BRNCH,system,4,8689.0,2172.0
DO EXT DISP ONLNE,system,7,1059.0,151.0
NAEDO TRCK DO DISP,system,8,7720.0,965.0
OCT OCARD CREDIT,system,9,14260.0,1584.0
ACT PAYMENT FROM,system,11,28465.0,2588.0


### 2.3.2 Which transaction descriptions dominate internet based channel and how much revenue/expense do they generate?

In [52]:
%%sql

SELECT
    transaction_description,
    channel,
    COUNT(*) AS Transaction_Count,
    ROUND(SUM(amt)) AS Total_Amount,
    ROUND(AVG(amt)) AS Avg_Amount
FROM transactions
WHERE transaction_description IS NOT NULL AND channel = 'internet'
GROUP BY transaction_description, channel
ORDER BY transaction_count

 * postgresql://postgres:***@localhost:5432/customerdb
29 rows affected.


transaction_description,channel,transaction_count,total_amount,avg_amount
TELEP PAYMENT CT,internet,1,1200.0,1200.0
MOBILE PAYMENT CR,internet,1,2000.0,2000.0
MOBILE TRANSFER CR,internet,2,130.0,65.0
DIGITAL VOUCHERS,internet,2,-266.0,-133.0
TEL CR TRANSFER,internet,4,194.0,48.0
DC TRACK INT DISP,internet,5,11054.0,2211.0
ATM WITHDRAWAL,internet,13,-26700.0,-2054.0
MOBILE TRANSFER DT,internet,14,-25497.0,-1821.0
MOBILE PAYMENT DT,internet,15,-22100.0,-1473.0
CASHSEND MB,internet,20,-5580.0,-279.0


### 2.3.3 Which transaction descriptions dominate teller based channel and how much revenue/expense do they generate?

In [53]:
%%sql

SELECT
    transaction_description,
    channel,
    COUNT(*) AS Transaction_Count,
    ROUND(SUM(amt)) AS Total_Amount,
    ROUND(AVG(amt)) AS Avg_Amount
FROM transactions
WHERE transaction_description IS NOT NULL AND channel = 'teller'
GROUP BY transaction_description, channel
ORDER BY transaction_count

 * postgresql://postgres:***@localhost:5432/customerdb
32 rows affected.


transaction_description,channel,transaction_count,total_amount,avg_amount
JOURNAL CR C/OPS,teller,1,4400.0,4400.0
TRF TO CLOSE COR,teller,1,50.0,50.0
TRANSFER COR FROM,teller,1,-50.0,-50.0
BILLS NEGOTIATED,teller,1,1356.0,1356.0
TRAVEL FOREX(PFC),teller,2,11094.0,5547.0
TRAVEL FOREX(TFC),teller,2,-35901.0,-17950.0
IBANK TRANSFER,teller,2,-150.0,-75.0
PINP CLNT CASH WDL,teller,3,-22800.0,-7600.0
DIGITAL TRAN FEES,teller,3,-200.0,-67.0
IBANK PAYMENT TO,teller,3,-540.0,-180.0


### 2.3.4 Which transaction descriptions dominate atm based channel and how much revenue/expense do they generate?

In [54]:
%%sql

SELECT
    transaction_description,
    channel,
    COUNT(*) AS Transaction_Count,
    ROUND(SUM(amt)) AS Total_Amount,
    ROUND(AVG(amt)) AS Avg_Amount
FROM transactions
WHERE transaction_description IS NOT NULL AND channel = 'atm'
GROUP BY transaction_description, channel
ORDER BY transaction_count

 * postgresql://postgres:***@localhost:5432/customerdb
20 rows affected.


transaction_description,channel,transaction_count,total_amount,avg_amount
AIRTIME DBT ATMS,atm,1,-99.0,-99.0
NOTE DEPOSIT,atm,2,1050.0,525.0
CARDLESS NOTE DEP,atm,2,2850.0,1425.0
SSD OTHER CHIP WDL,atm,3,-1260.0,-420.0
AIRTIME DEBIT COR,atm,6,294.0,49.0
PREPAID DEBITÂ COR,atm,12,2330.0,194.0
SCAN TRANSFER TO,atm,13,-4370.0,-336.0
CAN CASHSEND ATM,atm,25,18600.0,744.0
ATM PAYMENT FR,atm,80,339228.0,4240.0
PREPAID DEBIT COR,atm,139,19154.0,138.0


### 2.4 What proportion of teller-based transactions could have been conducted on digital platforms, and how can the bank encourage customers to transition to these more efficient channels?

In [57]:
%%sql

SELECT
    t.transaction_description,
    COUNT(*) AS transaction_count,
    CASE 
        WHEN c.age BETWEEN 16 AND 24 THEN '16-24: Youth'
        WHEN c.age BETWEEN 25 AND 34 THEN '25-34: Young Professionals'
        WHEN c.age BETWEEN 35 AND 44 THEN '35-44: Middle-Aged Adults'
        WHEN c.age BETWEEN 45 AND 54 THEN '45-54: Prime Working Years'
        WHEN c.age BETWEEN 55 AND 64 THEN '55-64: Pre-Retirement'
        WHEN c.age >= 65 THEN '65+: Seniors/Elderly'
        ELSE 'Under 16'
    END AS age_group,
    i.income_group_desc
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier
JOIN income_group i ON c.income_group_code = i.income_group_code
WHERE t.channel = 'teller'
  AND t.transaction_description IN (
      'BAD DEBT W/OFF',
      'CLOSE C/WITHDRAWAL',
      'CLOSE CASH DEP',
      'CLOSED-TO SAV',
      'DIGITAL TRAN FEES',
      'DIGITAL TRANSF DT',
      'FOREIGN NOTES',
      'IBANK PAYMENT FROM',
      'IBANK PAYMENT TO',
      'IBANK TRANSFER',
      'IMMEDIATE TRF CR',
      'JOURNAL CR C/OPS',
      'JOURNAL CREDIT',
      'JOURNAL DEBIT',
      'NPF CREDIT',
      'NPF DEBIT',
      'OPENED-FROM SAV',
      'TRANSFER COR FROM',
      'TRANSFER FROM',
      'TRANSFER TO',
      'TRANSFER TO CLOSE',
      'TRAVEL FOREX(PFC)',
      'TRAVEL FOREX(TFC)',
      'TRF TO CLOSE COR',
      'UNPAID DEBIT',
      'WESTERN UNION(CBI)',
      'WESTERN UNION(CBO)'
  )
GROUP BY t.transaction_description, age_group, i.income_group_desc
ORDER BY transaction_count DESC
Limit 10;

 * postgresql://postgres:***@localhost:5432/customerdb


10 rows affected.


transaction_description,transaction_count,age_group,income_group_desc
IMMEDIATE TRF CR,764,25-34: Young Professionals,"R15,000 - 19,999"
IMMEDIATE TRF CR,537,25-34: Young Professionals,"R20,000 - 24,999"
IMMEDIATE TRF CR,394,25-34: Young Professionals,"R25,000 - 33,999"
IMMEDIATE TRF CR,320,16-24: Youth,"R15,000 - 19,999"
IMMEDIATE TRF CR,275,25-34: Young Professionals,"R10,000 - 11,999"
IMMEDIATE TRF CR,273,25-34: Young Professionals,"R34,000 - 41,999"
IMMEDIATE TRF CR,197,35-44: Middle-Aged Adults,"R12,000 - 14,999"
IMMEDIATE TRF CR,191,35-44: Middle-Aged Adults,"R25,000 - 33,999"
IMMEDIATE TRF CR,184,35-44: Middle-Aged Adults,"R15,000 - 19,999"
IMMEDIATE TRF CR,177,25-34: Young Professionals,"R12,000 - 14,999"


### 2.5 Customers missing payments my income group

In [56]:
%%sql
SELECT 

    ROUND(SUM(t.amt)) AS total_unpaid,
    COUNT(DISTINCT c.customer_identifier) AS bad_count,
    CASE 
        WHEN c.age BETWEEN 16 AND 24 THEN '16-24: Youth'
        WHEN c.age BETWEEN 25 AND 34 THEN '25-34: Young Professionals'
        WHEN c.age BETWEEN 35 AND 44 THEN '35-44: Middle-Aged Adults'
        WHEN c.age BETWEEN 45 AND 54 THEN '45-54: Prime Working Years'
        WHEN c.age BETWEEN 55 AND 64 THEN '55-64: Pre-Retirement'
        WHEN c.age >= 65 THEN '65+: Seniors/Elderly'
        ELSE 'Under 16'
    END AS age_group
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier
WHERE t.transaction_description IN ('BAD DEBT W/OFF', 'DC UNPAID', 'NAEDO DO UNPD', 'ACB DEBIT REVERSAL', 'UNPAID DEBIT')
GROUP BY  age_group
ORDER BY age_group


 * postgresql://postgres:***@localhost:5432/customerdb


6 rows affected.


total_unpaid,bad_count,age_group
196986.0,108,16-24: Youth
677021.0,370,25-34: Young Professionals
597368.0,324,35-44: Middle-Aged Adults
276494.0,155,45-54: Prime Working Years
64699.0,31,55-64: Pre-Retirement
4166.0,3,65+: Seniors/Elderly
