### Connecting to PostgreSQL

In [None]:

pip install sqlalchemy #helps python program to talk to databases

%load_ext sql #loads sql extension
from sqlalchemy import create_engine  #set up a connection between Python and a database
%config SqlMagic.style = '_DEPRECATED_DEFAULT'  #show SQL results using an older style.

In [None]:
pip install psycopg2-binary #programs to connect to and work with PostgreSQL databases

In [3]:
%load_ext sql
from sqlalchemy import create_engine
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [None]:
pip install ipython-sql

In [None]:
%sql postgresql://postgres:@localhost:5432/customerdb #connection string

### 2. Transaction Analysis

## Overview

### 2.1 How do different types of transactions and customer behaviors influence the overall transaction landscape?


### 2.1.1 How many transactions do customers make on average, and which types are the most common?

In [6]:
%%sql
-- Top five transaction descriptions ranked by rounded total amount,
-- with the transaction count and distinct customer count for each one.
SELECT COUNT(tx.transaction_id)               AS total_transactions,
       COUNT(DISTINCT tx.customer_identifier) AS total_customers,
       ROUND(SUM(tx.amt))                     AS sum_transaction_amount,
       tx.transaction_description
FROM transactions AS tx
GROUP BY tx.transaction_description
ORDER BY sum_transaction_amount DESC
LIMIT 5;


 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


total_transactions,total_customers,sum_transaction_amount,transaction_description
18641,2307,135659407.0,ACB CREDIT
1303,1008,22289971.0,TRANSFER FROM
7731,1214,22157756.0,IBANK PAYMENT FROM
6295,1112,13994160.0,IMMEDIATE TRF CR
11515,769,12146092.0,IBANK TRANSFER


### 2.1.2 What is the breakdown of transaction volume across different transaction channels?

In [7]:
%%sql
-- Transaction count and rounded total amount for each channel,
-- largest total first (at most five channels are returned).
SELECT tx.channel,
       COUNT(tx.transaction_id) AS total_transactions,
       ROUND(SUM(tx.amt))       AS sum_transaction_amount
FROM transactions AS tx
GROUP BY tx.channel
ORDER BY sum_transaction_amount DESC
LIMIT 5;


 * postgresql://postgres:***@localhost:5432/customerdb
4 rows affected.


channel,total_transactions,sum_transaction_amount
system,509581,74810502.0
teller,21464,61690496.0
internet,119238,-29068726.0
atm,197261,-96933397.0


### 2.1.3 How do transaction volumes vary across different income groups?

In [8]:
%%sql
-- Transaction count and rounded total amount per income group description,
-- keeping the five groups with the largest totals.
SELECT grp.income_group_desc    AS income_group,
       COUNT(tx.transaction_id) AS total_transactions,
       ROUND(SUM(tx.amt))       AS sum_transaction_amount
FROM transactions AS tx
INNER JOIN customers AS cust
        ON tx.customer_identifier = cust.customer_identifier
INNER JOIN income_group AS grp
        ON cust.income_group_code = grp.income_group_code
GROUP BY grp.income_group_desc
ORDER BY sum_transaction_amount DESC
LIMIT 5;


 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


income_group,total_transactions,sum_transaction_amount
"R8,000 - 8,999",47663,1154261.0
"R10,000 - 11,999",69661,1121973.0
"R20,000 - 24,999",105093,968400.0
"R7,000 - 7,999",47822,918829.0
"R25,000 - 33,999",90633,918054.0


### 2.1.4 How do different income groups use product codes?

In [9]:
%%sql
-- Transaction count and rounded total amount for every combination of
-- income group and product code, keeping the five largest totals.
SELECT grp.income_group_desc    AS income_group,
       tx.product_code,
       COUNT(tx.transaction_id) AS total_transactions,
       ROUND(SUM(tx.amt))       AS sum_transaction_amount
FROM transactions AS tx
INNER JOIN customers AS cust
        ON tx.customer_identifier = cust.customer_identifier
INNER JOIN income_group AS grp
        ON cust.income_group_code = grp.income_group_code
GROUP BY grp.income_group_desc,
         tx.product_code
ORDER BY sum_transaction_amount DESC
LIMIT 5;



 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


income_group,product_code,total_transactions,sum_transaction_amount
"R20,000 - 24,999",CHEQ,73552,887761.0
"R4,000 - 4,999",CHEQ,7135,796450.0
"R7,000 - 7,999",CHEQ,19917,783928.0
"R8,000 - 8,999",CHEQ,22222,737025.0
"R25,000 - 33,999",CHEQ,67685,713317.0


### 2.2 How can the bank optimize transaction channels to increase efficiency, reduce costs, and enhance customer satisfaction?

### 2.2.1 What proportion of teller-based transactions could have been conducted on digital platforms, and how can the bank encourage customers to transition to these more efficient channels?

In [10]:
%%sql
-- Teller channel transactions whose description is in the list below
-- (the candidates this analysis considers for digital channels), counted per
-- description, customer age band and income group. Top five by count.
SELECT
    t.transaction_description,
    COUNT(*) AS transaction_count,
    -- Age bands use half-open ranges so every non-null age falls in exactly one band.
    -- A missing age gets its own label instead of being reported as under 16.
    CASE
        WHEN c.age IS NULL THEN 'Unknown'
        WHEN c.age < 16 THEN 'Under 16'
        WHEN c.age < 25 THEN '16-24: Youth'
        WHEN c.age < 35 THEN '25-34: Young Professionals'
        WHEN c.age < 45 THEN '35-44: Middle-Aged Adults'
        WHEN c.age < 55 THEN '45-54: Prime Working Years'
        WHEN c.age < 65 THEN '55-64: Pre-Retirement'
        ELSE '65+: Seniors/Elderly'
    END AS age_group,
    i.income_group_desc
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier
JOIN income_group i ON c.income_group_code = i.income_group_code
WHERE t.channel = 'teller'
  AND t.transaction_description IN (
      'BAD DEBT W/OFF',
      'CLOSE C/WITHDRAWAL',
      'CLOSE CASH DEP',
      'CLOSED-TO SAV',
      'DIGITAL TRAN FEES',
      'DIGITAL TRANSF DT',
      'FOREIGN NOTES',
      'IBANK PAYMENT FROM',
      'IBANK PAYMENT TO',
      'IBANK TRANSFER',
      'IMMEDIATE TRF CR',
      'JOURNAL CR C/OPS',
      'JOURNAL CREDIT',
      'JOURNAL DEBIT',
      'NPF CREDIT',
      'NPF DEBIT',
      'OPENED-FROM SAV',
      'TRANSFER COR FROM',
      'TRANSFER FROM',
      'TRANSFER TO',
      'TRANSFER TO CLOSE',
      'TRAVEL FOREX(PFC)',
      'TRAVEL FOREX(TFC)',
      'TRF TO CLOSE COR',
      'UNPAID DEBIT',
      'WESTERN UNION(CBI)',
      'WESTERN UNION(CBO)'
  )
GROUP BY t.transaction_description, age_group, i.income_group_desc
ORDER BY transaction_count DESC
LIMIT 5;

 * postgresql://postgres:***@localhost:5432/customerdb
5 rows affected.


transaction_description,transaction_count,age_group,income_group_desc
IMMEDIATE TRF CR,764,25-34: Young Professionals,"R15,000 - 19,999"
IMMEDIATE TRF CR,537,25-34: Young Professionals,"R20,000 - 24,999"
IMMEDIATE TRF CR,394,25-34: Young Professionals,"R25,000 - 33,999"
IMMEDIATE TRF CR,320,16-24: Youth,"R15,000 - 19,999"
IMMEDIATE TRF CR,275,25-34: Young Professionals,"R10,000 - 11,999"


### 2.3 Customers missing payments by age group

In [11]:
%%sql
-- Rounded sum of amounts and number of distinct customers for transactions
-- whose description marks an unpaid, reversed or written-off debit, per age band.
SELECT
    ROUND(SUM(t.amt)) AS total_unpaid,
    COUNT(DISTINCT c.customer_identifier) AS bad_count,
    -- Age bands use half-open ranges so every non-null age falls in exactly one band.
    -- A missing age gets its own label instead of being reported as under 16.
    CASE
        WHEN c.age IS NULL THEN 'Unknown'
        WHEN c.age < 16 THEN 'Under 16'
        WHEN c.age < 25 THEN '16-24: Youth'
        WHEN c.age < 35 THEN '25-34: Young Professionals'
        WHEN c.age < 45 THEN '35-44: Middle-Aged Adults'
        WHEN c.age < 55 THEN '45-54: Prime Working Years'
        WHEN c.age < 65 THEN '55-64: Pre-Retirement'
        ELSE '65+: Seniors/Elderly'
    END AS age_group
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier
WHERE t.transaction_description IN ('BAD DEBT W/OFF', 'DC UNPAID', 'NAEDO DO UNPD', 'ACB DEBIT REVERSAL', 'UNPAID DEBIT')
GROUP BY age_group
ORDER BY age_group;


 * postgresql://postgres:***@localhost:5432/customerdb
6 rows affected.


total_unpaid,bad_count,age_group
196986.0,108,16-24: Youth
677021.0,370,25-34: Young Professionals
597368.0,324,35-44: Middle-Aged Adults
276494.0,155,45-54: Prime Working Years
64699.0,31,55-64: Pre-Retirement
4166.0,3,65+: Seniors/Elderly


### 2.4 Customer investments by age group

In [12]:
%%sql
-- Number and total amount of transactions whose description contains INVEST,
-- broken down by customer age band, largest total first.
SELECT
    COUNT(*) AS investment_transaction_count,
    -- Age bands use half-open ranges so every non-null age falls in exactly one band.
    -- A missing age gets its own label instead of being reported as under 16.
    CASE
        WHEN c.age IS NULL THEN 'Unknown'
        WHEN c.age < 16 THEN 'Under 16'
        WHEN c.age < 25 THEN '16-24: Youth'
        WHEN c.age < 35 THEN '25-34: Young Professionals'
        WHEN c.age < 45 THEN '35-44: Middle-Aged Adults'
        WHEN c.age < 55 THEN '45-54: Prime Working Years'
        WHEN c.age < 65 THEN '55-64: Pre-Retirement'
        ELSE '65+: Seniors/Elderly'
    END AS age_group_label,
    -- Rounded to 2 decimals so float summation noise does not show up in the total.
    ROUND(CAST(SUM(t.amt) AS numeric), 2) AS total_investment_amount
FROM transactions t
JOIN customers c ON t.customer_identifier = c.customer_identifier
-- NOTE(review) no income_group column is selected, but this inner join still
-- drops customers without a matching income group. Confirm that is intended.
JOIN income_group ig ON ig.income_group_code = c.income_group_code
WHERE t.transaction_description LIKE '%INVEST%' -- Transactions like INVESTMENT
GROUP BY age_group_label
ORDER BY total_investment_amount DESC;

 * postgresql://postgres:***@localhost:5432/customerdb
6 rows affected.


investment_transaction_count,age_group_label,total_investment_amount
21,25-34: Young Professionals,98377.48
26,16-24: Youth,76493.92000000001
11,45-54: Prime Working Years,71829.97
11,55-64: Pre-Retirement,39864.9
9,65+: Seniors/Elderly,37760.02
3,35-44: Middle-Aged Adults,14838.0
