### Connecting to PostgreSQL

In [14]:
pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.


In [15]:
pip install psycopg2-binary

Note: you may need to restart the kernel to use updated packages.


In [4]:
%load_ext sql
from sqlalchemy import create_engine
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [17]:
pip install ipython-sql

Note: you may need to restart the kernel to use updated packages.


In [5]:
%sql postgresql://postgres:@localhost:5432/customerdb

### 1. Customer Demographics

## Overview

### 1.1 How do customer demographics and account usage vary across different income groups?

In [19]:
%%sql
/*
   Income group profile. Returns one row per income group with the number of
   customers, their average age and their average number of accounts (both
   averages rounded to whole numbers), ordered by income group code.
   The inner join leaves out customers without a matching income_group row.
*/
SELECT
    grp.income_group_desc               AS income_group,
    COUNT(cust.customer_identifier)     AS total_customers,
    ROUND(AVG(cust.age))                AS avg_age,
    ROUND(AVG(cust.number_of_accounts)) AS avg_number_of_accounts
FROM customers AS cust
INNER JOIN income_group AS grp
    ON grp.income_group_code = cust.income_group_code
GROUP BY grp.income_group_desc, grp.income_group_code
ORDER BY grp.income_group_code

 * postgresql://postgres:***@localhost:5432/customerdb
22 rows affected.


income_group,total_customers,avg_age,avg_number_of_accounts
NOT SUPPLIED,2,31,5
R 0 - 499,62,36,8
R500 - 999,30,43,7
"R1,000 - 1,999",96,54,7
"R2,000 - 2,999",82,47,8
"R3,000 - 3,999",150,40,8
"R4,000 - 4,999",438,37,7
"R5,000 - 5,999",373,36,8
"R6,000 - 6,999",357,36,8
"R7,000 - 7,999",331,36,9


### 1.2 Customers are categorised according to their age group/life stage

In [20]:
%%sql
/*
   Age and life stage distribution. Returns one row per age band with the
   customer count and that band's share of all customers as a percentage
   rounded to two decimals.

   The bands are a cascading chain of upper bounds, so every non-NULL age lands
   in exactly one band, including fractional ages. NULL is tested first because
   a NULL age fails every comparison and would otherwise drop into the ELSE
   branch and be counted as a senior.
*/
SELECT
    CASE
        WHEN age IS NULL THEN 'Age not supplied'
        WHEN age < 25 THEN '16-24: Youth'
        WHEN age < 35 THEN '25-34: Young Professionals'
        WHEN age < 45 THEN '35-44: Middle-Aged Adults'
        WHEN age < 55 THEN '45-54: Prime Working Years'
        WHEN age < 65 THEN '55-64: Pre-Retirement'
        ELSE '65+: Seniors/Elderly'
    END AS age_group_label,
    COUNT(*) AS customer_count,
    /* SUM(COUNT(*)) OVER () is the grand total across all bands */
    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS percentage
FROM customers
GROUP BY age_group_label
ORDER BY age_group_label




 * postgresql://postgres:***@localhost:5432/customerdb
6 rows affected.


age_group_label,customer_count,percentage
16-24: Youth,578,11.24
25-34: Young Professionals,1799,34.97
35-44: Middle-Aged Adults,1498,29.12
45-54: Prime Working Years,885,17.2
55-64: Pre-Retirement,267,5.19
65+: Seniors/Elderly,117,2.27


### 1.3 Customers are categorised according to their age group/life stage and the income group they belong to

In [21]:
 %%sql
 /*
    Age band by income group breakdown. For each combination it returns the
    customer count, the share within the age band and the share of all joined
    customers, both as percentages rounded to two decimals. Only the first
    10 rows in age band and income group code order are displayed.

    The age band is derived once in the labelled CTE and reused for grouping
    and for the window partition. NULL ages get their own label because a NULL
    age fails every comparison and would otherwise be counted as a senior.
    The cascading upper bounds leave no gaps for fractional ages.
 */
 WITH labelled AS (
     SELECT
         CASE
             WHEN c.age IS NULL THEN 'Age not supplied'
             WHEN c.age < 25 THEN '16-24: Youth'
             WHEN c.age < 35 THEN '25-34: Young Professionals'
             WHEN c.age < 45 THEN '35-44: Middle-Aged Adults'
             WHEN c.age < 55 THEN '45-54: Prime Working Years'
             WHEN c.age < 65 THEN '55-64: Pre-Retirement'
             ELSE '65+: Seniors/Elderly'
         END AS age_group_label,
         ig.income_group_desc,
         ig.income_group_code
     FROM customers c
     JOIN income_group ig
         ON c.income_group_code = ig.income_group_code
 )
 SELECT
     age_group_label,
     income_group_desc,
     COUNT(*) AS customer_count,
     ROUND(COUNT(*) * 100.0
           / SUM(COUNT(*)) OVER (PARTITION BY age_group_label), 2) AS percentage_within_age_group,
     ROUND(COUNT(*) * 100.0
           / SUM(COUNT(*)) OVER (), 2) AS percentage_of_total_customers
 FROM labelled
 GROUP BY age_group_label,
     income_group_desc,
     income_group_code
 ORDER BY age_group_label,
     income_group_code
 LIMIT 10;

 * postgresql://postgres:***@localhost:5432/customerdb
10 rows affected.


age_group_label,income_group_desc,customer_count,percentage_within_age_group,percentage_of_total_customers
16-24: Youth,NOT SUPPLIED,1,0.17,0.02
16-24: Youth,R 0 - 499,14,2.42,0.27
16-24: Youth,R500 - 999,4,0.69,0.08
16-24: Youth,"R1,000 - 1,999",7,1.21,0.14
16-24: Youth,"R2,000 - 2,999",7,1.21,0.14
16-24: Youth,"R3,000 - 3,999",18,3.11,0.35
16-24: Youth,"R4,000 - 4,999",68,11.76,1.32
16-24: Youth,"R5,000 - 5,999",56,9.69,1.09
16-24: Youth,"R6,000 - 6,999",53,9.17,1.03
16-24: Youth,"R7,000 - 7,999",44,7.61,0.86


### 1.4 How does the number of accounts owned by a customer relate to their transaction behavior in terms of count and total amount?

In [23]:
%%sql
/*
   Transaction activity by number of accounts held. Returns one row per
   number_of_accounts value with the count of matching transactions and the
   rounded sum of their amounts. Both figures are totals over every customer
   in the bucket, not per customer averages. The inner join leaves out
   customers that have no transactions.
*/
SELECT
    cust.number_of_accounts,
    COUNT(txn.transaction_id) AS transaction_count,
    ROUND(SUM(txn.amt))       AS total_amount
FROM customers AS cust
INNER JOIN transactions AS txn
    ON txn.customer_identifier = cust.customer_identifier
GROUP BY cust.number_of_accounts
ORDER BY cust.number_of_accounts;

 * postgresql://postgres:***@localhost:5432/customerdb
31 rows affected.


number_of_accounts,transaction_count,total_amount
2,1296,82709.0
3,6830,208634.0
4,19690,875208.0
5,40811,1226795.0
6,59643,1478617.0
7,72220,1103983.0
8,88935,1309127.0
9,78185,1005855.0
10,74680,1022493.0
11,72233,694649.0


### 1.5 Active vs Inactive customers

In [7]:
%%sql
/*
   Active versus inactive customers. A customer is Active when the most recent
   record_date among their transactions is on or after 2022-03-01, otherwise
   Inactive. Customers with no transactions have no last date after the left
   join and therefore fall into Inactive. The percentage is relative to the
   total number of rows in customers, rounded to two decimals.
*/
WITH last_activity AS (
    /* latest transaction date per customer */
    SELECT
        customer_identifier,
        MAX(record_date) AS last_transaction_date
    FROM transactions
    GROUP BY customer_identifier
),
customer_flags AS (
    /* one row per customer with its activity flag */
    SELECT
        cust.customer_identifier,
        CASE
            WHEN act.last_transaction_date >= '2022-03-01' THEN 'Active'
            ELSE 'Inactive'
        END AS customer_status
    FROM customers AS cust
    LEFT JOIN last_activity AS act
        ON cust.customer_identifier = act.customer_identifier
)
SELECT
    customer_status,
    COUNT(*) AS customer_count,
    ROUND(100.0 * COUNT(*) / (SELECT COUNT(*) FROM customers), 2) AS percentage
FROM customer_flags
GROUP BY customer_status;


 * postgresql://postgres:***@localhost:5432/customerdb
2 rows affected.


customer_status,customer_count,percentage
Active,4794,93.2
Inactive,350,6.8
