### Connecting to PostgreSQL

In [1]:
pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install psycopg2-binary

Note: you may need to restart the kernel to use updated packages.


In [3]:
%load_ext sql
from sqlalchemy import create_engine
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

In [4]:
pip install ipython-sql

Note: you may need to restart the kernel to use updated packages.


In [5]:
%sql postgresql://postgres:@localhost:5432/customerdb

### Group customers by income group and calculate basic statistics for each group.

In [9]:
%%sql
-- For each income group, report the head count plus the rounded mean age and rounded mean number of accounts.
SELECT
    grp.income_group_desc                AS income_group,
    COUNT(cust.customer_identifier)      AS total_customers,
    ROUND(AVG(cust.age))                 AS avg_age,
    ROUND(AVG(cust.number_of_accounts))  AS avg_number_of_accounts
FROM customers AS cust
-- Inner join, so customers whose income_group_code has no match in income_group are left out.
INNER JOIN income_group AS grp
    ON cust.income_group_code = grp.income_group_code
-- income_group_code is grouped alongside the description so the ORDER BY can sort on it.
GROUP BY grp.income_group_desc, grp.income_group_code
ORDER BY grp.income_group_code

 * postgresql://postgres:***@localhost:5432/customerdb
22 rows affected.


income_group,total_customers,avg_age,avg_number_of_accounts
NOT SUPPLIED,2,31,5
R 0 - 499,62,36,8
R500 - 999,30,43,7
"R1,000 - 1,999",96,54,7
"R2,000 - 2,999",82,47,8
"R3,000 - 3,999",150,40,8
"R4,000 - 4,999",438,37,7
"R5,000 - 5,999",373,36,8
"R6,000 - 6,999",357,36,8
"R7,000 - 7,999",331,36,9


### Customers are categorised according to their age group/life stage

In [8]:
%%sql
-- Label every customer with a life-stage band once, then count customers per band and compute each share of the total.
WITH labelled AS (
    SELECT
        CASE
            -- Branches are evaluated top-down, so each one only needs its upper bound.
            -- Unlike pairs of BETWEEN ranges this leaves no gap for non-integer ages.
            WHEN age < 25 THEN '16-24: Youth'
            WHEN age < 35 THEN '25-34: Young Professionals'
            WHEN age < 45 THEN '35-44: Middle-Aged Adults'
            WHEN age < 55 THEN '45-54: Prime Working Years'
            WHEN age < 65 THEN '55-64: Pre-Retirement'
            WHEN age >= 65 THEN '65+: Seniors/Elderly'
            -- Only a NULL age fails every comparison above, so report it separately
            -- instead of letting it inflate the 65+ band.
            ELSE 'Unknown'
        END AS age_group_label
    FROM customers
)
SELECT
    age_group_label,
    COUNT(*) AS customer_count,
    -- The window SUM runs over the grouped counts, so it is the grand total of customers.
    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS percentage
FROM labelled
GROUP BY age_group_label
ORDER BY age_group_label




 * postgresql://postgres:***@localhost:5432/customerdb
6 rows affected.


age_group_label,customer_count,percentage
16-24: Youth,578,11.24
25-34: Young Professionals,1799,34.97
35-44: Middle-Aged Adults,1498,29.12
45-54: Prime Working Years,885,17.2
55-64: Pre-Retirement,267,5.19
65+: Seniors/Elderly,117,2.27


### Customers are categorised according to their age group/life stage and the income group they belong to

In [10]:
 %%sql
 -- Cross-tabulate life-stage band against income group, with each cell shown as a share of its band and of all customers.
 WITH labelled AS (
     SELECT
         c.income_group_code,
         CASE
             -- Branches are evaluated top-down, so each one only needs its upper bound.
             -- Unlike pairs of BETWEEN ranges this leaves no gap for non-integer ages.
             WHEN c.age < 25 THEN '16-24: Youth'
             WHEN c.age < 35 THEN '25-34: Young Professionals'
             WHEN c.age < 45 THEN '35-44: Middle-Aged Adults'
             WHEN c.age < 55 THEN '45-54: Prime Working Years'
             WHEN c.age < 65 THEN '55-64: Pre-Retirement'
             WHEN c.age >= 65 THEN '65+: Seniors/Elderly'
             -- Only a NULL age fails every comparison above, so report it separately
             -- instead of letting it inflate the 65+ band.
             ELSE 'Unknown'
         END AS age_group_label
     FROM customers c
 )
 SELECT
     l.age_group_label,
     ig.income_group_desc,
     COUNT(*) AS customer_count,
     -- Denominator is the total of the grouped counts within the same age band.
     ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY l.age_group_label), 2) AS percentage_within_age_group,
     -- Denominator is the total of the grouped counts across the whole result.
     ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS percentage_of_total_customers
 FROM labelled l
 -- Inner join, so customers whose income_group_code has no match in income_group are left out.
 JOIN income_group ig
     ON l.income_group_code = ig.income_group_code
 -- income_group_code is grouped alongside the description so the ORDER BY can sort on it.
 GROUP BY l.age_group_label, ig.income_group_desc, ig.income_group_code
 ORDER BY l.age_group_label, ig.income_group_code;

 * postgresql://postgres:***@localhost:5432/customerdb
120 rows affected.


age_group_label,income_group_desc,customer_count,percentage_within_age_group,percentage_of_total_customers
16-24: Youth,NOT SUPPLIED,1,0.17,0.02
16-24: Youth,R 0 - 499,14,2.42,0.27
16-24: Youth,R500 - 999,4,0.69,0.08
16-24: Youth,"R1,000 - 1,999",7,1.21,0.14
16-24: Youth,"R2,000 - 2,999",7,1.21,0.14
16-24: Youth,"R3,000 - 3,999",18,3.11,0.35
16-24: Youth,"R4,000 - 4,999",68,11.76,1.32
16-24: Youth,"R5,000 - 5,999",56,9.69,1.09
16-24: Youth,"R6,000 - 6,999",53,9.17,1.03
16-24: Youth,"R7,000 - 7,999",44,7.61,0.86


### Identify Top Segments with High Account Usage: customers holding the maximum number of accounts

In [11]:
%%sql
-- List every customer whose number of accounts equals the overall maximum, together with age and income group.
SELECT
    cust.customer_identifier,
    grp.income_group_desc AS income_group,
    cust.age,
    cust.number_of_accounts
FROM customers AS cust
-- Inner join, so a customer whose income_group_code has no match in income_group is not listed.
JOIN income_group AS grp
    ON cust.income_group_code = grp.income_group_code
WHERE cust.number_of_accounts = (
    -- The maximum is taken over the whole customers table, independent of the join above.
    SELECT MAX(number_of_accounts) FROM customers
)
-- Every returned row has the same number_of_accounts, so sorting on that column had no effect.
-- Sorting on the identifier instead makes the order of tied customers repeatable.
ORDER BY cust.customer_identifier;

 * postgresql://postgres:***@localhost:5432/customerdb
1 rows affected.


customer_identifier,income_group,age,number_of_accounts
ID_d459e586ee56b3e18a26d856a7f19c951c6ff61d6de841bf00eff41d0b85cf47b79fa26e1496046e414faac25d1e5da349c0bf245aee7d1eee6295356d96e805,"R34,000 - 41,999",32,38
