In [None]:
pip install sqlalchemy

In [None]:
pip install psycopg3-binary

In [None]:
pip install ipython-sql

In [3]:
# Load the "sql" extension so the %sql / %%sql magics used in the cells below are available.
%load_ext sql
# NOTE(review): create_engine is not referenced in the visible cells - confirm whether it is still needed.
from sqlalchemy import create_engine

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [4]:
%sql postgresql://postgres:990723@localhost:5432/absadatabase

In [None]:
%%sql
DROP TABLE IF EXISTS assets;

CREATE TABLE assets AS
SELECT 
    transaction_description,
    -- Sum of the negative amounts for this description
    SUM(CASE WHEN amt < 0 THEN amt ELSE 0 END) AS credit,
    -- Sum of the positive amounts for this description
    SUM(CASE WHEN amt > 0 THEN amt ELSE 0 END) AS debit,
    -- Net signed total for this description
    SUM(amt) AS balance,
    -- NOTE(review) the channel queries in this notebook label positive amounts as INFLOW,
    -- while a positive balance is labelled LOSS here. Confirm the intended sign convention.
    CASE 
        WHEN SUM(amt) > 0 THEN 'LOSS'
        WHEN SUM(amt) < 0 THEN 'PROFIT'
        ELSE 'NEUTRAL'
    END AS profit_loss
FROM 
    transactiondata
WHERE 
    -- Only these transaction descriptions are summarised in the table
    transaction_description IN (
        'CREDIT TRANSFER',
        'TEL CR TRANSFER',
        'MOBILE PAYMENT CR',
        'DIGITAL PAYMENT CR',
        'ATM PAYMENT FR',
        'CASHSEND ATM',
        'CASHSEND DIGITAL',
        'CASHSEND MOBILE',
        'POS REFUND PUR',
        'POS CARD REFUND',
        'POS O/SEA REFUND',
        'DIGITAL TRANSF CR',
        'INVESTMENT CAPITAL',
        'INVESTMNT INTEREST',
        'INTEREST',
        'INTEREST ADJUST',
        'BAD DEBT W/OFF'
    )
GROUP BY 
    transaction_description;


In [2]:
# Load the "sql" extension; if it is already loaded IPython only prints a reminder about %reload_ext.
%load_ext sql
from sqlalchemy import create_engine
# Choose the table style used to render query results.
# NOTE(review): presumably a workaround for ipython-sql failing on its default style with
# newer prettytable releases - confirm before removing.
%config SqlMagic.style = '_DEPRECATED_DEFAULT'

## **Channel Performance Over Time**
## How does the performance of each channel vary seasonally, and can this data inform better channel-specific promotions?
## The insights help the bank understand which channels are profitable and which need attention, guiding future investments, marketing strategies, and operational improvements.

In [7]:
%%sql
SELECT 
    EXTRACT(MONTH FROM record_date) AS month,
    EXTRACT(YEAR FROM record_date) AS year,
    channel,
    -- Sum of the positive amounts for the channel in that month
    SUM(CASE WHEN amt > 0 THEN amt ELSE 0 END) AS INFLOW,
    -- Sum of the negative amounts for the channel in that month
    SUM(CASE WHEN amt < 0 THEN amt ELSE 0 END) AS OUTFLOW,
    -- Direction of the net amount for the channel in that month
    CASE 
       WHEN SUM(amt) > 0 THEN 'INFLOW'
       WHEN SUM(amt) < 0 THEN 'OUTFLOW'
       ELSE 'NEUTRAL'
    END AS flow
FROM 
    transactiondata
GROUP BY 
    year, month, channel
-- channel is part of the sort key so rows inside one month always come back in the same order
ORDER BY 
    year, month, channel



 * postgresql://postgres:***@localhost:5432/absadatabase
56 rows affected.


month,year,channel,inflow,outflow,flow
7,2021,atm,25620.0,-340806.5,OUTFLOW
7,2021,internet,114710.89,-280492.34,OUTFLOW
7,2021,system,912890.88,-511596.39,INFLOW
7,2021,teller,133245.84000000003,-66463.43,INFLOW
8,2021,atm,113780.0,-1046787.5,OUTFLOW
8,2021,internet,507749.17,-1031156.43,OUTFLOW
8,2021,system,2649958.68,-1651552.4600000002,INFLOW
8,2021,teller,423456.22,-62617.09,INFLOW
9,2021,atm,196500.0,-2378441.0,OUTFLOW
9,2021,internet,1360257.41,-2148567.68,OUTFLOW


In [12]:
%%sql
SELECT 
    channel,
    SUM(CASE WHEN amt >0  THEN amt ELSE 0 END) AS INFLOW,
    SUM(CASE WHEN amt <0  THEN amt ELSE 0 END) AS OUTFLOW,
    SUM(amt) AS TOTAL_SUM,
    CASE 
       WHEN SUM(amt) >0 THEN 'INFLOW'
       WHEN SUM(amt) <0 THEN 'OUTFLOW'
       ELSE 'NEUTRAL'
    END AS flow
FROM 
    transactiondata
GROUP BY 
 channel
;



 * postgresql://postgres:***@localhost:5432/absadatabase
4 rows affected.


channel,inflow,outflow,total_sum,flow
atm,14578923.85,-111512320.62999998,-96933396.78,OUTFLOW
internet,74404500.50999998,-103473226.9000001,-29068726.389999986,OUTFLOW
system,251441783.0500003,-176631281.48999023,74810501.56000988,INFLOW
teller,82452458.30999996,-20761962.520000003,61690495.78999995,INFLOW


## **High Value Customer**
## Who are the high-value customers, and how can we tailor premium services or exclusive offers for them?
## Customers with irregular spending behavior, such as a few large transactions, can indicate potential risks like fraud. Identifying high-value customers (e.g., those with high total spending or a high average transaction amount) lets the company create targeted promotions or personalized offers to retain or upsell them.

In [13]:
%%sql
WITH customer_totals AS (
    -- One row per customer with the signed total and the mean of amt.
    -- NOTE(review) total_spent is a signed sum, so it reads as spending only if
    -- positive amounts represent spending. Confirm against the data definition.
    SELECT
        customer_identifier,
        SUM(amt) AS total_spent,
        AVG(amt) AS avg_transaction_amount
    FROM transactiondata
    GROUP BY customer_identifier
)
SELECT
    customer_identifier,
    total_spent,
    avg_transaction_amount
FROM customer_totals
WHERE total_spent > 10000
ORDER BY total_spent DESC

 * postgresql://postgres:***@localhost:5432/absadatabase
538 rows affected.


customer_identifier,total_spent,avg_transaction_amount
ID_66d6168747d103782cae4376d58479f63e4adfa2a04f6c0188cdeffa03d8b5f9e100b2cfc8e417e724f103f944c9f7373db249726217619436caee2babd07853,48737.73999999999,230.98454976303316
ID_87b3548ac5da8a321eb9740ef432be99eb30d4b5200b7c2d0d8efd80966bc37034588a09ee6b5542767e145edae4216c50492df1ff81dec7697f615ae47a9ace,46060.14,9212.028
ID_c211d0275467063000c59b22bca5c53ff25f6944cb21edf35545aefc5ff6af3af3331e9bdba71931d9c558ce42840f2776d50cba6c4684dd0a1b9a78ad2b6de3,45311.81,272.9627108433735
ID_86833e12e78bf3fe3a524d2e3528b833fa56c27a069598c1536fc1a189031f34ee45b79591d0ff1d23acc995516e1f949f44e1d547fd4aedec2eb3a6532775db,44300.0,44300.0
ID_9d8fed88282637414b34c6573caac346255083c1a8334fba5d8fb78be5f259f3f6286efc83dbf83956bb3f210562c20f797b1005a31a77cb61d35a308a05051e,43471.16,331.84091603053434
ID_fecebdd617f387ae67b4627af6f3287c692b8e4a14eed838f3c6bfcda2301580dbbd80f4c9888dd24060bf765afec1ede05d626a0981166f86e84ebae3498a52,43054.49999999998,214.20149253731333
ID_bbebeb551e3e7811bc1f5c9d273aa47f1132675d485a6ad862fb184d73788fa2a01e7eaebbee59d52f965de6f258552593937b4b583cfa288bbd3ab4d09c6aef,42107.0,10526.75
ID_bbfd5c7dba99e4c044a825139f0995c880cc886bcdf27254410a3b1cdf6a75d2b11226d343597b871917705222b212125146627ae11a012e2612a80c134f6fe4,41940.04999999996,97.3087006960556
ID_f59f4560670e6405bcf4f855e31d800571e97e6d1f88a31da400b764db7b411adef421534c468afff007fb91f97db8a13207a86a655953e1520759eef6943ef5,41445.879999999976,460.5097777777775
ID_54d714b6c32cdfda497884560e5e238a23400c720c35a37a7d8dbaf9258ca51175968c51d001a664fc6298bff1e21b08caa80344c20a29eb0fbd5c792c7d8a00,41000.0,20500.0


## Identifying customers with active accounts, and customers who are at risk of churn
## Customers flagged as "At Risk of Churn" are those with no transactions in the three months before the reference date (2022-05-01 to 2022-08-01).
## These customers should be targeted with retention strategies such as personalized offers and loyalty rewards.

In [None]:
%%sql

WITH last_seen AS (
    -- Most recent transaction date per customer
    SELECT
        customer_identifier,
        MAX(record_date) AS last_transaction_date
    FROM transactiondata
    GROUP BY customer_identifier
),

recent AS (
    -- Transactions per customer from 2022-05-01 through the 2022-08-01 reference date, both ends included
    SELECT
        customer_identifier,
        COUNT(*) AS recent_transaction_count
    FROM transactiondata
    WHERE record_date BETWEEN DATE '2022-05-01' AND DATE '2022-08-01'
    GROUP BY customer_identifier
)

SELECT
    ls.customer_identifier,
    ls.last_transaction_date,
    COALESCE(r.recent_transaction_count, 0) AS recent_transaction_count,
    -- Customers with no row in the recent window get a count of 0 and are flagged
    CASE COALESCE(r.recent_transaction_count, 0)
        WHEN 0 THEN 'At Risk of Churn'
        ELSE 'Active'
    END AS customer_status
FROM last_seen AS ls
LEFT JOIN recent AS r
    ON ls.customer_identifier = r.customer_identifier
ORDER BY
    customer_status, ls.last_transaction_date ASC



In [39]:
pip install matplotlib seaborn pandas

Note: you may need to restart the kernel to use updated packages.


In [40]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Sample DataFrame (replace this with your actual data).
# It holds the first eight rows of the monthly channel totals: four channels for July and August 2021.
# Every list must have exactly one entry per row; pd.DataFrame raises
# "All arrays must be of the same length" when the columns differ in length.
data = {
    'month': [7, 7, 7, 7, 8, 8, 8, 8],
    'year': [2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021],
    'channel': ['atm', 'internet', 'system', 'teller', 'atm', 'internet', 'system', 'teller'],
    'total_revenue': [-315186.5, -165781.45, 401294.49, 66782.41, -933007.5, -523407.26, 998406.22, 360839.13]
}

df = pd.DataFrame(data)

# Display the dataframe to check if it's correct
df


Unnamed: 0,month,year,channel,total_revenue
0,7,2021,atm,-315186.5
1,7,2021,internet,-165781.45
2,7,2021,system,401294.49
3,7,2021,teller,66782.41
4,8,2021,atm,-933007.5
5,8,2021,internet,-523407.26
6,8,2021,system,998406.22
7,8,2021,teller,360839.13
