#**4. Transaction Analysis for Market Expansion**

In [2]:
import sys
import os

sys.path.append(os.path.abspath("D:/D26_Files/Phonepe_Analytics/Pulse_Case_Studies/"))

from db_connection import get_phonepe_engine


engine = get_phonepe_engine()

In [3]:
import pandas as pd
from sqlalchemy import text

In [3]:
tables_df = pd.read_sql("SHOW TABLES;", engine)
tables_df

Unnamed: 0,Tables_in_phonepe_db
0,agg_transaction
1,agg_user
2,insurance_transaction
3,map_transaction
4,map_user
5,top_transaction


In [4]:
Agg_Transaction_df = pd.read_sql("SELECT * FROM agg_transaction;", engine)
Agg_Transaction_df.head()

Unnamed: 0,State,Year,Quarter,Transaction_type,Transaction_count,Transaction_amount
0,Andaman & Nicobar Islands,2018,1,Recharge & bill payments,4200,1845307.0
1,Andaman & Nicobar Islands,2018,1,Peer-to-peer payments,1871,12138660.0
2,Andaman & Nicobar Islands,2018,1,Merchant payments,298,452507.2
3,Andaman & Nicobar Islands,2018,1,Financial Services,33,10601.42
4,Andaman & Nicobar Islands,2018,1,Others,256,184689.9


In [5]:
Map_Transaction_df = pd.read_sql("SELECT * FROM map_transaction;", engine)
Map_Transaction_df.head()

Unnamed: 0,State,Year,Quarter,District_name,Transaction_count,Transaction_amount
0,Andaman & Nicobar Islands,2018,1,North And Middle Andaman District,442,931663.1
1,Andaman & Nicobar Islands,2018,1,South Andaman District,5688,12560250.0
2,Andaman & Nicobar Islands,2018,1,Nicobars District,528,1139849.0
3,Andaman & Nicobar Islands,2018,2,North And Middle Andaman District,825,1317863.0
4,Andaman & Nicobar Islands,2018,2,South Andaman District,9395,23948240.0


*1.	What is the overall contribution of states to PhonePe’s transaction volume & value?*

In [6]:
states_contribution_df = pd.read_sql("""
SELECT 
    State,
    SUM(Transaction_count) as Total_Transaction_Volume,
    SUM(Transaction_amount) as Total_Transaction_Value,
    ROUND(AVG(Transaction_amount/Transaction_count), 2) as Avg_Transaction_Value
FROM agg_transaction 
GROUP BY State 
ORDER BY Total_Transaction_Value DESC, Total_Transaction_Volume DESC;
""", engine)
states_contribution_df.head()

Unnamed: 0,State,Total_Transaction_Volume,Total_Transaction_Value,Avg_Transaction_Value
0,Telangana,26174680000.0,41655960000000.0,1281.29
1,Karnataka,30970950000.0,40678720000000.0,1215.76
2,Maharashtra,31985210000.0,40374200000000.0,1155.92
3,Andhra Pradesh,18918700000.0,34669080000000.0,1280.31
4,Uttar Pradesh,18523600000.0,26885210000000.0,1164.27


*2.	How much do the top 5 states dominate compared to the rest of India?*

In [8]:
Top5_dominance_df = pd.read_sql("""
WITH state_totals AS (
    SELECT 
        State,
        SUM(Transaction_count) as Total_Volume,
        SUM(Transaction_amount) as Total_Value
    FROM agg_transaction 
    GROUP BY State
),
ranked_states AS (
    SELECT 
        State,
        Total_Volume,
        Total_Value,
        ROW_NUMBER() OVER (ORDER BY Total_Value DESC) as state_rank
    FROM state_totals
)
SELECT 
    CASE 
        WHEN state_rank <= 5 THEN 'Top 5 States'
        ELSE 'Rest of India'
    END as Category,
    SUM(Total_Volume) as Total_Transaction_Volume,
    SUM(Total_Value) as Total_Transaction_Value,
    COUNT(*) as Number_of_States,
    ROUND(SUM(Total_Value) * 100.0 / (SELECT SUM(Total_Value) FROM state_totals), 2) as Percentage_of_Total_Value,
    ROUND(SUM(Total_Volume) * 100.0 / (SELECT SUM(Total_Volume) FROM state_totals), 2) as Percentage_of_Total_Volume
FROM ranked_states
GROUP BY 
    CASE 
        WHEN state_rank <= 5 THEN 'Top 5 States'
        ELSE 'Rest of India'
    END
ORDER BY Total_Transaction_Value DESC;
""", engine)
Top5_dominance_df

Unnamed: 0,Category,Total_Transaction_Volume,Total_Transaction_Value,Number_of_States,Percentage_of_Total_Value,Percentage_of_Total_Volume
0,Top 5 States,126573100000.0,184263200000000.0,5,53.33,53.8
1,Rest of India,108711100000.0,161259100000000.0,31,46.67,46.2


*3.	Which underperforming states show strong recent growth (future opportunities)?*

In [11]:
underperforming_growth_states_df = pd.read_sql("""
SELECT 
    State,
    SUM(CASE WHEN Year = (SELECT MAX(Year) FROM agg_transaction) 
        THEN Transaction_amount ELSE 0 END) as Recent_Year_Value,
    SUM(CASE WHEN Year = (SELECT MAX(Year) - 1 FROM agg_transaction) 
        THEN Transaction_amount ELSE 0 END) as Previous_Year_Value,
    SUM(Transaction_amount) as Total_Overall_Value,
    ROUND(
        ((SUM(CASE WHEN Year = (SELECT MAX(Year) FROM agg_transaction) THEN Transaction_amount ELSE 0 END) - 
          SUM(CASE WHEN Year = (SELECT MAX(Year) - 1 FROM agg_transaction) THEN Transaction_amount ELSE 0 END)) * 100.0 /
         SUM(CASE WHEN Year = (SELECT MAX(Year) - 1 FROM agg_transaction) THEN Transaction_amount ELSE 0 END)), 2
    ) as Growth_Rate
FROM agg_transaction
GROUP BY State
HAVING Previous_Year_Value > 0
ORDER BY Growth_Rate DESC
LIMIT 10;
""", engine)
underperforming_growth_states_df

Unnamed: 0,State,Recent_Year_Value,Previous_Year_Value,Total_Overall_Value,Growth_Rate
0,Manipur,43874200000.0,23458050000.0,186265600000.0,87.03
1,Lakshadweep,716430000.0,397667800.0,1609321000.0,80.16
2,Jammu & Kashmir,528444000000.0,311538000000.0,1184683000000.0,69.62
3,Bihar,7403791000000.0,4848213000000.0,17901350000000.0,52.71
4,Ladakh,39718400000.0,26233680000.0,88994620000.0,51.4
5,West Bengal,6387113000000.0,4227142000000.0,15584160000000.0,51.1
6,Arunachal Pradesh,122435200000.0,81456470000.0,274435000000.0,50.31
7,Meghalaya,68391800000.0,46449040000.0,162543900000.0,47.24
8,Andaman & Nicobar Islands,30739720000.0,20933020000.0,70667450000.0,46.85
9,Chhattisgarh,2033512000000.0,1388610000000.0,4890472000000.0,46.44


*4.	Which states are “saturated” vs “emerging” markets for PhonePe?*

In [None]:
market_status_df = pd.read_sql("""
SELECT 
    State,
    SUM(Transaction_amount) as Total_Value,
    ROUND(AVG(Transaction_amount/Transaction_count), 2) as Avg_Transaction_Size,
    ROUND(
        (MAX(CASE WHEN Year = (SELECT MAX(Year) FROM agg_transaction) THEN Transaction_amount END) -
         MAX(CASE WHEN Year = (SELECT MAX(Year) - 1 FROM agg_transaction) THEN Transaction_amount END))
        * 100.0 /
        MAX(CASE WHEN Year = (SELECT MAX(Year) - 1 FROM agg_transaction) THEN Transaction_amount END), 2) as Growth_Percentage
FROM agg_transaction
GROUP BY State
HAVING MAX(Year) >= (SELECT MAX(Year) - 1 FROM agg_transaction)
ORDER BY Total_Value DESC;
""", engine )
market_status_df

*5.	Which 10 states shows high Total transaction value vs Total transaction volume?*

In [None]:
top_transaction_volume_df= pd.read_sql("""
 SELECT 
    State,
    SUM(Transaction_count) as Total_Transaction_Volume
FROM agg_transaction
GROUP BY State
ORDER BY Total_Transaction_Volume DESC
LIMIT 10;
""", engine)
top_transaction_volume_df

Unnamed: 0,State,Total_Transaction_Volume
0,Maharashtra,31985210000.0
1,Karnataka,30970950000.0
2,Telangana,26174680000.0
3,Andhra Pradesh,18918700000.0
4,Uttar Pradesh,18523600000.0
5,Rajasthan,17108540000.0
6,Madhya Pradesh,14072180000.0
7,Bihar,10941030000.0
8,West Bengal,9191500000.0
9,Odisha,8918527000.0


In [None]:
top_transaction_value_df= pd.read_sql("""
 SELECT 
    State,
    SUM(Transaction_amount) as Total_Transaction_Value
FROM agg_transaction
GROUP BY State
ORDER BY Total_Transaction_Value DESC
LIMIT 10;
""", engine)
top_transaction_value_df

Unnamed: 0,State,Total_Transaction_Value
0,Telangana,41655960000000.0
1,Karnataka,40678720000000.0
2,Maharashtra,40374200000000.0
3,Andhra Pradesh,34669080000000.0
4,Uttar Pradesh,26885210000000.0
5,Rajasthan,26343240000000.0
6,Madhya Pradesh,19125280000000.0
7,Bihar,17901350000000.0
8,West Bengal,15584160000000.0
9,Odisha,12263980000000.0
