#**2. Device Dominance and User Engagement Analysis**

In [8]:
import sys
import os

sys.path.append(os.path.abspath("D:/D26_Files/Phonepe_Analytics/Pulse_Case_Studies/"))

from db_connection import get_phonepe_engine


engine = get_phonepe_engine()

In [9]:
import pandas as pd
from sqlalchemy import text

In [10]:
tables_df = pd.read_sql("SHOW TABLES;", engine)
tables_df

Unnamed: 0,Tables_in_phonepe_db
0,agg_transaction
1,agg_user
2,insurance_transaction
3,map_transaction
4,map_user
5,top_transaction


In [11]:
Agg_User_df = pd.read_sql("SELECT * FROM agg_user;", engine)
Agg_User_df

Unnamed: 0,State,Year,Quarter,Registered_users,App_opens,Brand,Brand_count,Brand_percentage
0,Andaman & Nicobar Islands,2018,1,6740,0,Xiaomi,1665,0.247033
1,Andaman & Nicobar Islands,2018,1,6740,0,Samsung,1445,0.214392
2,Andaman & Nicobar Islands,2018,1,6740,0,Vivo,982,0.145697
3,Andaman & Nicobar Islands,2018,1,6740,0,Oppo,501,0.074332
4,Andaman & Nicobar Islands,2018,1,6740,0,OnePlus,332,0.049258
...,...,...,...,...,...,...,...,...
6727,West Bengal,2022,1,21919787,236131065,Lenovo,330017,0.015056
6728,West Bengal,2022,1,21919787,236131065,Infinix,284678,0.012987
6729,West Bengal,2022,1,21919787,236131065,Asus,280347,0.012790
6730,West Bengal,2022,1,21919787,236131065,Apple,277752,0.012671


In [12]:
Map_User_df = pd.read_sql("SELECT * FROM map_user;", engine)
Map_User_df.head()

Unnamed: 0,State,Year,Quarter,District,Registered_users,App_opens
0,Andaman & Nicobar Islands,2018,1,North And Middle Andaman District,632,0
1,Andaman & Nicobar Islands,2018,1,South Andaman District,5846,0
2,Andaman & Nicobar Islands,2018,1,Nicobars District,262,0
3,Andaman & Nicobar Islands,2018,2,North And Middle Andaman District,911,0
4,Andaman & Nicobar Islands,2018,2,South Andaman District,8143,0


*1.	Which device brands dominate PhonePe’s registered users at the national level?*

In [13]:
Device_dominance_df = pd.read_sql("""
SELECT 
Brand, 
    SUM(Brand_count) AS Total_users
FROM agg_user
WHERE Year = (SELECT MAX(Year) FROM agg_user)
GROUP BY Brand
ORDER BY Total_users DESC;
""", engine)
Device_dominance_df

Unnamed: 0,Brand,Total_users
0,Xiaomi,88426667.0
1,Vivo,77224385.0
2,Samsung,65886550.0
3,Oppo,49093736.0
4,Realme,32685464.0
5,Others,24227235.0
6,Apple,10774897.0
7,OnePlus,7614988.0
8,Motorola,5003193.0
9,Huawei,4328502.0


*2.	Which districts show the highest user engagement levels (app opens per registered user)?*

In [14]:
district_engagement_df = pd.read_sql("""
SELECT 
    State,
    District,
        SUM(Registered_users) AS Total_users,
        SUM(App_opens) AS Total_opens,
        ROUND(SUM(App_opens) * 1.0 / SUM(Registered_users), 2) AS Engagement_score
FROM map_user
WHERE Registered_users > 0 AND Year = (SELECT MAX(Year) FROM map_user)
GROUP BY State, District
ORDER BY Engagement_score DESC
LIMIT 10;
""", engine)
district_engagement_df

Unnamed: 0,State,District,Total_users,Total_opens,Engagement_score
0,Meghalaya,South West Khasi Hills District,17958.0,80045265.0,4457.36
1,Arunachal Pradesh,Pakke Kessang District,8331.0,28866234.0,3464.92
2,Arunachal Pradesh,Shi Yomi District,7311.0,14971011.0,2047.74
3,Andaman & Nicobar Islands,Nicobars District,6048.0,11335936.0,1874.33
4,Nagaland,Noklak District,5447.0,8410530.0,1544.07
5,Meghalaya,Eastern West Khasi Hills District,8907.0,13159349.0,1477.42
6,Gujarat,The Dangs District,68828.0,94724753.0,1376.25
7,Arunachal Pradesh,Kamle District,12450.0,16932417.0,1360.03
8,Mizoram,Kolasib District,60193.0,73687397.0,1224.19
9,Manipur,Tamenglong District,52312.0,59830361.0,1143.72


*3.	Which regions prefer premium brands (Apple, OnePlus) vs budget brands (Xiaomi, Vivo, Samsung)?*

In [15]:
Region_brand_pref_df = pd.read_sql("""
SELECT 
State,
Brand,
    SUM(Brand_count) AS Total_users,
    CASE 
        WHEN Brand IN ('Apple', 'OnePlus') THEN 'Premium'
        WHEN Brand IN ('Xiaomi', 'Vivo', 'Samsung') THEN 'Budget'
    END AS Brand_category    
FROM agg_user 
WHERE Brand IN ('Apple', 'OnePlus', 'Xiaomi', 'Vivo', 'Samsung') AND Year = (SELECT MAX(Year) FROM agg_user)
GROUP BY State, Brand, Brand_category
ORDER BY State, Total_users DESC;
""", engine)
Region_brand_pref_df


Unnamed: 0,State,Brand,Total_users,Brand_category
0,Andaman & Nicobar Islands,Vivo,18688.0,Budget
1,Andaman & Nicobar Islands,Xiaomi,17957.0,Budget
2,Andaman & Nicobar Islands,Samsung,12955.0,Budget
3,Andaman & Nicobar Islands,OnePlus,3005.0,Premium
4,Andaman & Nicobar Islands,Apple,1634.0,Premium
...,...,...,...,...
170,Uttarakhand,OnePlus,66561.0,Premium
171,West Bengal,Xiaomi,5586240.0,Budget
172,West Bengal,Samsung,4233598.0,Budget
173,West Bengal,Vivo,3652204.0,Budget


*4.	Which brands have high user counts but low market penetration (underperforming brands)?*

In [16]:
underutilized_df = pd.read_sql("""
SELECT 
    Brand,
        SUM(Brand_count) AS Total_brand_users,
        AVG(Brand_percentage) AS Avg_market_share,
        COUNT(DISTINCT State) AS States_present
FROM agg_user
WHERE Year = (SELECT MAX(Year) FROM agg_user)
GROUP BY Brand
ORDER BY Avg_market_share DESC;
""", engine)
underutilized_df

Unnamed: 0,Brand,Total_brand_users,Avg_market_share,States_present
0,Xiaomi,88426667.0,0.248568,36
1,Vivo,77224385.0,0.199653,36
2,Samsung,65886550.0,0.172192,36
3,Oppo,49093736.0,0.129074,36
4,Realme,32685464.0,0.089629,36
5,Others,24227235.0,0.067263,36
6,Apple,10774897.0,0.030787,35
7,Micromax,259092.0,0.027763,2
8,OnePlus,7614988.0,0.021136,32
9,Huawei,4328502.0,0.016198,29


*5.	How does user engagement vary between major metropolitan districts and smaller districts?*

In [17]:
engagement_analysis_df = pd.read_sql("""
SELECT 
State,
District,
        ROUND(AVG(Registered_users), 0) AS Avg_users,
        ROUND(AVG(App_opens * 1.0 / Registered_users), 2) AS Engagement_score,
        CASE 
            WHEN District IN ('Ahmedabad District', 'Bengaluru Urban District', 'Chennai District', 'Hyderabad District', 'Kolkata District', 'Mumbai District', 'Mumbai Suburban District', 'Pune District', 'Thane District', 'Gautam Buddha Nagar District', 'Ghaziabad District', 'Gurugram District', 'Faridabad District', 'Kamrup Metropolitan District', 'New Delhi District', 'South East Delhi District', 'North East District', 'South West District', 'North West District', 'Sas Nagar District', 'Chandigarh District', 'Rangareddy District', 'Medchal Malkajgiri District', 'Sangareddy District', 'North Twenty Four Parganas District', 'South Twenty Four Parganas District', 'Howrah District', 'Hooghly District') 
            THEN 'Metro'
            ELSE 'Non-Metro'
        END AS Area_type
FROM map_user
WHERE Registered_users > 0 AND Year = (SELECT MAX(Year) FROM map_user)
GROUP BY State, District, Area_type
ORDER BY Engagement_score DESC;
""", engine)
engagement_analysis_df 

Unnamed: 0,State,District,Avg_users,Engagement_score,Area_type
0,Meghalaya,South West Khasi Hills District,4490.0,6269.23,Non-Metro
1,Arunachal Pradesh,Pakke Kessang District,2083.0,3052.28,Non-Metro
2,Arunachal Pradesh,Shi Yomi District,1828.0,2288.18,Non-Metro
3,Manipur,Senapati District,10498.0,1955.95,Non-Metro
4,Andaman & Nicobar Islands,Nicobars District,3024.0,1886.02,Non-Metro
...,...,...,...,...,...
847,Odisha,Jajapur District,933967.0,10.28,Non-Metro
848,Tamil Nadu,Thiruvallur District,2834981.0,9.93,Non-Metro
849,Uttar Pradesh,Gautam Buddha Nagar District,4038779.0,7.34,Metro
850,Delhi,South East Delhi District,2335082.0,4.92,Metro
