In [1]:
import pandas as pd

## Objective:
Analyze Market Share trends by Cities (Mumbai, Bangalore, Delhi, Chennai, Kolkata, Hyderabad)

To understand the churn pattern of users i.e. in which states/cities we are seeing a major movement of users to other carriers

Comparison of the top 10 pin codes, where Airtel has lost market share in this period and other operators like Reliance Jio has witnessed a significant surge

Suggest a distinctive strategy for Airtel to retain its users across 3 different states

Specify the assumptions taken during the analysis

## Data Dictionary:
date: Date of which the record is captured

udid: Unique identifier of the user/customer


Operator: Mobile operator of the user

State: State where the user is spotted on that day

City: City where the user is spotted on that day

zipcode: ZIP where the user is spotted on that day

Ad_Requests: Mobile traffic in terms of ad-requests reflects user's quantitative activity on that day using that operator


In [3]:
data = pd.read_csv('../Data/rawdata.csv')

In [4]:
data.head()

Unnamed: 0,date,udid,operator,state,city,zipcode,ad_requests
0,2018-04-15,00000902bb5bea2d8f865e1c6b0ace6abf6725d7,Jio,TAMIL NADU,PALANI,624601,1
1,2018-05-15,00002cd8fd6d3ae94d385102c85d40870430bed3,Vodafone Essar,GUJARAT,AHMEDABAD,380014,3
2,2018-05-15,0000c379b9f8c3476e3fb7e86ec0b8ccc3f6b90c,Jio,UTTAR PRADESH,CHANDAULI,232101,2
3,2018-04-15,0001265ed98b8c1ac1f208e68a05ebcd501aca8b,Jio,ASSAM,NORTH GUWAHATI,781008,77
4,2018-05-15,0001cbb4b1428ad3f0175532b8c8e2b108f32d08,Jio,KARNATAKA,BANGALORE,560066,6


In [7]:
data.size

130320337

## Churn pattern

In [5]:
#State level
data_state = data[['date','udid','operator','state']].drop_duplicates()
print data_state.size
data_state.head()

68976844


Unnamed: 0,date,udid,operator,state
0,2018-04-15,00000902bb5bea2d8f865e1c6b0ace6abf6725d7,Jio,TAMIL NADU
1,2018-05-15,00002cd8fd6d3ae94d385102c85d40870430bed3,Vodafone Essar,GUJARAT
2,2018-05-15,0000c379b9f8c3476e3fb7e86ec0b8ccc3f6b90c,Jio,UTTAR PRADESH
3,2018-04-15,0001265ed98b8c1ac1f208e68a05ebcd501aca8b,Jio,ASSAM
4,2018-05-15,0001cbb4b1428ad3f0175532b8c8e2b108f32d08,Jio,KARNATAKA


In [20]:
print data_state.operator.unique()
print data_state.date.unique()

['Jio' 'Vodafone Essar' 'Bharti Airtel' 'Idea']
['2018-04-15' '2018-05-15']


In [6]:
#Replace operators by numbers and dates by months
df1 = data_state.replace({'Jio': 1, 'Vodafone Essar': 2, 'Bharti Airtel':3, 'Idea':4})
df1 = df1.replace({'2018-04-15': 'Apr', '2018-05-15': 'May'})
df1.head()

Unnamed: 0,date,udid,operator,state
0,Apr,00000902bb5bea2d8f865e1c6b0ace6abf6725d7,1,TAMIL NADU
1,May,00002cd8fd6d3ae94d385102c85d40870430bed3,2,GUJARAT
2,May,0000c379b9f8c3476e3fb7e86ec0b8ccc3f6b90c,1,UTTAR PRADESH
3,Apr,0001265ed98b8c1ac1f208e68a05ebcd501aca8b,1,ASSAM
4,May,0001cbb4b1428ad3f0175532b8c8e2b108f32d08,1,KARNATAKA


In [10]:
df1_apr = df1[df1['date'] == 'Apr']
df1_may = df1[df1['date'] == 'May']

In [27]:
df1_apr.head()
df1_may.head()

print df1_apr.size
print df1_may.size

34449024
34527820


In [34]:
#Churn at national level
country = pd.merge(df1_apr[['date','udid','operator']], df1_may[['date','udid','operator']], how='left', on= ['udid','operator'] ,indicator=True, suffixes=('_x', '_y'))

In [35]:
country.head()

Unnamed: 0,date_x,udid,operator,date_y,_merge
0,Apr,00000902bb5bea2d8f865e1c6b0ace6abf6725d7,1,May,both
1,Apr,0001265ed98b8c1ac1f208e68a05ebcd501aca8b,1,May,both
2,Apr,0003610ce98164faaafc131a01402eac5a87f2a3,1,May,both
3,Apr,0004533e17eeff9b25540908ea1e0c8848da89f0,2,May,both
4,Apr,0004b5b18802f8486339eb3daa04f633dcbc1c7c,1,May,both


In [36]:
country._merge.unique()

[both, left_only]
Categories (2, object): [both, left_only]

In [48]:
#total customers in the country (assuming every operator to have a new customer even if 1 person has 2 phones)
print 'Total connections =',country.size

#customers who churned
print 'Churned = ',country[country['_merge'] == 'left_only'].size

#%ge churn
print '%age churned = ',country[country['_merge'] == 'left_only'].size/country.size.astype(float)

#customers who stayed
print 'Loyal = ',country[country['_merge'] == 'both'].size

Total connections = 43844545
Churned =  3467535
%age churned =  0.0790870335181
Loyal =  40377010


In [49]:
#Churn at state level
state = pd.merge(df1_apr, df1_may, how='left', on= ['udid','operator','state'] ,indicator=True, suffixes=('_x', '_y'))
state.head()

Unnamed: 0,date_x,udid,operator,state,date_y,_merge
0,Apr,00000902bb5bea2d8f865e1c6b0ace6abf6725d7,1,TAMIL NADU,May,both
1,Apr,0001265ed98b8c1ac1f208e68a05ebcd501aca8b,1,ASSAM,May,both
2,Apr,0003610ce98164faaafc131a01402eac5a87f2a3,1,MAHARASHTRA,May,both
3,Apr,0004533e17eeff9b25540908ea1e0c8848da89f0,2,UTTAR PRADESH,May,both
4,Apr,0004b5b18802f8486339eb3daa04f633dcbc1c7c,1,ASSAM,May,both


In [50]:
state._merge.unique()

[both, left_only]
Categories (2, object): [both, left_only]

In [54]:
state[(state['_merge'] == 'left_only') & (state['date_y'].notnull())]

Unnamed: 0,date_x,udid,operator,state,date_y,_merge


In [56]:
state[pd.isnull(state['state'])]

Unnamed: 0,date_x,udid,operator,state,date_y,_merge


In [55]:
state[state['date_y'].notnull()]

Unnamed: 0,date_x,udid,operator,state,date_y,_merge
0,Apr,00000902bb5bea2d8f865e1c6b0ace6abf6725d7,1,TAMIL NADU,May,both
1,Apr,0001265ed98b8c1ac1f208e68a05ebcd501aca8b,1,ASSAM,May,both
2,Apr,0003610ce98164faaafc131a01402eac5a87f2a3,1,MAHARASHTRA,May,both
3,Apr,0004533e17eeff9b25540908ea1e0c8848da89f0,2,UTTAR PRADESH,May,both
4,Apr,0004b5b18802f8486339eb3daa04f633dcbc1c7c,1,ASSAM,May,both
5,Apr,0005502bacc89217da7dae4d4a43f333a9f6f046,2,TAMIL NADU,May,both
6,Apr,0005bfef699aba30aa6fccf4ec54f646ad7316d7,2,WEST BENGAL,May,both
7,Apr,00069ff468272664efda1c75bc034733e28ac06a,3,DELHI,May,both
8,Apr,0006c34d3509a8951b2023eecdd2583dc2981177,3,MAHARASHTRA,May,both
11,Apr,0008de27e3e1094c42d8d2fde8f0d71e22bb9ef1,3,MAHARASHTRA,May,both


In [None]:
import pandasql as ps
#df1.groupby(['udid','date']).filter(lambda x: len(x) > 1)

df1[df1.date == 'Apr'].groupby('udid').filter(lambda g: len(g) > 1).size()
#.groupby('udid').size()
#.sort_values(ascending=False)

q1 = """
SELECT operator 
FROM data_state
limit 2
"""

#print(ps.sqldf(q1, locals()))

In [24]:
df = pd.pivot_table(df1,values='operator',index='udid',columns='date')
df.head()

date,Apr,May
udid,Unnamed: 1_level_1,Unnamed: 2_level_1
000001b2f4b9d139ed496267f056516edf833e80,3.0,3.0
000003fb76b5e01f88dfbb57ce596de197d052cd,2.0,2.0
00000902bb5bea2d8f865e1c6b0ace6abf6725d7,1.0,1.0
00000a52170c3d10d43ff77eb03c04dd164996d4,1.0,1.0
00000a78561f96624d409c0c1b6c297ad308ce08,1.0,1.0


In [26]:
df[df['Apr'] > 1].head()

date,Apr,May
udid,Unnamed: 1_level_1,Unnamed: 2_level_1
000001b2f4b9d139ed496267f056516edf833e80,3.0,3.0
000003fb76b5e01f88dfbb57ce596de197d052cd,2.0,2.0
000016710f5ebd40631966c55bddff9893e1d52f,4.0,4.0
00001a76b6c34367585776ee555169435c111f29,4.0,4.0
00001d617ecfeeaa9e9bc65a3c7503582da7ca6f,2.0,2.0
