1.) Set Up Environment

In [1]:
import pandas as pd #dataframe library
import numpy as np #numeric library
import datetime as dt

#plot library
import matplotlib.pyplot as plt 
import seaborn as sns 

#evaluation library
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report,recall_score,precision_score

#label encoder library
from sklearn.preprocessing import LabelEncoder

# for min_max scaling (Kaggle)
from mlxtend.preprocessing import minmax_scaling

from sklearn.preprocessing import MinMaxScaler, StandardScaler

# for standardizing features (note: StandardScaler is already imported above)
from sklearn.preprocessing import StandardScaler

# for Box-Cox Transformation
from scipy import stats

# Fix the seed of NumPy's legacy global random generator so that any
# np.random.* draws made later in the notebook are repeatable across runs.
np.random.seed(seed=0)

2.) Dataset Preparation

In [2]:
# SMP Team data: the Google Sheets "edit" link is rewritten into its
# CSV-export form so that pandas can download the sheet directly.
smp_url = 'https://docs.google.com/spreadsheets/d/1fQjK_yvaj5yh006nRTcMXaoU0RAx8OUy2IhpZG4tNMw/edit#gid=0'
smp_data = smp_url.replace('/edit#gid=', '/export?format=csv&gid=')
smp = pd.read_csv(filepath_or_buffer=smp_data)
# Preview the first five rows.
smp.head(n=5)

Unnamed: 0,subs_no
0,2106705274
1,1075588229
2,1349135784
3,68147238
4,1356074904


In [3]:
# NS Team data: the Google Sheets "edit" link is rewritten into its
# CSV-export form so that pandas can download the sheet directly.
ns_url = 'https://docs.google.com/spreadsheets/d/16Vrap9JRBTrq9vA-Xz-1ZkLv0GiaWZ6oETrhL4ry-Ok/edit#gid=0'
ns_data = ns_url.replace('/edit#gid=', '/export?format=csv&gid=')
ns = pd.read_csv(filepath_or_buffer=ns_data)
# Preview the first five rows.
ns.head(n=5)

Unnamed: 0,time_no,subs_key,subs_no,service_filter,rem_bal,rtd_amt,free_unit,usg_unit,sid
0,1/9/2021 21:24,6281932975189,2106705274,SMSBAS,673,50,0.0,1,SILVER
1,1/9/2021 21:29,6281932975189,2106705274,SMSBAS,623,50,0.0,1,SILVER
2,1/9/2021 21:36,6281932975189,2106705274,SMSBAS,573,50,0.0,1,SILVER
3,1/9/2021 7:43,6287775860056,1075588229,VASOCC,7220,5500,0.0,1,SILVER
4,1/9/2021 4:24,6287775860056,1075588229,CONTEN,101720,55,,1048576,TRIBE


In [4]:
# ITInfra Team data: the Google Sheets "edit" link is rewritten into its
# CSV-export form so that pandas can download the sheet directly.
infra_url = 'https://docs.google.com/spreadsheets/d/1RxhtYZPFhB5F2a3434SAC6K11j_sRMhRDu5avKHMAQg/edit#gid=0'
infra_data = infra_url.replace('/edit#gid=', '/export?format=csv&gid=')
infra = pd.read_csv(filepath_or_buffer=infra_data)
# Preview the first five rows.
infra.head(n=5)

Unnamed: 0,day_no,time_no,service_filter,subs_no,b_no,event_action_code,prefix_dom_sk_id,originating_cell_id,home_poc,city_name,physical_poc,roam_zone,destination_poc
0,1/9/2021,1/9/2021 21:24,SMSBAS,2106705274,6285693000000.0,O,5636,510.11.34314.18692,JK0,JAKARTA,KA0,LOCXLM,JKT
1,1/9/2021,1/9/2021 21:29,SMSBAS,2106705274,6285693000000.0,O,5636,510.11.34314.38401,JK0,JAKARTA,KA0,LOCXLM,JKT
2,1/9/2021,1/9/2021 21:36,SMSBAS,2106705274,6285693000000.0,O,5636,510.11.34314.18693,JK0,JAKARTA,KA0,LOCXLM,JKT
3,1/9/2021,1/9/2021 7:43,VASOCC,1075588229,9955702.0,B,-1,0.0.0.0,NOPOC,JAKARTA,B,LOCXLM,0
4,1/9/2021,1/9/2021 4:24,CONTEN,1075588229,0.0,O,-1,510.11.411042.3,JK0,JAKARTA,JK0,NATZ10,0


In [5]:
# Finance Team data: the Google Sheets "edit" link is rewritten into its
# CSV-export form so that pandas can download the sheet directly.
fin_url = 'https://docs.google.com/spreadsheets/d/1F0KiwUmqIiknViTQ4Os_SdHdBVA2ErOOrSgH2Hqeumc/edit#gid=0'
fin_data = fin_url.replace('/edit#gid=', '/export?format=csv&gid=')
fin = pd.read_csv(filepath_or_buffer=fin_data)
# Preview the first five rows.
fin.head(n=5)

Unnamed: 0,subs_no,payment_cat
0,2106705274,PRE
1,1075588229,PRE
2,1349135784,PRE
3,68147238,PRE
4,1356074904,PRE


3.) Merge Columns

In [6]:
# Left join: keep every row of smp and attach the matching ns rows by subs_no.
merged_data = smp.merge(ns, how='left', on='subs_no')

In [7]:
# Number of rows in the merged frame (first entry of its shape tuple).
total_rows = merged_data.shape[0]
print(total_rows)

44123


In [8]:
# Preview the first five rows of the merged frame.
merged_data.head(n=5)

Unnamed: 0,subs_no,time_no,subs_key,service_filter,rem_bal,rtd_amt,free_unit,usg_unit,sid
0,2106705274,1/9/2021 21:24,6281932975189,SMSBAS,673,50,0.0,1,SILVER
1,2106705274,1/9/2021 21:29,6281932975189,SMSBAS,623,50,0.0,1,SILVER
2,2106705274,1/9/2021 21:36,6281932975189,SMSBAS,573,50,0.0,1,SILVER
3,2106705274,1/9/2021 21:19,6281932975189,SMSBAS,723,50,0.0,1,SILVER
4,2106705274,2/9/2021 16:38,6281932975189,SMSBAS,523,50,0.0,1,SILVER


In [9]:
# Show the first five rows of infra again before it is merged.
infra.head(n=5)

Unnamed: 0,day_no,time_no,service_filter,subs_no,b_no,event_action_code,prefix_dom_sk_id,originating_cell_id,home_poc,city_name,physical_poc,roam_zone,destination_poc
0,1/9/2021,1/9/2021 21:24,SMSBAS,2106705274,6285693000000.0,O,5636,510.11.34314.18692,JK0,JAKARTA,KA0,LOCXLM,JKT
1,1/9/2021,1/9/2021 21:29,SMSBAS,2106705274,6285693000000.0,O,5636,510.11.34314.38401,JK0,JAKARTA,KA0,LOCXLM,JKT
2,1/9/2021,1/9/2021 21:36,SMSBAS,2106705274,6285693000000.0,O,5636,510.11.34314.18693,JK0,JAKARTA,KA0,LOCXLM,JKT
3,1/9/2021,1/9/2021 7:43,VASOCC,1075588229,9955702.0,B,-1,0.0.0.0,NOPOC,JAKARTA,B,LOCXLM,0
4,1/9/2021,1/9/2021 4:24,CONTEN,1075588229,0.0,O,-1,510.11.411042.3,JK0,JAKARTA,JK0,NATZ10,0


In [10]:
# Left join the infra columns onto the merged frame, matching rows on the
# three columns the two frames share.
# NOTE(review): this cell reassigns merged_data from itself, so running it a
# second time without first re-running the earlier smp/ns merge would join
# infra onto an already-joined frame.
merged_data = merged_data.merge(
    infra,
    how='left',
    on=['subs_no', 'time_no', 'service_filter'],
)