# Import required modules

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, normalize
from sklearn import metrics
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import warnings
import pickle

# Make the modules under ../scripts importable from this notebook.
sys.path.append(os.path.abspath('../scripts'))

from clean_dataframe import Utility
from plot_dataframe import Plotter

# Instantiate the project helper classes imported above.
# NOTE(review): neither `plot` nor `cleaner` is referenced in the visible
# cells of this notebook — confirm they are still needed.
plot = Plotter()
cleaner = Utility()

In [15]:
warnings.filterwarnings('ignore')

# Read Data

In [16]:
# Load the cleaned dataset produced earlier in the project.
df = pd.read_csv(filepath_or_buffer="../data/clean_data.csv")

In [17]:
# Select only the columns relevant to the experience analysis.
# .copy() makes this an independent frame, so later column assignments on it
# do not target a slice of `df` (which triggers SettingWithCopyWarning).
tellco_exprience_df = df[[
    'MSISDN/Number',
    'Avg RTT DL (ms)',
    'Avg RTT UL (ms)',
    'Avg Bearer TP DL (kbps)',
    'Avg Bearer TP UL (kbps)',
    'TCP DL Retrans. Vol (Bytes)',
    'TCP UL Retrans. Vol (Bytes)',
    'Handset Type',
]].copy()
tellco_exprience_df

Unnamed: 0,MSISDN/Number,Avg RTT DL (ms),Avg RTT UL (ms),Avg Bearer TP DL (kbps),Avg Bearer TP UL (kbps),TCP DL Retrans. Vol (Bytes),TCP UL Retrans. Vol (Bytes),Handset Type
0,33659219748,97.0,7.0,3845.0,2535.0,3231397.0,7230.0,Samsung Galaxy Note 8 (Sm-N950F Ds)
1,33664473872,213.5,4.0,28305.0,6500.0,5722628.0,222238.0,Huawei B593S-22
2,33659219748,79.0,4.0,4148.0,4540.0,13165244.0,34638.0,Samsung Galaxy Note 8 (Sm-N950F Ds)
3,33753758738,39.0,33.0,263.0,620.0,12964929.0,222238.0,Samsung Galaxy J3 (Sm-J330)
4,33658752999,213.5,43.0,29501.0,10557.0,9844005.0,21586.0,Huawei E5180
...,...,...,...,...,...,...,...,...
49504,33665236895,70.0,20.0,46545.0,1575.0,8414820.0,50779.0,Huawei B528S-23A
49505,33666584437,213.5,61.0,9150.0,604.0,410605.0,5837.0,Samsung Galaxy A5 Sm-A520F
49506,33685838753,69.0,6.0,10960.0,877.0,83246.0,9136.0,Samsung Galaxy A8 (2018)
49507,33761274518,55.0,3.0,30741.0,12623.0,2288.0,2770.0,Apple iPhone 6S (A1688)


In [18]:
# Combine the downlink (DL) and uplink (UL) columns into one total per metric:
# RTT, bearer throughput and TCP retransmission volume.
# .assign returns a new frame instead of writing columns into what may be a
# slice of `df`, which avoids SettingWithCopyWarning / chained assignment.
tellco_exprience_df = tellco_exprience_df.assign(**{
    'Total Avg RTT (ms)':
        tellco_exprience_df['Avg RTT DL (ms)'] + tellco_exprience_df['Avg RTT UL (ms)'],
    'Total Avg Bearer TP (kbps)':
        tellco_exprience_df['Avg Bearer TP DL (kbps)'] + tellco_exprience_df['Avg Bearer TP UL (kbps)'],
    'Total TCP Retrans. Vol (Bytes)':
        tellco_exprience_df['TCP DL Retrans. Vol (Bytes)'] + tellco_exprience_df['TCP UL Retrans. Vol (Bytes)'],
})
tellco_exprience_df.head()

Unnamed: 0,MSISDN/Number,Avg RTT DL (ms),Avg RTT UL (ms),Avg Bearer TP DL (kbps),Avg Bearer TP UL (kbps),TCP DL Retrans. Vol (Bytes),TCP UL Retrans. Vol (Bytes),Handset Type,Total Avg RTT (ms),Total Avg Bearer TP (kbps),Total TCP Retrans. Vol (Bytes)
0,33659219748,97.0,7.0,3845.0,2535.0,3231397.0,7230.0,Samsung Galaxy Note 8 (Sm-N950F Ds),104.0,6380.0,3238627.0
1,33664473872,213.5,4.0,28305.0,6500.0,5722628.0,222238.0,Huawei B593S-22,217.5,34805.0,5944866.0
2,33659219748,79.0,4.0,4148.0,4540.0,13165244.0,34638.0,Samsung Galaxy Note 8 (Sm-N950F Ds),83.0,8688.0,13199882.0
3,33753758738,39.0,33.0,263.0,620.0,12964929.0,222238.0,Samsung Galaxy J3 (Sm-J330),72.0,883.0,13187167.0
4,33658752999,213.5,43.0,29501.0,10557.0,9844005.0,21586.0,Huawei E5180,256.5,40058.0,9865591.0


In [19]:
# Keep the identifier, the three combined metrics and the handset type;
# the separate DL/UL columns are no longer needed.
tellco_exprience_df = tellco_exprience_df.loc[:, [
    'MSISDN/Number',
    'Total Avg RTT (ms)',
    'Total Avg Bearer TP (kbps)',
    'Total TCP Retrans. Vol (Bytes)',
    'Handset Type',
]]
tellco_exprience_df.head()

Unnamed: 0,MSISDN/Number,Total Avg RTT (ms),Total Avg Bearer TP (kbps),Total TCP Retrans. Vol (Bytes),Handset Type
0,33659219748,104.0,6380.0,3238627.0,Samsung Galaxy Note 8 (Sm-N950F Ds)
1,33664473872,217.5,34805.0,5944866.0,Huawei B593S-22
2,33659219748,83.0,8688.0,13199882.0,Samsung Galaxy Note 8 (Sm-N950F Ds)
3,33753758738,72.0,883.0,13187167.0,Samsung Galaxy J3 (Sm-J330)
4,33658752999,256.5,40058.0,9865591.0,Huawei E5180


In [20]:
# Aggregate to one row per 'MSISDN/Number'.
# The three numeric metrics are summed over each number's rows; the handset is
# the most frequent value in the group. `.get(0, np.nan)` yields NaN instead of
# raising KeyError when mode() is empty (all handset values in a group missing).
# The lambda stays wrapped in a list so the result keeps its two-level columns
# ('sum' / '<lambda>'), which the following cell indexes into.
# NOTE(review): the inputs are per-row averages, so 'sum' grows with the number
# of rows per user — confirm a sum (rather than a mean) is intended.
tellco_exprience_df1 = tellco_exprience_df.groupby('MSISDN/Number').agg({
    'Total Avg RTT (ms)': 'sum',
    'Total Avg Bearer TP (kbps)': 'sum',
    'Total TCP Retrans. Vol (Bytes)': 'sum',
    'Handset Type': [lambda x: x.mode().get(0, np.nan)],
})
tellco_exprience_df1

Unnamed: 0_level_0,Total Avg RTT (ms),Total Avg Bearer TP (kbps),Total TCP Retrans. Vol (Bytes),Handset Type
Unnamed: 0_level_1,sum,sum,sum,<lambda>
MSISDN/Number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
33601008617,91.0,52995.0,9370832.0,Apple iPhone Se (A1723)
33601011634,39.0,42416.0,110232.0,Huawei Mate 10 Pro Porsche Design Huawei Mate 10
33601021217,160.0,19256.0,13171894.0,Apple iPhone 7 Plus (A1784)
33601031129,60.0,38190.0,2325497.0,Apple iPhone 8 Plus (A1897)
33601034530,217.5,8539.0,2006261.0,Apple iPhone 7 (A1778)
...,...,...,...,...
33789914536,70.0,554.0,2734889.0,Apple iPhone Se (A1723)
33789922012,128.0,43305.0,1229545.0,Samsung Galaxy S7 Edge (Sm-G935X)
33789942399,52.0,110037.5,2804004.0,Samsung Galaxy S9 Sm-G960F Ds
33789980299,69.0,104321.5,10096.0,undefined


In [21]:
# Flatten the two-level columns produced by the aggregation: dropping the
# inner level ('sum' / '<lambda>') leaves the four metric names as plain column
# labels, still indexed by 'MSISDN/Number'. This replaces rebuilding the frame
# column by column from an empty DataFrame.
tellco_exprience_df = tellco_exprience_df1.droplevel(1, axis=1)
tellco_exprience_df.head()

Unnamed: 0_level_0,Total Avg RTT (ms),Total Avg Bearer TP (kbps),Total TCP Retrans. Vol (Bytes),Handset Type
MSISDN/Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
33601008617,91.0,52995.0,9370832.0,Apple iPhone Se (A1723)
33601011634,39.0,42416.0,110232.0,Huawei Mate 10 Pro Porsche Design Huawei Mate 10
33601021217,160.0,19256.0,13171894.0,Apple iPhone 7 Plus (A1784)
33601031129,60.0,38190.0,2325497.0,Apple iPhone 8 Plus (A1897)
33601034530,217.5,8539.0,2006261.0,Apple iPhone 7 (A1778)


# Top 10, bottom 10, and most frequent values of each metric

In [22]:
# Order users from highest to lowest total RTT and show the first 10.
rtt = tellco_exprience_df.sort_values(by='Total Avg RTT (ms)', ascending=False)
rtt['Total Avg RTT (ms)'].head(10)

MSISDN/Number
33699231421    1546.5
33661827479    1404.0
33658263267    1352.0
33699168714    1323.5
33664709594    1308.5
33761249975    1234.0
33662014376    1226.0
33665218470    1213.5
33760112066    1206.0
33668563241    1195.0
Name: Total Avg RTT (ms), dtype: float64

In [24]:
# The 10 most frequent total RTT values (value_counts sorts by count, descending).
tellco_exprience_df['Total Avg RTT (ms)'].value_counts().iloc[:10]

49.0     467
52.0     440
46.0     429
48.0     429
44.0     426
55.0     425
216.5    425
50.0     416
51.0     405
217.5    404
Name: Total Avg RTT (ms), dtype: int64

In [25]:
# Order users from highest to lowest total bearer throughput.
br_tp = tellco_exprience_df.sort_values(by='Total Avg Bearer TP (kbps)', ascending=False)
br_tp

Unnamed: 0_level_0,Total Avg RTT (ms),Total Avg Bearer TP (kbps),Total TCP Retrans. Vol (Bytes),Handset Type
MSISDN/Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
33659546392,512.0,650628.5,27712733.0,Huawei B528S-23A
33762644658,1028.0,502403.5,66762110.0,Huawei B528S-23A
33699248832,549.0,501263.5,75939773.0,Huawei B528S-23A
33666461685,543.0,470459.0,12325061.0,Huawei B528S-23A
33658727547,525.0,465453.0,8993383.0,Huawei B528S-23A
...,...,...,...,...
33658615582,229.5,57.0,8678.0,Apple iPhone 6S (A1688)
33621826880,233.5,53.0,14867.0,Apple iPhone Se (A1723)
33763459790,228.5,53.0,23200.0,Apple iPhone Xr (A2105)
33669105392,209.0,52.0,12710.0,Apple iPhone 8 (A1905)


In [26]:
# 10 highest total bearer throughput values.
br_tp['Total Avg Bearer TP (kbps)'].head(10)

MSISDN/Number
33659546392    650628.5
33762644658    502403.5
33699248832    501263.5
33666461685    470459.0
33658727547    465453.0
33660503175    449840.5
33668708263    446830.0
33763862031    446161.5
33699327554    435707.0
33698935800    435185.0
Name: Total Avg Bearer TP (kbps), dtype: float64

In [27]:
# 10 lowest total bearer throughput values (end of the descending sort).
br_tp['Total Avg Bearer TP (kbps)'].tail(10)

MSISDN/Number
33618145282    60.0
33650717329    59.0
33646320432    58.0
33686839010    57.0
33661835953    57.0
33658615582    57.0
33621826880    53.0
33763459790    53.0
33669105392    52.0
33661466916    38.0
Name: Total Avg Bearer TP (kbps), dtype: float64

In [None]:
# The 10 most frequent total bearer throughput values.
tellco_exprience_df['Total Avg Bearer TP (kbps)'].value_counts().iloc[:10]

In [None]:
# Order users from highest to lowest total TCP retransmission volume.
tcp_retransmitted = tellco_exprience_df.sort_values(
    by='Total TCP Retrans. Vol (Bytes)',
    ascending=False,
)
tcp_retransmitted

In [None]:
# 10 highest total TCP retransmission volumes.
tcp_retransmitted['Total TCP Retrans. Vol (Bytes)'].head(10)

In [None]:
# 10 lowest total TCP retransmission volumes (end of the descending sort).
tcp_retransmitted['Total TCP Retrans. Vol (Bytes)'].tail(10)

In [None]:
# The 10 most frequent total TCP retransmission volumes.
tellco_exprience_df['Total TCP Retrans. Vol (Bytes)'].value_counts().iloc[:10]

In [None]:
# Print the per-user frame's column dtypes and non-null counts before the handset analysis.
tellco_exprience_df.info()

# Handset Type Analysis

In [None]:
# Mean of each experience metric per handset type.
handset_type_exprience_df = tellco_exprience_df.groupby('Handset Type')[[
    'Total Avg Bearer TP (kbps)',
    'Total TCP Retrans. Vol (Bytes)',
    'Total Avg RTT (ms)',
]].mean()
handset_type_exprience_df

In [None]:
# Handset types with the highest average TCP retransmission volume (first 5).
handset_type_exprience_df.sort_values(
    by='Total TCP Retrans. Vol (Bytes)',
    ascending=False,
).head(5)

# k-means clustering

In [None]:
# Keep only the numeric metrics for clustering.
# Rebinding to the result of drop() (rather than inplace=True) together with
# errors='ignore' makes this cell safe to re-run: a second run no longer raises
# KeyError because 'Handset Type' is already gone.
tellco_exprience_df = tellco_exprience_df.drop(columns=['Handset Type'], errors='ignore')

In [None]:

# Standardise each metric column with scikit-learn's StandardScaler
# (default settings) so no single metric dominates by scale.
scale_data = StandardScaler().fit(tellco_exprience_df).transform(tellco_exprience_df)
scale_data

In [None]:
# Rescale every row to unit L2 norm (scikit-learn's defaults, spelled out).
normalized_data = normalize(scale_data, norm='l2', axis=1)
normalized_data

In [None]:
# Fit k-means with 3 clusters and a fixed seed for reproducibility.
# The model is fitted exactly once: `labels_` already holds the cluster
# assigned to each training row, so a second fit_predict() pass over the same
# data is unnecessary.
kmeans = KMeans(n_clusters=3, random_state=42).fit(normalized_data)
y_kmeans = kmeans.labels_
X = np.array(normalized_data)
y_kmeans

In [None]:
# Build a new frame with each row's cluster label as the first column;
# tellco_exprience_df itself is left unchanged.
clustered_tellco_exprience_df = tellco_exprience_df.assign(Cluster=y_kmeans)[
    ['Cluster', *tellco_exprience_df.columns]
]
clustered_tellco_exprience_df

# Segment users into experience groups

In [None]:
# Summary statistics of the three metrics for the users in cluster 0.
clustered_tellco_exprience_df.loc[
    clustered_tellco_exprience_df["Cluster"] == 0,
    ["Total Avg RTT (ms)", "Total Avg Bearer TP (kbps)", "Total TCP Retrans. Vol (Bytes)"]
].describe()

In [None]:
# Summary statistics of the three metrics for the users in cluster 1.
clustered_tellco_exprience_df.loc[
    clustered_tellco_exprience_df["Cluster"] == 1,
    ["Total Avg RTT (ms)", "Total Avg Bearer TP (kbps)", "Total TCP Retrans. Vol (Bytes)"]
].describe()

In [None]:
# Summary statistics of the three metrics for the users in cluster 2.
clustered_tellco_exprience_df.loc[
    clustered_tellco_exprience_df["Cluster"] == 2,
    ["Total Avg RTT (ms)", "Total Avg Bearer TP (kbps)", "Total TCP Retrans. Vol (Bytes)"]
].describe()

In [None]:
# Write the clustered per-user metrics to CSV; the 'MSISDN/Number' index is
# written as the first column.
clustered_tellco_exprience_df.to_csv(path_or_buf='../data/user_experience_data.csv')

In [None]:
# Persist the fitted k-means model.
# The target directory is created first so a checkout without ../models does
# not fail with FileNotFoundError.
# NOTE(review): only the KMeans estimator is pickled. The StandardScaler fitted
# earlier is not stored, so new data must be scaled and normalised the same way
# before calling predict() on the reloaded model.
os.makedirs("../models", exist_ok=True)
with open("../models/user_experience.pkl", "wb") as f:
    pickle.dump(kmeans, f)