# User Analytics in the Telecommunication Industry #

## Task 4 - Satisfaction Analysis ##

In this part, we wil assume that the satisfaction of a user is dependent on user engagement and experience, we will then analyze customer satisfaction in depth.

#### 1. Engagement score ####

In [1]:
#importing libraries
!pip install tabulate
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from statistics import *
from tabulate import tabulate
from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge




In [2]:
#Reading the engagement datamart
data=pd.read_csv('engagement_datamart')
data.head()

Unnamed: 0,MSISDN/Number,no.of xDR sessions,Total Uls and DLs,session_freq
0,33601000000.0,1,517373800.0,1
1,33601000000.0,1,1234090000.0,1
2,33601000000.0,1,718383600.0,1
3,33601010000.0,1,1638393000.0,1
4,33601010000.0,2,1158391000.0,1


In [3]:
data_MSISDN=data.copy()

In [4]:
data_MSISDN.head()

Unnamed: 0,MSISDN/Number,no.of xDR sessions,Total Uls and DLs,session_freq
0,33601000000.0,1,517373800.0,1
1,33601000000.0,1,1234090000.0,1
2,33601000000.0,1,718383600.0,1
3,33601010000.0,1,1638393000.0,1
4,33601010000.0,2,1158391000.0,1


In [5]:
# first we will initialize the centroid of the least cluster in engagement score
c0=(-0.00395958, -0.4885642 ,  0.52150644,)


In [6]:
# A helper function to calculate the Euclidean diatance between the data 
# points and the least centroid in engagement score

def calculate_distance(centroid, X, Y,Z):
    distances = []
        
    # Unpack the x and y coordinates of the centroid
    c_x, c_y,c_z = centroid
        
    # Iterate over the data points and calculate the distance using the           # given formula
    for x, y,z in list(zip(X, Y,Z)):
        root_diff_x = (x - c_x) ** 2
        root_diff_y = (y - c_y) ** 2
        root_diff_z = (z - c_z) ** 2
        distance = np.sqrt(root_diff_x + root_diff_y + root_diff_z)
        distances.append(distance)
        
    return distances

In [7]:
# Calculate the distance and assign them to the DataFrame accordingly by calling the function
data_MSISDN['Engagement_Euclidean_Distance'] = calculate_distance(c0,data_MSISDN['no.of xDR sessions'],data_MSISDN['Total Uls and DLs'],data_MSISDN['session_freq'])

In [8]:
data_MSISDN.head()

Unnamed: 0,MSISDN/Number,no.of xDR sessions,Total Uls and DLs,session_freq,Engagement_Euclidean_Distance
0,33601000000.0,1,517373800.0,1,517373800.0
1,33601000000.0,1,1234090000.0,1,1234090000.0
2,33601000000.0,1,718383600.0,1,718383600.0
3,33601010000.0,1,1638393000.0,1,1638393000.0
4,33601010000.0,2,1158391000.0,1,1158391000.0


#### 1. Experience Score ####

In [9]:
#loading Experience datamart
exp_data=pd.read_csv('experience_datamart')

In [10]:
exp_data1=exp_data[['MSISDN/Number','Average TCP retransmission','Average RTT','Average throughput']]
exp_data1.head()

Unnamed: 0,MSISDN/Number,Average TCP retransmission,Average RTT,Average throughput
0,33664960000.0,21569570.0,47.0,46.0
1,33681850000.0,21569570.0,70.0,32.0
2,33760630000.0,21569570.0,127.458589,12.0
3,33750340000.0,21569570.0,127.458589,88.0
4,33699800000.0,21569570.0,127.458589,12.0


In [11]:
# first we will initialize the centroid of the least cluster in experience score
c1=(3.13199983e+09, 1.49370370e+02, 1.26895321e+05)


In [12]:
# A helper function to calculate the Euclidean diatance between the data 
# points and the least centroid in experience score

def calculate_distance(centroid, X, Y,Z):
    distances = []
        
    # Unpack the x and y coordinates of the centroid
    c_x, c_y,c_z = centroid
        
    # Iterate over the data points and calculate the distance using the           # given formula
    for x, y,z in list(zip(X, Y,Z)):
        root_diff_x = (x - c_x) ** 2
        root_diff_y = (y - c_y) ** 2
        root_diff_z = (z - c_z) ** 2
        distance = np.sqrt(root_diff_x + root_diff_y + root_diff_z)
        distances.append(distance)
        
    return distances

In [13]:
# Calculate the distance and assign the values to the DataFrame accordingly by calling the function
exp_data1['Experience_Euclidean_Distance'] = calculate_distance(c1,exp_data1['Average TCP retransmission'],exp_data1['Average RTT'],exp_data1['Average throughput'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [14]:
exp_data1.head()

Unnamed: 0,MSISDN/Number,Average TCP retransmission,Average RTT,Average throughput,Experience_Euclidean_Distance
0,33664960000.0,21569570.0,47.0,46.0,3110430000.0
1,33681850000.0,21569570.0,70.0,32.0,3110430000.0
2,33760630000.0,21569570.0,127.458589,12.0,3110430000.0
3,33750340000.0,21569570.0,127.458589,88.0,3110430000.0
4,33699800000.0,21569570.0,127.458589,12.0,3110430000.0


### Determining the satisfaction level of the customers ###

In [15]:
scores_data=exp_data1[['MSISDN/Number','Experience_Euclidean_Distance']]

In [16]:
scores_data.head()

Unnamed: 0,MSISDN/Number,Experience_Euclidean_Distance
0,33664960000.0,3110430000.0
1,33681850000.0,3110430000.0
2,33760630000.0,3110430000.0
3,33750340000.0,3110430000.0
4,33699800000.0,3110430000.0


In [17]:
eng_score=data_MSISDN[['Engagement_Euclidean_Distance']]

In [18]:
eng_score

Unnamed: 0,Engagement_Euclidean_Distance
0,5.173738e+08
1,1.234090e+09
2,7.183836e+08
3,1.638393e+09
4,1.158391e+09
5,1.439738e+09
6,9.221409e+08
7,1.371765e+09
8,1.076896e+09
9,9.378444e+08


In [19]:
scores=pd.concat([eng_score,scores_data], axis=1, ignore_index=True)

In [20]:
scores.head()

Unnamed: 0,0,1,2
0,517373800.0,33664960000.0,3110430000.0
1,1234090000.0,33681850000.0,3110430000.0
2,718383600.0,33760630000.0,3110430000.0
3,1638393000.0,33750340000.0,3110430000.0
4,1158391000.0,33699800000.0,3110430000.0


In [21]:
scores.columns=['Engagement_Euclidean_Distance','MSISDN/Number','Experience_Euclidean_Distance']

In [22]:
scores['Engagement_Euclidean_Distance'].fillna(value=scores['Engagement_Euclidean_Distance'].mean(), inplace=True)

In [57]:
scores.head()

Unnamed: 0,Engagement_Euclidean_Distance,MSISDN/Number,Experience_Euclidean_Distance,satisfaction_score
0,517373800.0,33664960000.0,3110430000.0,3627804000.0
1,1234090000.0,33681850000.0,3110430000.0,4344520000.0
2,718383600.0,33760630000.0,3110430000.0,3828814000.0
3,1638393000.0,33750340000.0,3110430000.0,4748823000.0
4,1158391000.0,33699800000.0,3110430000.0,4268821000.0


In [24]:
'''#getting the satisfaction score by adding the engagement and experience score
scores['satisfaction_score']=scores['Engagement_Euclidean_Distance']+scores['Experience_Euclidean_Distance']'''

In [58]:
scores['satisfaction_score'] = scores[['Engagement_Euclidean_Distance', 'Experience_Euclidean_Distance']].mean(axis=1)

In [59]:
scores.head()

Unnamed: 0,Engagement_Euclidean_Distance,MSISDN/Number,Experience_Euclidean_Distance,satisfaction_score
0,517373800.0,33664960000.0,3110430000.0,1813902000.0
1,1234090000.0,33681850000.0,3110430000.0,2172260000.0
2,718383600.0,33760630000.0,3110430000.0,1914407000.0
3,1638393000.0,33750340000.0,3110430000.0,2374411000.0
4,1158391000.0,33699800000.0,3110430000.0,2134411000.0


In [60]:
#sorting the satisfaction score per customer
top10=scores.sort_values(by='satisfaction_score',ascending=False).head(10)


In [80]:
top10

Unnamed: 0,Engagement_Euclidean_Distance,MSISDN/Number,Experience_Euclidean_Distance,satisfaction_score
38391,1780674000.0,33650940000.0,3127786000.0,2454230000.0
94379,1772824000.0,33663850000.0,3131948000.0,2452386000.0
70184,1771882000.0,33665560000.0,3131238000.0,2451560000.0
1643,1762451000.0,33668360000.0,3131407000.0,2446929000.0
97165,1761169000.0,33752000000.0,3131958000.0,2446564000.0
85224,1760919000.0,33661950000.0,3131526000.0,2446223000.0
45279,1760477000.0,33763630000.0,3131799000.0,2446138000.0
18393,1760805000.0,33663660000.0,3131208000.0,2446007000.0
5086,1756118000.0,33760820000.0,3131620000.0,2443869000.0
16024,1757861000.0,33669570000.0,3129495000.0,2443678000.0


In [85]:
Engagement_score=top10['Engagement_Euclidean_Distance']

In [90]:

Engagement_score=top10['Engagement_Euclidean_Distance']
levels=['1','2','3','4','5','6','7','8','9','10']
Experience_score=top10['Experience_Euclidean_Distance']
fig = go.Figure()
fig.add_trace(go.Bar(
    x=levels,
    y=Engagement_score,
    name='Engagement_score',
    marker_color='indianred'
))
fig.add_trace(go.Bar(
    x=levels,
    y=Experience_score,
    name='Experience_score',
    marker_color='lightsalmon'
))

# Here we modify the tickangle of the xaxis, resulting in rotated labels.
fig.update_layout(barmode='group', xaxis_tickangle=-45)
fig.show()

In [62]:
top10.to_csv("Top10_satisfied_customers",index=False)

### Regression Modeling ###

We will use Ridge Regression because of GridSearchCV which allows us to automatically perform 5-fold cross-validation with a range of different regularization parameters in order to find the optimal value of alpha

In [29]:
'''alpha = [1e-15, 1e-10, 1e-8, 1e-4, 1e-3,1e-2, 1, 5, 10, 20]

ridge = Ridge()

parameters = {'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3,1e-2, 1, 5, 10, 20]}

ridge_regressor = GridSearchCV(ridge, parameters,scoring='neg_mean_squared_error', cv=5)

ridge_regressor.fit(Xs, y)'''

NameError: name 'Xs' is not defined

### K -means Clustering ###

In [63]:
scores.head()

Unnamed: 0,Engagement_Euclidean_Distance,MSISDN/Number,Experience_Euclidean_Distance,satisfaction_score
0,517373800.0,33664960000.0,3110430000.0,1813902000.0
1,1234090000.0,33681850000.0,3110430000.0,2172260000.0
2,718383600.0,33760630000.0,3110430000.0,1914407000.0
3,1638393000.0,33750340000.0,3110430000.0,2374411000.0
4,1158391000.0,33699800000.0,3110430000.0,2134411000.0


In [64]:
scores1=scores[['MSISDN/Number','Engagement_Euclidean_Distance','Experience_Euclidean_Distance']]

In [65]:
scores1.head()

Unnamed: 0,MSISDN/Number,Engagement_Euclidean_Distance,Experience_Euclidean_Distance
0,33664960000.0,517373800.0,3110430000.0
1,33681850000.0,1234090000.0,3110430000.0
2,33760630000.0,718383600.0,3110430000.0
3,33750340000.0,1638393000.0,3110430000.0
4,33699800000.0,1158391000.0,3110430000.0


In [66]:
scores1.describe()

Unnamed: 0,MSISDN/Number,Engagement_Euclidean_Distance,Experience_Euclidean_Distance
count,150001.0,150001.0,150001.0
mean,41824140000.0,916617700.0,3111019000.0
std,2438731000000.0,290777200.0,101811700.0
min,33601000000.0,58362290.0,5353832.0
25%,33650940000.0,779204900.0,3110430000.0
50%,33663610000.0,916617700.0,3110430000.0
75%,33683070000.0,1053934000.0,3130507000.0
max,882397100000000.0,1780674000.0,3132000000.0


In [67]:
#Normalizing the data
x = scores1.values #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
scores2 = pd.DataFrame(x_scaled)

In [68]:
# k means using 2 clusters and k-means++ initialization
kmeans = KMeans(n_jobs = -1, n_clusters = 2, init='k-means++')
kmeans.fit(scores2)
pred = kmeans.predict(scores2)

In [69]:
scores_clustered = pd.DataFrame(scores2)
scores_clustered['clusters'] = pred

In [70]:
scores_clustered.head()

Unnamed: 0,0,1,2,clusters
0,7.248772e-08,0.266509,0.993101,1
1,9.163195e-08,0.682645,0.993101,0
2,1.809066e-07,0.383218,0.993101,1
3,1.692516e-07,0.917389,0.993101,0
4,1.119654e-07,0.638693,0.993101,0


In [71]:
scores_clustered['clusters'].value_counts()

0    109976
1     40025
Name: clusters, dtype: int64

### Aggregating the average of satisfaction & experience score per cluster ###

In [72]:
cluster_data=scores_clustered['clusters']

In [73]:
cluster_scores=pd.concat([scores,cluster_data], axis=1, ignore_index=True)

In [74]:
cluster_scores.columns=['Engagement_Euclidean_Distance','MSISDN/Number','Experience_Euclidean_Distance','satisfaction_score','clusters']

In [75]:
cluster_scores.head()

Unnamed: 0,Engagement_Euclidean_Distance,MSISDN/Number,Experience_Euclidean_Distance,satisfaction_score,clusters
0,517373800.0,33664960000.0,3110430000.0,1813902000.0,1
1,1234090000.0,33681850000.0,3110430000.0,2172260000.0,0
2,718383600.0,33760630000.0,3110430000.0,1914407000.0,1
3,1638393000.0,33750340000.0,3110430000.0,2374411000.0,0
4,1158391000.0,33699800000.0,3110430000.0,2134411000.0,0


In [91]:
cluster_scores['clusters'].value_counts()

0    109976
1     40025
Name: clusters, dtype: int64

### Exporting final scores Table to MySQL Database ###

In [47]:
!pip install mysql-connector-python

Collecting mysql-connector-python
  Downloading https://files.pythonhosted.org/packages/32/92/aa9f928c09302be5897c8707264cb56ae2fad10425e37db04f2c4310781b/mysql_connector_python-8.0.21-cp37-cp37m-win_amd64.whl (809kB)
Installing collected packages: mysql-connector-python
Successfully installed mysql-connector-python-8.0.21


In [76]:
scores.head()

Unnamed: 0,Engagement_Euclidean_Distance,MSISDN/Number,Experience_Euclidean_Distance,satisfaction_score
0,517373800.0,33664960000.0,3110430000.0,1813902000.0
1,1234090000.0,33681850000.0,3110430000.0,2172260000.0
2,718383600.0,33760630000.0,3110430000.0,1914407000.0
3,1638393000.0,33750340000.0,3110430000.0,2374411000.0
4,1158391000.0,33699800000.0,3110430000.0,2134411000.0


In [77]:
scores.rename(columns={"Engagement_Euclidean_Distance": "Engagement_score", "Experience_Euclidean_Distance": "Experience_score"})

Unnamed: 0,Engagement_score,MSISDN/Number,Experience_score,satisfaction_score
0,5.173738e+08,3.366496e+10,3.110430e+09,1.813902e+09
1,1.234090e+09,3.368185e+10,3.110430e+09,2.172260e+09
2,7.183836e+08,3.376063e+10,3.110430e+09,1.914407e+09
3,1.638393e+09,3.375034e+10,3.110430e+09,2.374411e+09
4,1.158391e+09,3.369980e+10,3.110430e+09,2.134411e+09
5,1.439738e+09,3.366819e+10,3.110430e+09,2.275084e+09
6,9.221409e+08,3.366537e+10,3.110430e+09,2.016286e+09
7,1.371765e+09,3.376349e+10,3.131221e+09,2.251493e+09
8,1.076896e+09,3.369874e+10,3.110430e+09,2.093663e+09
9,9.378444e+08,3.365922e+10,3.128761e+09,2.033303e+09


In [78]:
scores.to_csv('final_tellco_table.csv',index=False)

### SELECT statement on the exported table using python ###

In [52]:
#First we import MySQL connector
import mysql.connector

In [79]:
import mysql.connector
from mysql.connector import Error

try:
    connection = mysql.connector.connect(host='localhost',
                                         database='elvis_10acad',
                                         user='root',
                                         password='Oyogo0713613781#')

    sql_select_Query = "select * from final_tellco_table" #counting how many rows we have in our table
    cursor = connection.cursor()
    cursor.execute(sql_select_Query)
    records = cursor.fetchall()
    print(cursor.rowcount)
except Error as e:
    print("Error reading data from MySQL table", e)

150001
