In [1]:
import os, sys
import pandas as pd
from pandasql import sqldf

rpath = os.path.abspath('..')
if rpath not in sys.path:
    sys.path.insert(0, rpath)

import scripts.read_data_from_db as rd

In [3]:
df = rd.read_data(table_name='processed_data')

INFO:scripts.read_data_from_db:Data fetched succesfully


In [4]:
df.shape

(150000, 53)

In [5]:
pysqldf = lambda q: sqldf(q, globals())

In [6]:
df.columns

Index(['Bearer Id', 'Start', 'Start ms', 'End', 'End ms', 'Dur. (ms)', 'IMSI',
       'MSISDN/Number', 'IMEI', 'Last Location Name', 'Avg RTT DL (ms)',
       'Avg RTT UL (ms)', 'Avg Bearer TP DL (kbps)', 'Avg Bearer TP UL (kbps)',
       'TCP DL Retrans. Vol (Bytes)', 'TCP UL Retrans. Vol (Bytes)',
       'DL TP < 50 Kbps (%)', '50 Kbps < DL TP < 250 Kbps (%)',
       '250 Kbps < DL TP < 1 Mbps (%)', 'DL TP > 1 Mbps (%)',
       'UL TP < 10 Kbps (%)', '10 Kbps < UL TP < 50 Kbps (%)',
       '50 Kbps < UL TP < 300 Kbps (%)', 'UL TP > 300 Kbps (%)',
       'HTTP DL (Bytes)', 'HTTP UL (Bytes)', 'Activity Duration DL (ms)',
       'Activity Duration UL (ms)', 'Dur. (ms).1', 'Handset Manufacturer',
       'Handset Type', 'Nb of sec with 125000B < Vol DL',
       'Nb of sec with 1250B < Vol UL < 6250B',
       'Nb of sec with 31250B < Vol DL < 125000B',
       'Nb of sec with 6250B < Vol DL < 31250B',
       'Nb of sec with Vol DL < 6250B', 'Nb of sec with Vol UL < 1250B',
       'Social Me

### Top 10 handsets used by the customers

In [7]:
query = ''' 
    SELECT DISTINCT 
        "Handset Type", 
        COUNT(*) as UsageCount
    FROM df
    GROUP BY "Handset Type" 
    ORDER BY UsageCount DESC
    limit 10
'''

result_df = pysqldf(query)
result_df

Unnamed: 0,Handset Type,UsageCount
0,Huawei B528S-23A,29310
1,Apple iPhone 6S (A1688),9419
2,Apple iPhone 6 (A1586),9023
3,Apple iPhone 7 (A1778),6326
4,Apple iPhone Se (A1723),5187
5,Apple iPhone 8 (A1905),4993
6,Apple iPhone Xr (A2105),4568
7,Samsung Galaxy S8 (Sm-G950F),4520
8,Apple iPhone X (A1901),3813
9,Samsung Galaxy A5 Sm-A520F,3724


### Top 3 handset Manufacturers


In [11]:
query = ''' 
    SELECT DISTINCT 
        "Handset Manufacturer", 
        COUNT(*) as "Number of Users"
    FROM df
    GROUP BY "Handset Manufacturer" 
    ORDER BY "Number of Users" DESC
    limit 3
'''

result_df = pysqldf(query)
result_df

Unnamed: 0,Handset Manufacturer,Number of Users
0,Apple,69123
1,Samsung,40839
2,Huawei,34423


### Average session Duration by Manufacturer

In [15]:
query = ''' 
    SELECT "Handset Manufacturer", AVG("Dur. (ms)") AS "Avg Session Duration (ms)"
    FROM df
    WHERE "Handset Manufacturer" IN ('Apple', 'Samsung', 'Huawei')
    GROUP BY "Handset Manufacturer";

'''
result_df = pysqldf(query)
result_df

Unnamed: 0,Handset Manufacturer,Avg Session Duration (ms)
0,Apple,102179.329615
1,Huawei,96095.559568
2,Samsung,121664.988467


### Data volume Analysis by Manufacturer


In [17]:
query = ''' 
   SELECT
    "Handset Manufacturer",
    SUM("Total UL (Bytes)" + "Total DL (Bytes)") AS "Total Data Volume (Bytes)"
    FROM df
    WHERE "Handset Manufacturer" IN ('Apple', 'Samsung', 'Huawei')
    GROUP BY "Handset Manufacturer";
'''
result_df = sqldf(query)
result_df

Unnamed: 0,Handset Manufacturer,Total Data Volume (Bytes)
0,Apple,34284070000000.0
1,Huawei,17093930000000.0
2,Samsung,20236760000000.0


### Top 3 Handsets per top 3 handset Manufacurers

In [23]:
query = ''' 
    WITH RankedHandsets AS (
    SELECT
        "Handset Manufacturer",
        "Handset Type",
        RANK() OVER (PARTITION BY "Handset Manufacturer" ORDER BY COUNT(*) DESC) AS "Rank"
    FROM df
    WHERE "Handset Manufacturer" IN ('Apple', 'Samsung', 'Huawei')
    GROUP BY "Handset Manufacturer", "Handset Type"
    )
    SELECT
        "Handset Manufacturer",
        "Handset Type",
        "Rank"
    FROM RankedHandsets
    WHERE "Rank" <= 5;
'''

result_df = sqldf(query)
result_df

Unnamed: 0,Handset Manufacturer,Handset Type,Rank
0,Apple,Huawei B528S-23A,1
1,Apple,Apple iPhone 6S (A1688),2
2,Apple,Apple iPhone 6 (A1586),3
3,Apple,Apple iPhone 7 (A1778),4
4,Apple,Apple iPhone Se (A1723),5
5,Huawei,Huawei B528S-23A,1
6,Huawei,Huawei E5180,2
7,Huawei,Huawei P20 Lite Huawei Nova 3E,3
8,Huawei,Huawei P20,4
9,Huawei,Huawei Y6 2018,5
