# Document description
In this notebook we run the VMSP algorithm with several different minimum-support values in order to tune the minimum support.

# Import

In [1]:
from spmf import Spmf
import numpy as np
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import ast
pd.options.plotting.backend = "plotly"
import plotly.express as px
from chart_studio import plotly
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.subplots as sp
pd.options.display.max_colwidth = None
pd.set_option('display.max_rows', 70)

# Tests

In [27]:
# Test run of the VMSP algorithm through the `Spmf` wrapper class of the spmf package.
# arguments=[0.00005] is the single parameter handed to VMSP — presumably the relative
# minimum support (0.005 %, matching the "min_sup_0_005" file names); TODO confirm.
# NOTE(review): input, output and spmf_bin_location_dir are absolute paths on one machine.
# NOTE(review): the recorded execution of this cell ended in a KeyboardInterrupt, so the
# CSV written on the last line may not stem from this run — verify before relying on it.
spmf = Spmf("VMSP", input_filename="/home/s164574/motifs_and_competition_in_app_usage/20sec_sessions_0_01_removed.txt",
            output_filename="/home/s164574/motifs_and_competition_in_app_usage/output_VMSP_20sec_sessions_0_01_removed_min_sup_0_005.txt",
            spmf_bin_location_dir="/home/s164574/motifs_and_competition_in_app_usage/",
            arguments=[0.00005])
spmf.run()
# pickle=True presumably also persists the DataFrame as a pickle file — confirm in the spmf-py docs.
print(spmf.to_pandas_dataframe(pickle=True))
# Export the mined patterns to the CSV file that the translation cell below reads.
spmf.to_csv("output_VMSP_20sec_sessions_0_01_removed_min_sup_0_005.csv")

KeyboardInterrupt: 

To be able to interpret the discovered patterns, the app codes are translated to their corresponding package names.

In [3]:
# Lookup table used to translate patterns: app code -> package name,
# built from the two matching columns of the coded data file.
data = pd.read_csv('sorted_coded_data.csv')
app_dict = dict(
    zip(data['app_code'], data['package_name'])
)

In [None]:
# Load the semicolon-separated pattern file exported by spmf.to_csv.
output = pd.read_csv(
    "output_VMSP_20sec_sessions_0_01_removed_min_sup_0_005.csv",
    sep=';',
)

# Every pattern is stored as the text of a Python list of app codes: parse it,
# then replace each code by its package name via app_dict.
output['pattern'] = output['pattern'].apply(
    lambda raw_pattern: [app_dict[int(code)] for code in ast.literal_eval(raw_pattern)]
)

# Most frequent patterns first, plus the number of apps in each pattern.
output = output.sort_values(by='sup', ascending=False)
output['pattern_length'] = output['pattern'].apply(len)

output.to_csv(
    "output_VMSP_20sec_sessions_0_01_removed_min_sup_0_005_translated.csv",
    index=False,
)

In [None]:
# Two tuning metrics for this run: how many patterns consist of a single app,
# and the length of the longest pattern found.
print(
    "Number of frequent patterns of length 1:",
    int((output['pattern_length'] == 1).sum()),
)
print(
    "Max lengths of frequent patterns:",
    output['pattern_length'].max(),
)

In [34]:
# Second test run of VMSP through the `Spmf` wrapper class of the spmf package.
# arguments=[0.009] is the single parameter handed to VMSP — presumably the relative
# minimum support (0.9 %, matching the "min_sup_0_9" file names); TODO confirm.
# NOTE(review): input, output and spmf_bin_location_dir are absolute paths on one machine.
spmf = Spmf("VMSP", input_filename="/home/s164574/motifs_and_competition_in_app_usage/20sec_sessions_0_01_removed.txt",
            output_filename="/home/s164574/motifs_and_competition_in_app_usage/output_VMSP_20sec_sessions_0_01_removed_min_sup_0_9.txt",
            spmf_bin_location_dir="/home/s164574/motifs_and_competition_in_app_usage/",
            arguments=[0.009])
spmf.run()
# pickle=True presumably also persists the DataFrame as a pickle file — confirm in the spmf-py docs.
print(spmf.to_pandas_dataframe(pickle=True))
# Export the mined patterns to the CSV file that the translation cell below reads.
spmf.to_csv("output_VMSP_20sec_sessions_0_01_removed_min_sup_0_9.csv")

>/home/s164574/motifs_and_competition_in_app_usage/spmf.jar
 Total time ~ 631160 ms
 Frequent sequences count : 87
 Max memory (mb) : 14850.77449035644587
minsup 475548
Intersection count 428 


                 pattern      sup
0                [53996]  1065566
1                [49042]   523059
2                [45846]  1581664
3                [45284]   851696
4                [45246]   766567
..                   ...      ...
82  [19968, 23949, 7649]   609083
83  [23949, 7649, 19968]   494938
84  [19968, 7649, 19968]   752957
85   [23949, 7649, 7649]   521353
86   [7649, 23949, 7649]   590476

[87 rows x 2 columns]


In [35]:
# Load the semicolon-separated pattern file exported by spmf.to_csv.
output = pd.read_csv(
    "output_VMSP_20sec_sessions_0_01_removed_min_sup_0_9.csv",
    sep=';',
)

# Every pattern is stored as the text of a Python list of app codes: parse it,
# then replace each code by its package name via app_dict.
output['pattern'] = output['pattern'].apply(
    lambda raw_pattern: [app_dict[int(code)] for code in ast.literal_eval(raw_pattern)]
)

# Most frequent patterns first, plus the number of apps in each pattern.
output = output.sort_values(by='sup', ascending=False)
output['pattern_length'] = output['pattern'].apply(len)

output.to_csv(
    "output_VMSP_20sec_sessions_0_01_removed_min_sup_0_9_translated.csv",
    index=False,
)

In [36]:
# Two tuning metrics for this run: how many patterns consist of a single app,
# and the length of the longest pattern found.
print(
    "Number of frequent patterns of length 1:",
    int((output['pattern_length'] == 1).sum()),
)
print(
    "Max lengths of frequent patterns:",
    output['pattern_length'].max(),
)

Number of frequent patterns of length 1: 15
Max lengths of frequent patterns: 3


## Creating tuning graphs

In [2]:
# Load the table of tuning results (one row per tested minimum support) from CSV.
tunning_results=pd.read_csv("Tunning_min_sup_results.csv")

In [10]:
# Drop the first row of the tuning table (the reason is not recorded in the
# notebook — TODO document it).
# The table is rebuilt from the CSV instead of slicing the existing variable, so
# re-running this cell always yields the same frame rather than removing one
# more row on every execution.
tunning_results = pd.read_csv("Tunning_min_sup_results.csv").iloc[1:]

In [11]:
# Display the tuning table after the first row has been dropped.
tunning_results

Unnamed: 0,min. sup.(%),num of frequent patterns founded,max length of founded pattern,num of length 1 founded pattern,number sessions corresponds to min. sup.
1,0.01,19216.0,10.0,98.0,5283.8635
2,0.05,2320.0,7.0,137.0,26419.3175
3,0.1,1003.0,6.0,91.0,52838.635
4,0.15,618.0,5.0,72.0,79257.9525
5,0.2,441.0,5.0,65.0,105677.27
6,0.25,342.0,5.0,54.0,132096.5875
7,0.3,282.0,5.0,50.0,158515.905
8,0.35,245.0,4.0,39.0,184935.2225
9,0.4,210.0,4.0,34.0,211354.54
10,0.45,182.0,4.0,28.0,237773.8575


In [14]:
# One scatter plot per tuning metric, each drawn against the minimum support (%)
# and rendered at a fixed 625x400 size.

# Number of sessions that the minimum support corresponds to.
fig1 = px.scatter(
    tunning_results,
    x='min. sup.(%)',
    y='number sessions corresponds to min. sup.',
    width=625,
    height=400,
)

# Number of frequent patterns found.
fig2 = px.scatter(
    tunning_results,
    x='min. sup.(%)',
    y='num of frequent patterns founded',
    width=625,
    height=400,
)

# Length of the longest pattern found.
fig3 = px.scatter(
    tunning_results,
    x='min. sup.(%)',
    y='max length of founded pattern',
    width=625,
    height=400,
)

# Number of patterns of length 1.
# NOTE(review): this column name ends with a trailing space; it has to match the
# CSV header exactly — confirm against Tunning_min_sup_results.csv.
fig4 = px.scatter(
    tunning_results,
    x='min. sup.(%)',
    y='num of length 1 founded pattern ',
    width=625,
    height=400,
)

In [18]:
# Display the scatter of 'number sessions corresponds to min. sup.' against the minimum support.
fig1

In [15]:
# Display the scatter of 'num of frequent patterns founded' against the minimum support.
fig2

In [16]:
# Display the scatter of 'max length of founded pattern' against the minimum support.
fig3

In [17]:
# Display the scatter of the length-1 pattern count against the minimum support.
fig4

In [16]:
# Build three Plotly Express scatters; their traces, titles and axis labels are
# reused below to assemble one combined subplot figure.
figure1 = px.scatter(tunning_results, x='min. sup.(%)', y='num of frequent patterns founded', title='Number of frequent patterns')
figure2 = px.scatter(tunning_results, x='min. sup.(%)', y='max length of founded pattern', title='length of longest pattern')
# NOTE(review): the y column name below ends with a trailing space; it has to match
# the CSV header exactly — confirm against Tunning_min_sup_results.csv.
figure3 = px.scatter(tunning_results, x='min. sup.(%)', y='num of length 1 founded pattern ', title='Number of patters of length 1')

# Pull the traces out of each Express figure.
figure1_traces = list(figure1.data)
figure2_traces = list(figure2.data)
figure3_traces = list(figure3.data)

# Create a 2x2 subplot grid; only three cells are used, the bottom-right one stays
# empty. Copying traces alone would discard the titles given to the Express
# figures, so they are passed on as subplot titles (row-major order).
this_figure = sp.make_subplots(
    rows=2,
    cols=2,
    subplot_titles=[
        figure1.layout.title.text,
        figure2.layout.title.text,
        figure3.layout.title.text,
        "",
    ],
)

# Place each figure's traces in its grid cell and carry over its axis labels.
# add_trace(..., row=, col=) is used because append_trace is deprecated in plotly.
for source_figure, source_traces, row, col in (
    (figure1, figure1_traces, 1, 1),
    (figure2, figure2_traces, 1, 2),
    (figure3, figure3_traces, 2, 1),
):
    for trace in source_traces:
        this_figure.add_trace(trace, row=row, col=col)
    this_figure.update_xaxes(title_text=source_figure.layout.xaxis.title.text, row=row, col=col)
    this_figure.update_yaxes(title_text=source_figure.layout.yaxis.title.text, row=row, col=col)

In [17]:
# Display the combined subplot figure assembled in the previous cell.
this_figure