In [34]:
import sys

# Make the parent directory importable. Presumably this is where the local
# `spmf` package used by the cells below lives — TODO confirm.
sys.path.append('../')

In [35]:
from typing import Text
import pandas as pd


In [36]:
def create_mock_dataframe(file_path: Text) -> pd.DataFrame:
    """ Create mock dataframe from a pipe-delimited text file.

    Every non-blank line of the file is stripped of surrounding whitespace
    and split on '|'; the resulting fields fill the 'Itemset' and
    'Time points' columns, in that order. Values are kept as strings.
    Blank (or whitespace-only) lines are skipped so that they do not turn
    into spurious rows of the dataframe.

    :param file_path: path of the text file to read.
    :return: dataframe with columns 'Itemset' and 'Time points', one row
             per non-blank line of the file.
    """
    with open(file_path, 'r') as fp:
        # Strip first so that whitespace-only lines collapse to '' and can be
        # filtered out before splitting.
        stripped_lines = (line.strip() for line in fp)
        rows = [record.split('|') for record in stripped_lines if record]

    return pd.DataFrame(rows, columns=['Itemset', 'Time points'])


def create_mock_raw_dataframe() -> pd.DataFrame:
    """ Build the raw mock dataframe of (time point, item label) events.

    Each row is a single labelled event; time points 3 and 7 occur twice
    because two events happen at those instants.

    :return: dataframe with columns 'Time points' (ints) and 'Itemset'
             (single-letter labels), in that column order.
    """
    events = [
        (1, 'a'), (2, 'a'), (3, 'a'), (3, 'b'), (6, 'a'),
        (7, 'a'), (7, 'b'), (8, 'c'), (9, 'b'), (11, 'd'),
    ]
    # Unzip the event pairs back into one list per column.
    time_points, labels = zip(*events)

    return pd.DataFrame({
        'Time points': list(time_points),
        'Itemset': list(labels),
    })

# Relative path to the sample input file that is parsed with create_mock_dataframe.
test_file_path = '../tests/test_files/contextEMMA.txt'

In [37]:
# Transformed input: itemsets must be positive integers (space-separated when
# several items share a time point) and each time point may appear only once.
mock_df = create_mock_dataframe(test_file_path)
mock_df

Unnamed: 0,Itemset,Time points
0,1,1
1,1,2
2,1 2,3
3,1,6
4,1 2,7
5,3,8
6,2,9
7,4,11


In [38]:
# Raw input: itemsets can have arbitrary labels, and a time point may be
# repeated on several rows to represent simultaneous events.
mock_df_raw = create_mock_raw_dataframe()
mock_df_raw

Unnamed: 0,Time points,Itemset
0,1,a
1,2,a
2,3,a
3,3,b
4,6,a
5,7,a
6,7,b
7,8,c
8,9,b
9,11,d


### Episode Mining Examples

In [39]:
from spmf import EMMA

# Transformed input dataframe: itemsets are already positive integers,
# so the run is configured with transform=False.
emma = EMMA(
    min_support=2,
    max_window=2,
    timestamp_present=True,
    transform=False,
)
output = emma.run_pandas(mock_df)
output

Unnamed: 0,Frequent episode,Support
0,1,5
1,2,3
2,1 2,2
3,1 -> 1,3
4,1 -> 2,2
5,1 -> 1 2,2


In [40]:
from spmf import EMMA

# Raw input dataframe: `transform` is left at its default here
# (presumably the wrapper then encodes the labels itself — confirm in spmf).
emma = EMMA(
    min_support=2,
    max_window=2,
    timestamp_present=True,
)
output = emma.run_pandas(mock_df_raw)
output

Unnamed: 0,Frequent episode,Support
0,a,5
1,b,3
2,a b,2
3,a -> a,3
4,a -> b,2
5,a -> a b,2


In [41]:
from spmf import TKE

# Transformed input dataframe: itemsets are already positive integers,
# so the run is configured with transform=False.
tke = TKE(
    k=6,
    max_window=2,
    timestamp_present=True,
    transform=False,
)
output = tke.run_pandas(mock_df)
output

Unnamed: 0,Frequent episode,Support
0,1 -> 2,2
1,1 2,2
2,1 -> 1 2,2
3,1 -> 1,3
4,2,3
5,1,5


In [42]:
from spmf import TKE

# Raw input dataframe: `transform` is left at its default here
# (presumably the wrapper then encodes the labels itself — confirm in spmf).
tke = TKE(
    k=6,
    max_window=2,
    timestamp_present=True,
)
output = tke.run_pandas(mock_df_raw)
output

Unnamed: 0,Frequent episode,Support
0,a -> b,2
1,a b,2
2,a -> a b,2
3,a -> a,3
4,b,3
5,a,5


### Episode Rules Mining Examples

In [43]:
from spmf import EMMARules

# Transformed input dataframe: itemsets are already positive integers,
# so the run is configured with transform=False.
emma_rules = EMMARules(
    max_window=2,
    timestamp_present=True,
    min_confidence=0.2,
    max_consequent_count=1,
    min_support=2,
    transform=False,
)

output = emma_rules.run_pandas(mock_df)
output

Unnamed: 0,Frequent episode,Support,Confidence
0,{1} ==> {1},3,0.6
1,{1} ==> {2},2,0.4
2,"{1} ==> {1,2}",2,0.4


In [44]:
from spmf import EMMARules

# Raw input dataframe: `transform` is left at its default here
# (presumably the wrapper then encodes the labels itself — confirm in spmf).
emma_rules = EMMARules(
    max_window=2,
    timestamp_present=True,
    min_confidence=0.2,
    max_consequent_count=1,
    min_support=2,
)

output = emma_rules.run_pandas(mock_df_raw)
output

Unnamed: 0,Frequent episode,Support,Confidence
0,{a} ==> {a},3,0.6
1,{a} ==> {b},2,0.4
2,"{a} ==> {a,b}",2,0.4


In [45]:
from spmf import TKERules

# Transformed input dataframe: itemsets are already positive integers,
# so the run is configured with transform=False.
tke_rules = TKERules(
    k=6,
    max_window=2,
    timestamp_present=True,
    min_confidence=0.2,
    max_consequent_count=1,
    min_support=2,
    transform=False,
)

output = tke_rules.run_pandas(mock_df)
output

Unnamed: 0,Frequent episode,Support,Confidence
0,{1} ==> {2},2,0.4
1,"{1} ==> {1,2}",2,0.4
2,{1} ==> {1},3,0.6


In [46]:
from spmf import TKERules

# Raw input dataframe: `transform` is left at its default here
# (presumably the wrapper then encodes the labels itself — confirm in spmf).
tke_rules = TKERules(
    k=6,
    max_window=2,
    timestamp_present=True,
    min_confidence=0.2,
    max_consequent_count=1,
    min_support=2,
)

output = tke_rules.run_pandas(mock_df_raw)
output

Unnamed: 0,Frequent episode,Support,Confidence
0,{a} ==> {b},2,0.4
1,"{a} ==> {a,b}",2,0.4
2,{a} ==> {a},3,0.6
