In [10]:
import os, sys
# Put the directory two levels above the working directory on the import
# path so that the `gsa` package imported by the following cells resolves.
lib_path = os.path.abspath('../..')
sys.path.append(lib_path)

In [11]:
from gsa.reader import ReadFromCSV
from gsa.rules_trie import RulesTrie, ClosureRulesTrie
from gsa.rules_trie import RulesImportance, HypothesisImportance

In [12]:
# Integer code assigned to each event name; handed to the reader as its
# `coding_dict` argument.
event_codes = {
    'work': 1,
    'separation': 2,
    'partner': 3,
    'marriage': 4,
    'children': 5,
    'parting': 6,
    'divorce': 7,
    'education': 8,
}

sequence_reader = ReadFromCSV(
    file_name='../data/full_data_shuffle.csv',
    coding_dict=event_codes,
)

# Read the CSV into the sequences (`data`) and their labels (`label`), using
# the column named 'label' as the label source, then report how many
# sequences were loaded.
data, label = sequence_reader.from_file_to_data_list(label_name='label')
print(len(data))

4857


#### Using `data` and `label`, we can build a Rules Trie and a Closure Rules Trie

In [13]:
# Both tries are built from the same sequences and labels, so share the
# keyword arguments between the two constructors.
trie_inputs = dict(list_of_sequences=data, label=label)
rules_trie = RulesTrie(**trie_inputs)
closure_rules_trie = ClosureRulesTrie(**trie_inputs)

#### We can inspect, for example, the full trie structure and the support of a given sequence

In [17]:
# For each trie, show a few (node id, full sequence) entries and the support
# of the one-element rule [['1']] for label 1 and then for label 0.
for trie in (rules_trie, closure_rules_trie):
    # dict.items() is a non-subscriptable view on Python 3, so materialize it
    # with list() before slicing; on Python 2 this yields the same list.
    print(list(trie.node_full_sequence_dict.items())[0: 5])
    print(trie.support_t(rule=[['1']], label=1))
    print(trie.support_t(rule=[['1']], label=0))
print("")

[(0, []), (1, [['1']]), (2, [['1'], ['8']]), (3, [['1'], ['8'], ['2']]), (4, [['4', '2']])]
0.328802588997
0.287137681159
[(0, []), (1, [['1']]), (2, [['1'], ['8']]), (3, [['1'], ['8'], ['2']]), (4, [['4', '2']])]
0.328802588997
0.287137681159



#### We can also select important rules using a minimum-support threshold

In [18]:
# Print the rules that pass min_threshold=0.2, first from the plain trie and
# then from the closure trie, for label 0 followed by label 1 in each case.
# The loop variable is deliberately not called `label`, which is the global
# holding the dataset labels.
print("Rules with min support 0.2")
for target_label in (0, 1):
    print(rules_trie.important_rules_selection(min_threshold=0.2, label=target_label))
print("")
for target_label in (0, 1):
    print(closure_rules_trie.important_rules_selection(min_threshold=0.2, label=target_label))

Rules with min support 0.2
[[['1']], [['2']], [['8']]]
[[['1']], [['2']], [['8']]]

[[['1']], [['2']], [['8']]]
[[['1']], [['2']], [['8']]]


#### We can also use a tool intended for classification tasks: it selects rules by a growth-rate threshold  
#### First, create candidate rules by minimum support

In [19]:
# Candidate rules for each label, both selected with the same threshold.
candidate_min_support = 0.01
rules_candidates_for1 = rules_trie.important_rules_selection(
    min_threshold=candidate_min_support,
    label=1,
)
rules_candidates_for0 = rules_trie.important_rules_selection(
    min_threshold=candidate_min_support,
    label=0,
)

#### From the candidates, select important rules by threshold

In [22]:
# Filter each label's candidates through RulesImportance with threshold=2,
# evaluated against the plain rules trie.
important_rules_for0 = RulesImportance(rules=rules_candidates_for0, trie=rules_trie, threshold=2, label=0)
important_rules_for1 = RulesImportance(rules=rules_candidates_for1, trie=rules_trie, threshold=2, label=1)

print("Important Rules")
for importance in (important_rules_for0, important_rules_for1):
    # dict.items() is a non-subscriptable view on Python 3, so materialize it
    # with list() before slicing; on Python 2 this yields the same list.
    print(list(importance.dict_of_rules.items())[0: 3])
print("")

Important Rules
[('153', [['8'], ['4', '2']]), ('135', [['2'], ['8'], ['1'], ['4'], ['5'], ['7']]), ('139', [['1', '2'], ['8']])]
[('155', [['1'], ['8'], ['2'], ['4'], ['5']]), ('157', [['1'], ['8'], ['4'], ['5']]), ('159', [['8'], ['2'], ['1']])]



#### From the candidates, select important rules by threshold

In [25]:
# NOTE(review): this cell repeats the preceding RulesImportance cell verbatim
# and rebuilds the same two objects from the same inputs.
important_rules_for0 = RulesImportance(rules=rules_candidates_for0, trie=rules_trie, threshold=2, label=0)
important_rules_for1 = RulesImportance(rules=rules_candidates_for1, trie=rules_trie, threshold=2, label=1)

print("Important Rules")
for importance in (important_rules_for0, important_rules_for1):
    # dict.items() is a non-subscriptable view on Python 3, so materialize it
    # with list() before slicing; on Python 2 this yields the same list.
    print(list(importance.dict_of_rules.items())[0: 3])
print("")

Important Rules
[('153', [['8'], ['4', '2']]), ('135', [['2'], ['8'], ['1'], ['4'], ['5'], ['7']]), ('139', [['1', '2'], ['8']])]
[('155', [['1'], ['8'], ['2'], ['4'], ['5']]), ('157', [['1'], ['8'], ['4'], ['5']]), ('159', [['8'], ['2'], ['1']])]



#### The same steps, but for closure patterns

In [29]:
# Candidates now come from the closure trie; this rebinds the
# rules_candidates_for* and important_rules_for* names used by earlier cells.
rules_candidates_for1 = closure_rules_trie.important_rules_selection(min_threshold=0.01, label=1)
rules_candidates_for0 = closure_rules_trie.important_rules_selection(min_threshold=0.01, label=0)

# NOTE(review): the closure candidates are evaluated against `rules_trie`,
# not `closure_rules_trie` -- confirm this is intended.
important_rules_for0 = RulesImportance(rules=rules_candidates_for0, trie=rules_trie, threshold=2, label=0)
important_rules_for1 = RulesImportance(rules=rules_candidates_for1, trie=rules_trie, threshold=2, label=1)

print("Important Closure Rules")
for importance in (important_rules_for0, important_rules_for1):
    # dict.items() is a non-subscriptable view on Python 3, so materialize it
    # with list() before slicing; on Python 2 this yields the same list.
    print(list(importance.dict_of_rules.items())[0: 3])
print("")

Important Closure Rules
[('153', [['8'], ['4', '2']]), ('135', [['2'], ['8'], ['1'], ['4'], ['5'], ['7']]), ('139', [['1', '2'], ['8']])]
[('156', [['1'], ['8'], ['4'], ['5']]), ('159', [['8'], ['2'], ['1'], ['4']]), ('158', [['8'], ['2'], ['1']])]



#### The same steps, but for hypotheses

In [31]:
# Candidates are re-selected from the plain trie with min_threshold=0.001
# (the earlier cells used 0.01); this rebinds the rules_candidates_for* and
# important_rules_for* names again.
rules_candidates_for1 = rules_trie.important_rules_selection(min_threshold=0.001, label=1)
rules_candidates_for0 = rules_trie.important_rules_selection(min_threshold=0.001, label=0)

# HypothesisImportance is called without a threshold argument, unlike the
# RulesImportance calls in the previous cells.
important_rules_for0 = HypothesisImportance(rules=rules_candidates_for0, trie=rules_trie, label=0)
important_rules_for1 = HypothesisImportance(rules=rules_candidates_for1, trie=rules_trie, label=1)

print("Important Hypothesis")
for importance in (important_rules_for0, important_rules_for1):
    # dict.items() is a non-subscriptable view on Python 3, so materialize it
    # with list() before slicing; on Python 2 this yields the same list.
    print(list(importance.dict_of_rules.items())[0: 2])
print("")

Important Hypothesis
[('349', [['8'], ['1'], ['2'], ['5'], ['4']]), ('595', [['2'], ['4'], ['5'], ['1'], ['7']])]
[('524', [['2'], ['1'], ['8'], ['4'], ['7'], ['5']]), ('527', [['2', '8'], ['4']])]

