In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the local helper
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import random

import numpy as np

from utils import save_pickle, load_pickle
from generator import WordGenerator
from graph import WordGraph

In [3]:
# Pickle file used to cache the large WordGraph between runs.
PATH_GRAPH = Path("graph.pkl")
# Number of top-ranked relations to keep when listing results.
TOP_ITEMS = 15
# NOTE(review): not referenced in the visible cells; presumably a graph
# option consumed further down the notebook — confirm or remove.
DIRECTED = True
# Symbols handed to the word generator: the four letters starting at "A".
ALPHABET = list(map(chr, range(ord("A"), ord("A") + 4)))

In [4]:
# Small hand-written word -> boolean mapping used to eyeball what WordGraph
# produces (presumably True/False mark positive/negative words — confirm
# against WordGraph).
test_dictionary = dict(
    ban=False,
    can=True,
    bar=True,
    car=False,
    hat=True,
    heat=False,
    hot=True,
    bara=False,
    ara=True,
)

wg = WordGraph(test_dictionary)

# One line per (relation, count) entry of the edge table.
print("Relations:")
for edge_entry in wg.edges.items():
    print(*edge_entry)

# One line per word with the list of transitions recorded for it.
print("\nTransitions:")
for graph_entry in wg.graph.items():
    print(*graph_entry)

# Bare final expression so the notebook renders the relation map.
wg.relation_map

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

Relations:
('ba', 'ca') 2
('ca', 'ba') 2
('an', 'ar') 2
('ar', 'an') 2
('r', 'ra') 1
('ra', 'r') 1
('ha', 'hea') 1
('hea', 'ha') 1
('hat', 'hot') 1
('hot', 'hat') 1
('ba', 'a') 1
('a', 'ba') 1

Transitions:
ban [('can', ('ba', 'ca')), ('bar', ('an', 'ar'))]
can [('ban', ('ca', 'ba')), ('car', ('an', 'ar'))]
bar [('ban', ('ar', 'an')), ('car', ('ba', 'ca')), ('bara', ('r', 'ra'))]
car [('can', ('ar', 'an')), ('bar', ('ca', 'ba'))]
bara [('bar', ('ra', 'r')), ('ara', ('ba', 'a'))]
hat [('heat', ('ha', 'hea')), ('hot', ('hat', 'hot'))]
heat [('hat', ('hea', 'ha'))]
hot [('hat', ('hot', 'hat'))]
ara [('bara', ('a', 'ba'))]


{'ba → ca': {'transitions': {'[-] → [+]': 0.5, '[+] → [-]': 0.5}, 'counts': 2},
 'ca → ba': {'transitions': {'[+] → [-]': 0.5, '[-] → [+]': 0.5}, 'counts': 2},
 'an → ar': {'transitions': {'[-] → [+]': 0.5, '[+] → [-]': 0.5}, 'counts': 2},
 'ar → an': {'transitions': {'[+] → [-]': 0.5, '[-] → [+]': 0.5}, 'counts': 2},
 'r → ra': {'transitions': {'[+] → [-]': 1.0}, 'counts': 1},
 'ra → r': {'transitions': {'[-] → [+]': 1.0}, 'counts': 1},
 'ha → hea': {'transitions': {'[+] → [-]': 1.0}, 'counts': 1},
 'hea → ha': {'transitions': {'[-] → [+]': 1.0}, 'counts': 1},
 'hat → hot': {'transitions': {'[+] → [+]': 1.0}, 'counts': 1},
 'hot → hat': {'transitions': {'[+] → [+]': 1.0}, 'counts': 1},
 'ba → a': {'transitions': {'[-] → [+]': 1.0}, 'counts': 1},
 'a → ba': {'transitions': {'[+] → [-]': 1.0}, 'counts': 1}}

In [5]:
# Load the pickled graph when the cache file exists; otherwise generate the
# words, build the graph and pickle it for subsequent runs.
# NOTE(review): only the file's existence is checked, so a cache built with
# a different ALPHABET or generator settings is reused silently — delete
# graph.pkl to force a rebuild.
if PATH_GRAPH.exists():
    wg = load_pickle(PATH_GRAPH)
else:
    generator = WordGenerator(ALPHABET, seed=137, positives=0.3)
    words = generator(10_000)
    wg = WordGraph(words)
    save_pickle(wg, PATH_GRAPH)

  0%|          | 0/10000 [00:00<?, ?it/s]

  0%|          | 0/9883 [00:00<?, ?it/s]

  0%|          | 0/342 [00:00<?, ?it/s]

In [6]:
# Rank every relation by sqrt(counts) * share of "[+] → [-]" transitions
# (a relation without that transition scores 0) and keep the TOP_ITEMS best.
# Presumably the square root is there so that frequent relations are
# favoured without raw counts dominating the ranking.
top_positive_to_negatives = dict(sorted(
    wg.relation_map.items(),
    key=lambda entry: np.sqrt(entry[1]["counts"]) * entry[1]["transitions"].get("[+] → [-]", 0),
    reverse=True
)[:TOP_ITEMS])

# Print each selected relation with its count, followed by its transition
# shares as percentages. The loop index was never used, so the dict items
# are iterated directly instead of through enumerate().
for relation, data in top_positive_to_negatives.items():
    print(f"\n{relation} ({data['counts']}):")
    for transition, value in data['transitions'].items():
        print(transition, f"{100 * value:.2f}%")


ACB → AAB (38):
[-] → [-] 44.74%
[+] → [-] 42.11%
[-] → [+] 5.26%
[+] → [+] 7.89%

CAC → CBC (84):
[-] → [-] 45.24%
[+] → [+] 10.71%
[+] → [-] 27.38%
[-] → [+] 16.67%

AAC → ACC (73):
[-] → [-] 45.21%
[+] → [-] 28.77%
[-] → [+] 20.55%
[+] → [+] 5.48%

DAA → DDA (26):
[+] → [-] 46.15%
[-] → [+] 11.54%
[+] → [+] 7.69%
[-] → [-] 34.62%

AAC → ABC (52):
[-] → [-] 48.08%
[+] → [-] 30.77%
[-] → [+] 19.23%
[+] → [+] 1.92%

CBC → CCC (68):
[-] → [+] 16.18%
[-] → [-] 50.00%
[+] → [-] 26.47%
[+] → [+] 7.35%

AC → AAC (88):
[-] → [-] 52.27%
[+] → [+] 5.68%
[+] → [-] 22.73%
[-] → [+] 19.32%

CAC → CCC (81):
[+] → [-] 23.46%
[+] → [+] 9.88%
[-] → [-] 51.85%
[-] → [+] 14.81%

CCB → CAB (44):
[-] → [-] 50.00%
[+] → [-] 31.82%
[+] → [+] 9.09%
[-] → [+] 9.09%

CA → C (54):
[-] → [-] 48.15%
[-] → [+] 16.67%
[+] → [-] 27.78%
[+] → [+] 7.41%

CC → CAC (98):
[-] → [-] 56.12%
[-] → [+] 15.31%
[+] → [+] 8.16%
[+] → [-] 20.41%

CA → CCA (84):
[-] → [-] 51.19%
[+] → [-] 21.43%
[-] → [+] 19.05%
[+] → [+] 8.33%