# Generating ORM Facts & Fact Types

In [245]:
import markovify
import pandas as pd
import numpy as np

### Sample data

In [257]:
# Sample facts used throughout the notebook, one verbalised sentence per entry.
facts = [
    'The Academic with empNr 715 has EmpName ‘Adams A’',
    'The Academic with empNr 715 works for the Dept named ‘Computer Science’',
    'The Academic with empNr 715 occupies the Room with roomNr ‘69-301’',
    'The Academic with empNr 715 uses the Extension with extNr ‘2345’',
    'The Extension with extNr ‘2345’ provides the AccessLevel with code ‘LOC’',
    'The Academic with empNr 715 is contracted till the Date with mdy-code ‘01/31/95’',
]

In [171]:
# Fact types (object type - predicate - object type) that the Markov chain is fitted on.
fact_types = [
    'Academic has EmpName',
    'Academic works for Dept',
    'Academic occupies Room',
    'Academic uses Extension',
    'Extension provides AccessLevel',
    'Academic is contracted till Date',
    'Academic is tenured',
    'Room contains Academic',
    'EmpName belongs to Academic',
    'Dept uses AccessLevel',
]

In [145]:
# Pair each sample fact with the fact type at the same position.
# zip() stops at the shorter input, so only the first len(facts) entries of
# fact_types are used; list(...) hands the constructor a concrete sequence
# rather than a one-shot iterator.
# The original referenced `facts_types`, a name that is never defined
# (the list is called `fact_types`), which raises NameError on a fresh kernel.
df = pd.DataFrame(list(zip(facts, fact_types)), columns=['facts', 'fact_types'])
df

Unnamed: 0,facts,fact_types
0,The Academic with empNr 715 has EmpName ‘Adams A’,Academic has EmpName
1,The Academic with empNr 715 works_for_the Dept...,Academic works for Dept
2,The Academic with empNr 715 occupies_the Room ...,Academic occupies Room
3,The Academic with empNr 715 uses_the Extension...,Academic uses Extension
4,The Extension with extNr ‘2345’ provides_the A...,Extension provides AccessLevel
5,The Academic with empNr 715 is_contracted_till...,Academic is contracted till Date


### Fitting a Markov chain & generating fact types

In [178]:
# Fit a markovify model on the fact types, treating each entry as its own
# newline-delimited sentence.
# state_size=1 asks for a chain whose state is a single word (per markovify's
# documentation); the samples printed below accordingly splice several fact
# types together wherever they share a word.
text_model = markovify.NewlineText(fact_types, state_size=1)
text_model

<markovify.text.NewlineText at 0x111f55198>

In [182]:
# Draw ten candidate fact types from the fitted chain.
# make_sentence() returns None when it cannot produce an acceptable sentence
# within its retry budget, so those draws are skipped instead of printing "None".
for _ in range(10):
    sentence = text_model.make_sentence()
    if sentence is not None:
        print(sentence)

Dept uses Extension provides AccessLevel
Academic has EmpName belongs to Academic is tenured
EmpName belongs to Academic is contracted till Date
Academic works for Dept uses Extension provides AccessLevel
Dept uses Extension
Room contains Academic is tenured
Room contains Academic uses AccessLevel
Dept uses Extension provides AccessLevel
Room contains Academic is contracted till Date
Academic works for Dept uses AccessLevel


### Generating facts from generated fact type

In [183]:
# A chained fact type, hard-coded so the cells below do not depend on a random draw.
# The same sentence appears among the sampled outputs printed earlier in the notebook.
generated_fact_type = "Academic works for Dept uses Extension provides AccessLevel"
generated_fact_type

'Academic works for Dept uses Extension provides AccessLevel'

In [220]:
# Candidate values per object type; replacetoken picks one of them at random.
tokens = {
    'Academic': ['empNr 715', 'empNr 281', 'empNr 372'],
    'Dept': ['Sales', 'Marketing', 'Analytics'],
    'Extension': ['1100', '5502', '3463'],
    'AccessLevel': ['Read Only', 'Full Access', 'Limited'],
}

In [241]:
def replacetoken(string, token_map=None):
    """Expand an object-type word into ``'<word> with <value>'``.

    Parameters
    ----------
    string : str
        A single word taken from a fact type.
    token_map : dict, optional
        Mapping of object-type name to a list of candidate values.
        When omitted, the notebook-level ``tokens`` dict is used.

    Returns
    -------
    str
        ``string + ' with ' + value``, with ``value`` drawn uniformly at random
        from the candidates, when ``string`` is a key that has at least one
        candidate; otherwise ``string`` unchanged.
    """
    if token_map is None:
        token_map = tokens
    toks = token_map.get(string)
    if not toks:
        # Not an object type, or nothing to substitute: keep the word as is.
        return string
    # Size the draw by the actual number of candidates. The previous hard-coded
    # upper bound of 3 raised IndexError for shorter lists and never selected
    # entries beyond the third.
    idx = np.random.randint(0, len(toks))
    return string + ' with ' + toks[idx]

In [244]:
# Print 20 randomised facts built from the chosen fact type: every word is
# passed through replacetoken and the results are re-joined with single spaces.
for _ in range(20):
    expanded_words = [replacetoken(word) for word in generated_fact_type.split()]
    print(' '.join(expanded_words))

Academic with empNr 372 works for Dept with Analytics uses Extension with 1100 provides AccessLevel with Read Only
Academic with empNr 281 works for Dept with Marketing uses Extension with 3463 provides AccessLevel with Limited
Academic with empNr 372 works for Dept with Analytics uses Extension with 1100 provides AccessLevel with Read Only
Academic with empNr 715 works for Dept with Analytics uses Extension with 1100 provides AccessLevel with Full Access
Academic with empNr 372 works for Dept with Marketing uses Extension with 5502 provides AccessLevel with Limited
Academic with empNr 281 works for Dept with Sales uses Extension with 1100 provides AccessLevel with Limited
Academic with empNr 715 works for Dept with Marketing uses Extension with 3463 provides AccessLevel with Read Only
Academic with empNr 281 works for Dept with Sales uses Extension with 1100 provides AccessLevel with Limited
Academic with empNr 715 works for Dept with Sales uses Extension with 3463 provides AccessLeve

# Proposed Architecture

<img src='markovORM.jpg' alt='Proposed architecture diagram'>

1. The engine first extracts facts from a database or ORM diagram(s)
2. Next, these facts are turned into fact types, and the fact tokens are stored separately
3. The fact types are used to train a sequential encoder that maps them to thought vectors
4. The thought vectors are then decoded back into generated fact types
5. A randomizer then picks random tokens (some of them sampled without replacement) and inserts them into the generated fact types to produce facts
6. These facts are then used to impute a Relational database

### Further Study

https://aclweb.org/anthology/P18-1151