# Importing some utilities

In [None]:
%cd ../..
import glob
import sys
from dmg.realism.mle import whichFitsBetter
from scripts.modelSet import datasets_supported
# Entry of `datasets_supported` registered under the 'yakindu-exercise' key.
msetObject = datasets_supported['yakindu-exercise']
# Directory with the training models, relative to the directory selected by `%cd ../..`.
train_path = 'data/yakindu-exercise/train'
# Backend name passed to `getGraphReal` when the training graphs are loaded.
backend = 'python'

# Loading Training set

In [None]:
# Turn every entry found directly under the training directory into a graph.
Gs = []
for model_file in glob.glob(train_path + "/*"):
    Gs.append(msetObject.getGraphReal(model_file, backend))

# Random EMF

For each rule in RandomEMF, we estimate its parameters according to the type of the rule. More concretely, for shapes we use the function `whichFitsBetter`, which selects the best distribution by maximum likelihood. For priorities in alternative rules, we follow the procedure described in the paper, which is based on counting each different alternative in the set $R_{II}$.

## Number of regions per statechart

For the rule:

```
Root : Statechart ->
		regions += RegionsStatechart#Distribution(parameters)
	; 
```

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Out-degree of node 0 in each training graph
# (node 0 is presumably the Statechart root -- TODO confirm).
nums = [G.out_degree(0) for G in Gs]

# Raw counts (density=False) over half-unit bins starting at 1.
bins = np.arange(1, 3, 0.5)
plt.hist(nums, bins=bins, alpha=0.5, density=False)
# Ends the cell with a statement so the notebook does not echo plt.hist's return value.
print()

In [None]:
# Select the best-fitting distribution for the regions-per-statechart counts;
# the result is shown as the cell output.
whichFitsBetter(nums)

In [None]:
# Square of a value pasted in by hand -- presumably a standard deviation taken
# from the fit above, converted to a variance (TODO confirm).
0.18121673811444547**2

## Number of regions per state

For the rule:

```
	RState (Region r) : State ->
		regions += RegionsState#Distribution(parameters)
	;
	
```

In [None]:
# For every State node, count its outgoing edges whose 'type' is 'regions'.
numberSubvertex = []
for G in Gs:
    for n in G:
        if G.nodes[n]['type'] != 'State':
            continue
        region_edges = sum(
            1
            for nbr in G[n]
            for key in G[n][nbr]
            if G[n][nbr][key]['type'] == 'regions'
        )
        numberSubvertex.append(region_edges)

# Normalised histogram (density=True) over unit-wide bins 0..9.
bins = np.arange(0, 10, 1)
plt.hist(numberSubvertex, bins=bins, alpha=0.5, density=True)

In [None]:
# Select the best-fitting distribution for the regions-per-state counts and print it.
best = whichFitsBetter(numberSubvertex)
print(best)
# Disabled scratch code for pulling parameters out of `best`;
# note that `r` and `p` are not defined anywhere in this cell.
#lambda_= best['params']
#print(r,p)
#print(best)

In [None]:
# Square of a value pasted in by hand -- presumably a standard deviation taken
# from the fit above, converted to a variance (TODO confirm).
0.4121063377369438**2

## Type of vertices

For the rule:

```
	alter Vertices (Region r) : Vertex ->
		 RPseudoState(r)#a | RRegularState(r)#b
	;
	
```

In [None]:
import numpy as np
# Estimate the weights of the rule
#     alter Vertices : RPseudoState(r)#a | RRegularState(r)#b
# For each training graph we compute the share of pseudostates and of regular
# states among the counted vertices, average those shares over all graphs and
# rescale so that the smallest mean share becomes 1.
#
# Column 0 -> RPseudoState (a), column 1 -> RRegularState (b): the printed
# vector follows the order in which the rule lists its alternatives.
alternative_of = {
    'Synchronization': 0,
    'Choice': 0,
    'Exit': 0,
    'Entry': 0,
    'FinalState': 1,
    'State': 1,
}

ps = []
for G in Gs:
    p = np.zeros(2)
    for n in G:
        column = alternative_of.get(G.nodes[n]['type'])
        if column is not None:
            p[column] += 1
    total = p.sum()
    if total == 0:
        # No counted vertex in this graph: 0/0 would put NaN into `ps` and
        # turn the whole mean below into NaN, so the graph is left out.
        continue
    ps.append(p / total)
ps = np.array(ps)

mean_ps = np.mean(ps, axis=0)
print(mean_ps / np.min(mean_ps))

For the rule:

```
	alter RRegularState (Region r) : RegularState ->
		RFinal#a | RState(r)#b 
	;
	
```

In [None]:
# Estimate the weights of the rule
#     alter RRegularState : RFinal#a | RState(r)#b
# Column 0 -> FinalState (a), column 1 -> State (b). Shares are computed per
# graph, averaged over the training set and rescaled so that the smallest mean
# share becomes 1.
ps = []
for G in Gs:
    kinds = [G.nodes[n]['type'] for n in G]
    p = np.array([kinds.count('FinalState'), kinds.count('State')])
    total = p.sum()
    if total == 0:
        # Graph without any regular state: 0/0 would put NaN into `ps` and
        # turn the whole mean below into NaN, so the graph is left out.
        continue
    ps.append(p / total)
ps = np.array(ps)

mean_ps = np.mean(ps, axis=0)
print(mean_ps / np.min(mean_ps))

For the rule:

```
	alter RPseudoState(Region r) : Pseudostate ->
		 RTypeSynchronization(r)#a  | RTypeExit#b | RTypeChoice(r)#c
		 | if (r.vertices.filter[it instanceof Entry].size == 0 
		 	&& r.vertices.size > 0 
		 ) RTypeEntry(r)#d
	;
	
```

We want to estimate `a`, `b`, `c` and `d`.

In [None]:
# Estimate the weights of the rule
#     alter RPseudoState : RTypeSynchronization#a | RTypeExit#b
#                        | RTypeChoice#c | RTypeEntry#d
# The node types are listed in the same order as the rule's alternatives, so
# the printed vector reads directly as [a, b, c, d].
pseudo_types = ['Synchronization', 'Exit', 'Choice', 'Entry']

ps = []
for G in Gs:
    kinds = [G.nodes[n]['type'] for n in G]
    p = np.array([kinds.count(kind) for kind in pseudo_types])
    total = p.sum()
    if total == 0:
        # Graph without any pseudostate: 0/0 would put NaN into `ps` and
        # turn the whole mean below into NaN, so the graph is left out.
        continue
    ps.append(p / total)
ps = np.array(ps)

# Rescale so that the smallest mean share becomes 1. The minimum is computed
# from the data (as in the other alternative-rule cells) instead of relying on
# a rounded constant pasted from an earlier run, which silently goes stale
# whenever the training set changes.
mean_ps = np.mean(ps, axis=0)
print(mean_ps / np.min(mean_ps))

## Transitions per state

For the rule:

```
	RState (Region r) : State ->
		outgoingTransitions += RTransition(self,r)#Distribution(parameters)
	;
	
```

In [None]:
# For every State node, count its outgoing edges whose 'type' is 'outgoingTransitions'.
numberTransitions = []
for G in Gs:
    for n in G:
        if G.nodes[n]['type'] != 'State':
            continue
        transition_edges = sum(
            1
            for nbr in G[n]
            for key in G[n][nbr]
            if G[n][nbr][key]['type'] == 'outgoingTransitions'
        )
        numberTransitions.append(transition_edges)

# Normalised histogram (density=True) over unit-wide bins 0..9.
bins = np.arange(0, 10, 1)
plt.hist(numberTransitions, bins=bins, alpha=0.5, density=True)
# Ends the cell with a statement so the notebook does not echo plt.hist's return value.
print()

In [None]:
# Select the best-fitting distribution for the outgoing-transition counts of
# State nodes; the result is shown as the cell output.
whichFitsBetter(numberTransitions)

For the rule:

```
	RTypeChoice (Region r) : Choice->
		outgoingTransitions += RTransition(self,r)#Distribution(parameters)
	;
	
```

In [None]:
# For every Choice node, count its outgoing edges whose 'type' is 'outgoingTransitions'.
numberTransitions = []
for G in Gs:
    for n in G:
        if G.nodes[n]['type'] != 'Choice':
            continue
        transition_edges = sum(
            1
            for nbr in G[n]
            for key in G[n][nbr]
            if G[n][nbr][key]['type'] == 'outgoingTransitions'
        )
        numberTransitions.append(transition_edges)

# Raw counts (density=False) over half-unit bins 0..9.5.
bins = np.arange(0, 10, 0.5)
plt.hist(numberTransitions, bins=bins, alpha=0.5, density=False)
# Ends the cell with a statement so the notebook does not echo plt.hist's return value.
print()

In [None]:
# Select the best-fitting distribution for the outgoing-transition counts of
# Choice nodes; the result is shown as the cell output.
whichFitsBetter(numberTransitions)

In [None]:
# Square of a value pasted in by hand -- presumably a standard deviation taken
# from the fit above, converted to a variance (TODO confirm).
.3006535122764191**2

## Number of vertices per region

## Statechart

For the rule:

```
	RegionsStatechart : Region ->
		vertices += Vertices(self)#Distribution(parameters)
	;
	
```

In [None]:
def fromStateChart(G, n):
    """Tell whether node ``n`` is reached by a 'regions' edge from a Statechart.

    Args:
        G: graph that can be iterated over its nodes, exposes node attributes
            as ``G.nodes[m]`` and the parallel edges from ``m`` to ``n`` as
            ``G[m][n]`` (a mapping from edge key to attribute dict).
        n: candidate node.

    Returns:
        True if at least one node of type 'Statechart' has an edge of type
        'regions' pointing to ``n``; False otherwise.
    """
    for m in G:
        if G.nodes[m]['type'] != 'Statechart':
            continue
        try:
            edges = G[m][n]
        except KeyError:
            # This Statechart has no edge to n. Keep scanning: returning here
            # would hide a matching edge owned by a later Statechart node.
            continue
        # .get() keeps an edge without a 'type' attribute from raising.
        if any(edges[e].get('type') == 'regions' for e in edges):
            return True
    return False

# For every Region attached to a Statechart, count its outgoing edges whose
# 'type' is 'vertices'.
numberVert = []
for G in Gs:
    for n in G:
        # The type test comes first so fromStateChart only runs on Region nodes.
        if G.nodes[n]['type'] != 'Region' or not fromStateChart(G, n):
            continue
        vertex_edges = sum(
            1
            for nbr in G[n]
            for key in G[n][nbr]
            if G[n][nbr][key]['type'] == 'vertices'
        )
        numberVert.append(vertex_edges)

# Normalised histogram (density=True) over unit-wide bins 0..9.
bins = np.arange(0, 10, 1)
plt.hist(numberVert, bins=bins, alpha=0.5, density=True)
# Ends the cell with a statement so the notebook does not echo plt.hist's return value.
print()

In [None]:
# Select and print the best-fitting distribution for the number of vertices in
# regions that hang directly from a Statechart.
print(whichFitsBetter(numberVert))

## State

For the rule:

```
	RegionsState : Region ->
		vertices += Vertices(self)#Distribution(parameters)
	;
	
```

In [None]:
def fromState(G, n):
    """Tell whether node ``n`` is reached by a 'regions' edge from a State.

    Args:
        G: graph that can be iterated over its nodes, exposes node attributes
            as ``G.nodes[m]`` and the parallel edges from ``m`` to ``n`` as
            ``G[m][n]`` (a mapping from edge key to attribute dict).
        n: candidate node.

    Returns:
        True if at least one node of type 'State' has an edge of type
        'regions' pointing to ``n``; False otherwise.
    """
    for m in G:
        if G.nodes[m]['type'] != 'State':
            continue
        try:
            edges = G[m][n]
        except KeyError:
            # This State has no edge to n. Keep scanning: returning here would
            # make the answer depend on which State happens to be visited
            # first and miss regions owned by any later State.
            continue
        # .get() keeps an edge without a 'type' attribute from raising.
        if any(edges[e].get('type') == 'regions' for e in edges):
            return True
    return False

# For every Region attached to a State, count its outgoing edges whose 'type'
# is 'vertices'.
numberVert = []
for G in Gs:
    for n in G:
        # The type test comes first so fromState only runs on Region nodes.
        if G.nodes[n]['type'] != 'Region' or not fromState(G, n):
            continue
        vertex_edges = sum(
            1
            for nbr in G[n]
            for key in G[n][nbr]
            if G[n][nbr][key]['type'] == 'vertices'
        )
        numberVert.append(vertex_edges)

# Normalised histogram (density=True) over unit-wide bins 0..9.
bins = np.arange(0, 10, 1)
plt.hist(numberVert, bins=bins, alpha=0.5, density=True)
# Ends the cell with a statement so the notebook does not echo plt.hist's return value.
print()

In [None]:
# Select the best-fitting distribution for the number of vertices in regions
# owned by a State; the result is shown as the cell output.
whichFitsBetter(numberVert)

The generated models used to report the results in the paper are already provided.