# Морфологічний аналіз

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('whitegrid')
# Jupyter rendering stuff
from IPython.display import display, Math, Latex, HTML, clear_output

In [2]:
import itertools

## Input data generation

In [3]:
def generate_probabilities(*sizes):
    """Draw one random probability vector per requested size.

    Each vector is sampled with ``np.random.random`` and then rescaled so
    that its entries sum to 1.

    Parameters
    ----------
    *sizes
        Sizes forwarded one by one to ``np.random.random``.

    Returns
    -------
    list of numpy.ndarray
        Normalised vectors, in the same order as ``sizes``.
    """
    vectors = []
    for length in sizes:
        weights = np.random.random(length)
        # Rescale in place so the entries form a probability distribution.
        weights /= np.sum(weights)
        vectors.append(weights)
    return vectors

In [4]:
def generate_cross_relation(size1, size2):
    """Return a random ``size1`` x ``size2`` matrix with entries in [-1, 1).

    The entries are uniform samples from ``np.random.random`` (range
    [0, 1)) stretched to the interval [-1, 1).
    """
    unit_samples = np.random.random((size1, size2))
    return unit_samples * 2 - 1

In [5]:
# Number of alternatives for each outer factor (seven factors).
outer_f_s = (4,2,4,3,1,2,3)
# One random, normalised probability vector per outer factor.
outer_f_p = generate_probabilities(*outer_f_s)
# Number of alternatives for each inner factor (two factors).
inner_f_s = (5,4)
# One random, normalised probability vector per inner factor.
inner_f_p = generate_probabilities(*inner_f_s)

In [6]:
# Random cross-relation matrices keyed by a pair of factor positions (i, j);
# each value has shape (size of factor i, size of factor j).
# Outer-vs-outer: one matrix per unordered pair of outer factors (i < j).
outer_cross_rel = {(i, j):generate_cross_relation(s1, s2) for (i,s1), (j,s2) in itertools.combinations(enumerate(outer_f_s), 2)}
# Inner-vs-inner: one matrix per unordered pair of inner factors (i < j).
inner_cross_rel = {(i, j):generate_cross_relation(s1, s2) for (i,s1), (j,s2) in itertools.combinations(enumerate(inner_f_s), 2)}
# Outer-vs-inner: one matrix for every (outer factor i, inner factor j) pair.
out_in_cross_rel = {(i, j):generate_cross_relation(s1, s2) for (i,s1), (j,s2) in itertools.product(enumerate(outer_f_s), enumerate(inner_f_s))}

In [7]:
# presentation example
# outer_f_s = (3, 4, 2)
# outer_f_p = [np.array([0.3, 0.5, 0.2]), np.array([0.4, 0.3, 0.1, 0.2]), np.array([0.3, 0.7])]
# outer_cross_rel = {(i, j):np.zeros((s1, s2)) for (i,s1), (j,s2) in itertools.combinations(enumerate(outer_f_s), 2)}
# outer_cross_rel[(0,1)][0,0] = 0.5
# outer_cross_rel[(0,1)][2,1] = -0.5
# outer_cross_rel[(0,2)][0,0] = 0.2
# outer_cross_rel[(0,2)][1,0] = 0.3
# outer_cross_rel[(1,2)][0,0] = 0.5
# outer_cross_rel[(1,2)][2,1] = -1

## Calculating table for outer factors

In [8]:
# Column-less table with one row per combination of outer-factor alternatives:
# the index is the Cartesian product of range(size) over all outer factors,
# and the index levels are named "1", "2", ... in factor order.
df_outer = pd.DataFrame(index=pd.MultiIndex.from_product(tuple(range(size) for size in outer_f_s),
                                                         names=[str(i + 1) for i in range(len(outer_f_s))]))

In [9]:
def calculate_correlation(df, cross_rel):
    """Store the combined pairwise correlation factor of each row in ``df['C']``.

    For every unordered pair of index levels ``(f1, f2)`` with ``f1 < f2`` the
    entry of ``cross_rel[f1, f2]`` selected by the row's two level codes is
    looked up, 1 is added to it, and all such terms are multiplied together.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by a ``MultiIndex`` (one level per factor). It is
        modified in place: column ``'C'`` is added or overwritten.
    cross_rel : mapping
        Maps ``(f1, f2)`` to a 2-D array indexed by the codes of levels
        ``f1`` and ``f2``.
    """
    index = df.index
    # ``MultiIndex.labels`` was renamed to ``codes`` in pandas 0.24 and removed
    # in 1.0; prefer the new name and fall back for old installations.
    level_codes = index.codes if hasattr(index, 'codes') else index.labels
    result = 1
    for (f1, a1), (f2, a2) in itertools.combinations(enumerate(level_codes), 2):
        # a1 and a2 are whole code arrays, so this picks one entry per row.
        result *= cross_rel[f1, f2][a1, a2] + 1
    df['C'] = result

In [10]:
def calculate_cond_probs(df, factor_p):
    """Add the unnormalised conditional-probability columns ``'P1_'``, ``'P2_'``, ...

    For factor ``k`` the column ``'P<k+1>_'`` holds, for every row, the product
    of the probabilities of the alternatives chosen for all *other* factors,
    multiplied by the row's ``'C'`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by a ``MultiIndex`` (one level per factor) that already
        has a ``'C'`` column. It is modified in place.
    factor_p : sequence of numpy.ndarray
        ``factor_p[k]`` is the probability vector of factor ``k``, indexed by
        the codes of index level ``k``.
    """
    index = df.index
    # ``MultiIndex.labels`` was renamed to ``codes`` in pandas 0.24 and removed
    # in 1.0; prefer the new name and fall back for old installations.
    level_codes = index.codes if hasattr(index, 'codes') else index.labels
    # probs[k, row] = probability of the alternative of factor k used in row.
    probs = np.array([factor_p[i][codes] for i, codes in enumerate(level_codes)])
    # Dividing the full product by one factor's probability leaves the
    # product over the remaining factors.
    probs = np.prod(probs, axis=0) / probs
    for i, others_product in enumerate(probs):
        # Write to the table that was passed in, not to a module-level global.
        df['P%d_' % (i + 1)] = others_product * df['C']

In [11]:
def normalize_cond_probs(df, factor_s):
    """Add the normalised conditional-probability columns ``'P1'``, ``'P2'``, ...

    For factor ``k`` the values of ``'P<k+1>_'`` are rescaled separately for
    every alternative of that factor (every value of index level ``k``), so
    that the rows sharing an alternative sum to 1 in ``'P<k+1>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by a ``MultiIndex`` (one level per factor) containing
        the ``'P<k>_'`` columns. It is modified in place.
    factor_s : sequence of int
        Number of alternatives per factor; its length gives the number of
        factors to process.
    """
    for i in range(len(factor_s)):
        raw = df['P%d_' % (i + 1)]
        # Sum of the raw values over all rows sharing the same alternative of
        # factor i, broadcast back to every row. This depends only on the
        # arguments (no module-level globals) and avoids the removed
        # ``Series.as_matrix``.
        group_total = raw.groupby(level=i).transform('sum')
        df['P%d' % (i + 1)] = raw / group_total

In [12]:
def generate_prob_matrices(df, factor_s):
    """Build one matrix per factor linking it to the next factor (cyclically).

    For factor ``src`` and its successor ``dst = (src + 1) % n`` the matrix
    entry ``[a, b]`` is the sum of column ``'P<dst+1>'`` over all rows whose
    index level ``src`` equals ``a`` and whose level ``dst`` equals ``b``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by a ``MultiIndex`` (one level per factor) containing
        the ``'P<k>'`` columns.
    factor_s : sequence of int
        Number of alternatives per factor.

    Returns
    -------
    list of numpy.ndarray
        Matrix ``src`` has shape ``(factor_s[src], factor_s[dst])``.
    """
    n_factors = len(factor_s)
    take_all = slice(None)
    matrices = []
    for src in range(n_factors):
        dst = (src + 1) % n_factors
        column = 'P%d' % (dst + 1)
        n_rows, n_cols = factor_s[src], factor_s[dst]
        transition = np.zeros((n_rows, n_cols))
        for a in range(n_rows):
            for b in range(n_cols):
                # Fix the two levels of interest, leave the others unrestricted.
                selector = [take_all] * n_factors
                selector[src] = a
                selector[dst] = b
                transition[a, b] = df.loc[tuple(selector), column].sum()
        matrices.append(transition)
    return matrices

In [13]:
def create_matrix_set(df, factor_s, factor_p, cross_rel):
    """Run the whole table pipeline and return the resulting matrices.

    Calls, in order, ``calculate_correlation``, ``calculate_cond_probs``,
    ``normalize_cond_probs`` and ``generate_prob_matrices``; the result of the
    last call is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table handed to every step.
    factor_s
        Factor sizes, passed to the normalisation and matrix-building steps.
    factor_p
        Factor probability vectors, passed to ``calculate_cond_probs``.
    cross_rel
        Cross-relation matrices, passed to ``calculate_correlation``.
    """
    # The steps must run in exactly this order.
    calculate_correlation(df, cross_rel)
    calculate_cond_probs(df, factor_p)
    normalize_cond_probs(df, factor_s)
    matrices = generate_prob_matrices(df, factor_s)
    return matrices

In [14]:
def generate_cycle_product(matrices):
    """Return, for every start position, the product of one full cycle.

    Entry ``k`` of the result is
    ``matrices[k] @ matrices[k+1] @ ... @ matrices[k-1]`` with indices taken
    modulo ``len(matrices)``, multiplied left to right. The inputs are not
    modified.

    Parameters
    ----------
    matrices : sequence of numpy.ndarray
        Matrices whose shapes chain cyclically.

    Returns
    -------
    list of numpy.ndarray
        One cyclic product per input matrix.
    """
    count = len(matrices)
    products = []
    for start in range(count):
        # Copy so that a single-matrix input is not returned by reference.
        running = matrices[start].copy()
        for step in range(1, count):
            running = running @ matrices[(start + step) % count]
        products.append(running)
    return products

In [15]:
def find_eigenvec_near_one(matrices, eps=1e-8):
    """Return, for each matrix, the eigenvector of eigenvalue 1 scaled to sum 1.

    Parameters
    ----------
    matrices : iterable of numpy.ndarray
        Square matrices expected to have an eigenvalue equal to 1.
    eps : float, optional
        Absolute tolerance passed to ``np.isclose`` when comparing the
        eigenvalues with 1.

    Returns
    -------
    list of numpy.ndarray
        One vector per matrix, each divided by the sum of its entries.

    Raises
    ------
    ValueError
        If a matrix has no eigenvalue within tolerance of 1.
    RuntimeWarning
        If a normalised vector contains negative entries.
    """
    result = []
    for matrix in matrices:
        vals, vecs = np.linalg.eig(matrix)
        candidates = np.flatnonzero(np.isclose(vals, 1, atol=eps))
        if candidates.size == 0:
            raise ValueError("Matrix has no eigenvalue close to 1.")
        # Take exactly one eigenvector. A boolean mask would concatenate
        # several columns into one over-long vector when the eigenvalue 1 is
        # repeated, so the candidate closest to 1 is chosen instead.
        best = candidates[np.argmin(np.abs(vals[candidates] - 1))]
        needed_vec = np.real_if_close(vecs[:, best])
        # Eigenvectors are defined up to a scale (and sign); dividing by the
        # sum fixes both.
        probs = needed_vec / needed_vec.sum()
        if (probs < 0).any():
            raise RuntimeWarning("Probability vector has negative elements.")
        result.append(probs)
    return result

In [16]:
# Fill df_outer and build one matrix per outer factor (factor -> next factor).
outer_matrices = create_matrix_set(df_outer, outer_f_s, outer_f_p, outer_cross_rel)
# For each starting factor, multiply the matrices once around the full cycle.
outer_matrices_cycled = generate_cycle_product(outer_matrices)
# For each cycled matrix, take the eigenvector for eigenvalue 1, scaled to sum 1.
outer_real_probs = find_eigenvec_near_one(outer_matrices_cycled)

In [17]:
# Display the resulting probability vectors, one per outer factor.
outer_real_probs

[array([ 0.33007763,  0.0867728 ,  0.01690569,  0.56624388]),
 array([ 0.05419201,  0.94580799]),
 array([ 0.22393838,  0.14324562,  0.01388094,  0.61893506]),
 array([ 0.80673027,  0.00394841,  0.18932131]),
 array([ 1.]),
 array([ 0.19313308,  0.80686692]),
 array([ 0.80238419,  0.03073636,  0.16687945])]