# Морфологічний аналіз

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('whitegrid')
# Jupyter rendering stuff
from IPython.display import display, Math, Latex, HTML, clear_output

In [2]:
import itertools

## Input data

In [3]:
# def generate_probabilities(*sizes):
#     result = [np.random.random(size) for size in sizes]
#     for vec in result:
#         vec /= np.sum(vec)
#     return result

In [4]:
# def generate_cross_relation(size1, size2):
#     return np.random.random(size=(size1, size2)) * 2 - 1

In [5]:
# outer_f_s = (10, 10, 10, 10, 10, 10)
# outer_f_p = generate_probabilities(*outer_f_s)
# inner_f_s = (10, 10)
# inner_f_p = generate_probabilities(*inner_f_s)

In [6]:
# outer_rel = {(i, j):generate_cross_relation(s1, s2) for (i,s1), (j,s2) in itertools.combinations(enumerate(outer_f_s), 2)}
# inner_rel = {(i, j):generate_cross_relation(s1, s2) for (i,s1), (j,s2) in itertools.combinations(enumerate(inner_f_s), 2)}
# cross_rel = {(i, j):generate_cross_relation(s1, s2) for (i,s1), (j,s2) in itertools.product(enumerate(outer_f_s), enumerate(inner_f_s))}

In [7]:
# presentation example
# Outer factors: the number of alternatives in each factor column.
outer_f_s = (3, 4, 2)
# Probabilities of the alternatives of every outer factor.
outer_f_p = [
    np.array([0.3, 0.5, 0.2]),
    np.array([0.4, 0.3, 0.1, 0.2]),
    np.array([0.3, 0.7]),
]
# Inner factors: the number of alternatives in each factor column.
inner_f_s = (3, 4, 2)
# Probabilities of the alternatives of every inner factor.
inner_f_p = [
    np.array([0.3, 0.5, 0.2]),
    np.array([0.4, 0.3, 0.1, 0.2]),
    np.array([0.3, 0.7]),
]
# Relations between the outer factors, one zero-filled matrix per factor
# pair; only the entries assigned below are non-zero.
outer_rel = {
    (first, second): np.zeros((rows, cols))
    for (first, rows), (second, cols)
    in itertools.combinations(enumerate(outer_f_s), 2)
}
outer_rel[0, 1][0, 0] = 0.5
outer_rel[0, 1][2, 1] = -0.5
outer_rel[0, 2][0, 0] = 0.2
outer_rel[0, 2][1, 0] = 0.3
outer_rel[1, 2][0, 0] = 0.5
outer_rel[1, 2][2, 1] = -1
# Relations between outer and inner factors, keyed by (outer, inner);
# every matrix is left at zero in this example.
cross_rel = {
    (outer, inner): np.zeros((rows, cols))
    for (outer, rows), (inner, cols)
    in itertools.product(enumerate(outer_f_s), enumerate(inner_f_s))
}

## Calculating table for outer factors

In [8]:
# Empty frame whose rows enumerate every combination of outer-factor
# alternatives; the index levels are named '1', '2', ... by factor number.
df_outer = pd.DataFrame(
    index=pd.MultiIndex.from_product(
        [range(n_alternatives) for n_alternatives in outer_f_s],
        names=[str(number) for number in range(1, len(outer_f_s) + 1)],
    )
)

In [9]:
def calculate_correlation(df, relation):
    """Add column 'C': the product of ``relation + 1`` over all factor pairs.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a MultiIndex with one level per factor. The integer
        level codes of each row are used as alternative indices into the
        relation matrices. The frame is modified in place.
    relation : mapping
        Maps a factor pair ``(f1, f2)`` with ``f1 < f2`` to a 2-D array
        indexed as ``[alternative of f1, alternative of f2]``.
    """
    index = df.index
    # MultiIndex.labels was renamed to MultiIndex.codes and removed in
    # pandas 1.0; prefer the new name and fall back for old installations.
    codes = index.codes if hasattr(index, 'codes') else index.labels
    result = 1
    for (f1, a1), (f2, a2) in itertools.combinations(enumerate(codes), 2):
        # Fancy indexing picks, for every row, the relation between the
        # row's alternatives of the two factors.
        result = result * (relation[f1, f2][a1, a2] + 1)
    df['C'] = result

In [10]:
def calculate_cond_probs(df, factor_p):
    """Add the unnormalized columns 'P1_', 'P2_', ... to ``df``.

    For factor number ``k`` the column ``'P<k>_'`` holds, for every row, the
    product of the probabilities of the row's alternatives of all *other*
    factors, multiplied by the row's value in column 'C'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a MultiIndex with one level per factor; it must
        already contain column 'C'. The frame is modified in place.
    factor_p : sequence of array-like
        ``factor_p[i][a]`` is the probability of alternative ``a`` of
        factor ``i``.
    """
    index = df.index
    # MultiIndex.labels was renamed to MultiIndex.codes and removed in
    # pandas 1.0; prefer the new name and fall back for old installations.
    codes = index.codes if hasattr(index, 'codes') else index.labels
    # probs[i][row] is the probability of the alternative that factor i
    # takes in that row.
    probs = np.array([np.asarray(factor_p[i])[np.asarray(code)]
                      for i, code in enumerate(codes)])
    factor_ids = np.arange(len(probs))
    for i in range(len(probs)):
        # Multiply the other factors directly instead of dividing the full
        # product by probs[i]: the division yields NaN when a probability is 0.
        others = np.prod(probs[factor_ids != i], axis=0)
        # Write into the frame that was passed in, not a module-level one.
        df['P%d_' % (i + 1)] = others * df['C']

In [11]:
def normalize_cond_probs(df, factor_s):
    """Add the normalized columns 'P1', 'P2', ... to ``df``.

    For factor number ``k`` the values of ``'P<k>_'`` are divided by their
    sum within each group of rows that share the same alternative of that
    factor, so ``'P<k>'`` sums to one inside every such group.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by one level per factor that already contains the
        ``'P<k>_'`` columns. The frame is modified in place.
    factor_s : sequence of int
        Number of alternatives of every factor; alternatives are the index
        values ``0 .. factor_s[i] - 1`` of level ``i``.
    """
    # Use the factor_s argument (not a module-level size tuple) so the
    # function works for any factor set.
    for i, size in enumerate(factor_s):
        raw = df['P%d_' % (i + 1)]
        level_values = df.index.get_level_values(i)
        normalized = np.zeros(df.shape[0])
        for j in range(size):
            # Rows in which factor i takes alternative j.
            mask = np.asarray(level_values == j)
            group = raw[mask]
            # .values replaces Series.as_matrix(), removed in pandas 1.0.
            normalized[mask] = group.values / group.sum()
        df['P%d' % (i + 1)] = normalized

In [12]:
def generate_prob_matrices(df, factor_s):
    """Build one matrix per factor from the normalized 'P<k>' columns.

    Factor ``src`` is paired with the next factor ``dst`` (wrapping around
    after the last one). Entry ``[r, c]`` of the resulting matrix is the sum
    of column ``'P<dst + 1>'`` over all rows in which factor ``src`` takes
    alternative ``r`` and factor ``dst`` takes alternative ``c``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by one level per factor, containing the 'P<k>' columns.
    factor_s : sequence of int
        Number of alternatives of every factor.

    Returns
    -------
    list of numpy.ndarray
        Matrix ``k`` has shape ``(factor_s[k], factor_s[next k])``.
    """
    n_factors = len(factor_s)
    everything = slice(None)
    matrices = []
    for src in range(n_factors):
        dst = (src + 1) % n_factors
        column = 'P%d' % (dst + 1)
        n_rows, n_cols = factor_s[src], factor_s[dst]
        table = np.zeros((n_rows, n_cols))
        for r in range(n_rows):
            for c in range(n_cols):
                # Fix the two paired factors, leave every other level free.
                key = [everything] * n_factors
                key[src] = r
                key[dst] = c
                table[r, c] = df.loc[tuple(key), column].sum()
        matrices.append(table)
    return matrices

In [13]:
def create_matrix_set(df, factor_s, factor_p, cross_rel):
    """Run the whole table pipeline and return the probability matrices.

    Calls, in this order, ``calculate_correlation`` (with ``cross_rel``),
    ``calculate_cond_probs`` (with ``factor_p``) and ``normalize_cond_probs``
    (with ``factor_s``) on ``df``, then returns the result of
    ``generate_prob_matrices(df, factor_s)``.
    """
    # Step 1: correlation coefficient of every row.
    calculate_correlation(df, cross_rel)
    # Step 2: unnormalized conditional probabilities.
    calculate_cond_probs(df, factor_p)
    # Step 3: normalization of the conditional probabilities.
    normalize_cond_probs(df, factor_s)
    # Step 4: matrices between neighbouring factors.
    matrices = generate_prob_matrices(df, factor_s)
    return matrices

In [14]:
def generate_cycle_product(matrices):
    """Return every cyclic product of the given matrices.

    Element ``k`` of the result is
    ``matrices[k] @ matrices[k + 1] @ ... @ matrices[k - 1]``: the product of
    all matrices taken in cyclic order starting at position ``k``. The inputs
    are not modified; for a single matrix the result holds a copy of it.
    """
    count = len(matrices)
    products = []
    for start in range(count):
        chained = matrices[start].copy()
        # Multiply by the remaining matrices, wrapping around the end.
        for offset in range(1, count):
            chained = chained @ matrices[(start + offset) % count]
        products.append(chained)
    return products

In [15]:
def find_eigenvec_near_one(matrices, eps=1e-8):
    """Return, for every matrix, its eigenvector for eigenvalue 1, scaled to sum to 1.

    Parameters
    ----------
    matrices : iterable of square numpy.ndarray
        Matrices to decompose with ``numpy.linalg.eig``.
    eps : float, optional
        Absolute tolerance passed to ``numpy.isclose`` when looking for the
        eigenvalue 1. ``numpy.isclose`` also applies its default relative
        tolerance, so the effective tolerance is slightly larger than ``eps``.

    Returns
    -------
    list of numpy.ndarray
        One normalized eigenvector per input matrix.

    Raises
    ------
    ValueError
        If a matrix does not have exactly one eigenvalue close to 1; the
        eigenvector would otherwise be empty or a mix of several vectors.
    RuntimeWarning
        If a normalized eigenvector has a negative element.
    """
    result = []
    for matrix in matrices:
        vals, vecs = np.linalg.eig(matrix)
        mask = np.isclose(vals, 1, atol=eps)
        matches = int(mask.sum())
        if matches != 1:
            # Flattening zero or several eigenvector columns would silently
            # return a vector of the wrong length.
            raise ValueError(
                "Expected exactly one eigenvalue near 1, found %d." % matches)
        needed_vec = vecs[:, mask].flatten()
        # Drop a negligible imaginary part left over by the decomposition.
        needed_vec = np.real_if_close(needed_vec)
        probs = needed_vec / needed_vec.sum()
        if (probs < 0).any():
            raise RuntimeWarning("Probability vector has negative elements.")
        result.append(probs)
    return result

In [16]:
# Fill df_outer with the correlation and probability columns and build the
# matrices between neighbouring outer factors.
outer_matrices = create_matrix_set(
    df=df_outer,
    factor_s=outer_f_s,
    factor_p=outer_f_p,
    cross_rel=outer_rel,
)
# Cyclic products of those matrices, one per starting factor.
outer_matrices_cycled = generate_cycle_product(matrices=outer_matrices)
# Normalized eigenvectors for eigenvalue 1 of every cyclic product.
outer_real_probs = find_eigenvec_near_one(matrices=outer_matrices_cycled)

In [17]:
outer_real_probs

[array([ 0.35405405,  0.49459459,  0.15135135]),
 array([ 0.5154955 ,  0.26027027,  0.0327027 ,  0.19153153]),
 array([ 0.41351351,  0.58648649])]

## Calculating effectiveness R

In [18]:
def calculate_cross_correlation(df, cross_rel, other_f_p):
    """Add the columns 'R<f>_<a>' for every alternative of every other factor.

    For each factor ``f`` of the other group, every row gets the weights
    ``other_f_p[f] * prod(cross_rel + 1)`` over the row's own alternatives,
    normalized so that the weights of one row sum to 1. The weight of
    alternative ``a`` is stored in column ``'R<f + 1>_<a + 1>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a MultiIndex with one level per factor. The frame
        is modified in place.
    cross_rel : mapping
        Maps ``(factor of df, other factor)`` to a 2-D array indexed as
        ``[alternative of the df factor, alternative of the other factor]``.
    other_f_p : sequence of array-like
        Probabilities of the alternatives of every other factor.
    """
    index = df.index
    # MultiIndex.labels was renamed to MultiIndex.codes and removed in
    # pandas 1.0; prefer the new name and fall back for old installations.
    codes = index.codes if hasattr(index, 'codes') else index.labels
    for other_f, other_p in enumerate(other_f_p):
        result = 1
        for f1, a1 in enumerate(codes):
            # One row of relation values per row of df.
            result = result * (cross_rel[f1, other_f][a1, :] + 1)
        result = result * other_p  # p' * prod{(c+1)} for every alternative
        # Transpose so each alternative becomes one row, normalized per df row.
        result = result.T / result.sum(axis=1)
        for i, alt_row in enumerate(result):
            df['R%d_%d' % (other_f + 1, i + 1)] = alt_row

In [19]:
def calculate_joint_probs(df, real_probs):
    """Add column 'JP': column 'P1' scaled by the first factor's probability.

    Every row in which the first index level equals ``i`` gets
    ``JP = P1 * real_probs[0][i]``. Rows whose first-level value has no entry
    in ``real_probs[0]`` get NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first index level holds the alternative number of the
        first factor and which contains column 'P1'. Modified in place.
    real_probs : sequence of array-like
        Only ``real_probs[0]``, the probabilities of the first factor's
        alternatives, is used.
    """
    first_level = df.index.get_level_values(0)
    p1 = df['P1'].values
    # Build the whole column first and assign it once, instead of creating
    # 'JP' through row-sliced .loc assignments into a missing column.
    joint = np.full(df.shape[0], np.nan)
    for i, prob in enumerate(real_probs[0]):
        mask = np.asarray(first_level == i)
        joint[mask] = p1[mask] * prob
    df['JP'] = joint

In [20]:
def calculate_effectiveness(df, other_f_s):
    """Return, per other factor, the 'JP'-weighted sum of each 'R' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing column 'JP' and the columns 'R<f>_<a>'.
    other_f_s : sequence of int
        Number of alternatives of every other factor.

    Returns
    -------
    list of numpy.ndarray
        Array ``f`` has one entry per alternative ``a``: the sum over all
        rows of ``df['R<f + 1>_<a + 1>'] * df['JP']``.
    """
    return [
        np.array(
            [(df['R%d_%d' % (factor_no, alt_no)] * df['JP']).sum()
             for alt_no in range(1, n_alternatives + 1)],
            dtype=float,
        )
        for factor_no, n_alternatives in enumerate(other_f_s, start=1)
    ]

In [21]:
# Add the per-row 'R' columns for every alternative of the inner factors.
calculate_cross_correlation(
    df=df_outer,
    cross_rel=cross_rel,
    other_f_p=inner_f_p,
)
# Add the 'JP' column using the outer-factor probabilities found above.
calculate_joint_probs(df=df_outer, real_probs=outer_real_probs)
# Weighted sums of the 'R' columns, one array per inner factor.
inner_effectiveness = calculate_effectiveness(
    df=df_outer,
    other_f_s=inner_f_s,
)

In [22]:
inner_effectiveness

[array([ 0.3,  0.5,  0.2]),
 array([ 0.4,  0.3,  0.1,  0.2]),
 array([ 0.3,  0.7])]

In [23]:
df_outer
# to export to excel, uncomment
#df_outer.to_excel('results.xlsx')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,C,P1_,P2_,P3_,P1,P2,P3,R1_1,R1_2,R1_3,R2_1,R2_2,R2_3,R2_4,R3_1,R3_2,JP
1,2,3,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,0,0,2.7,0.324,0.243,0.324,0.247328,0.169871,0.211765,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.087568
0,0,1,1.5,0.42,0.315,0.18,0.320611,0.220203,0.193548,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.113514
0,1,0,1.2,0.108,0.108,0.108,0.082443,0.11215,0.070588,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.029189
0,1,1,1.0,0.21,0.21,0.09,0.160305,0.218069,0.096774,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.056757
0,2,0,1.2,0.036,0.108,0.036,0.027481,0.297521,0.023529,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.00973
0,2,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.0
0,3,0,1.2,0.072,0.108,0.072,0.054962,0.101599,0.047059,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.019459
0,3,1,1.0,0.14,0.21,0.06,0.10687,0.197554,0.064516,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.037838
1,0,0,1.95,0.234,0.2925,0.39,0.213115,0.204474,0.254902,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.105405
1,0,1,1.0,0.28,0.35,0.2,0.255009,0.24467,0.215054,0.3,0.5,0.2,0.4,0.3,0.1,0.2,0.3,0.7,0.126126
