# How to infer latent preferences directly from data

In [3]:
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
from multisoc.infer import data_loader
from multisoc.infer import wrappers
from multisoc.infer import MRQAP

## Load the data
The nodes have three properties (shape, color, number) and their connections are stored in the edges dataframe.

In [5]:
# Read both tables with every column typed as categorical; the nodes table
# uses its "index" column as the DataFrame index.
_csv_options = {"dtype": "category"}
nodes_dummy = pd.read_csv("nodes_dummy.csv", index_col="index", **_csv_options)
edges_dummy = pd.read_csv("edges_dummy.csv", **_csv_options)

In [18]:
# Optional alternative to the local CSV files: download the edges and nodes
# tables from the URLs below. The cell is left commented out; when enabled it
# stores the tables in edges_dummy2 / nodes_dummy2, not in the edges_dummy /
# nodes_dummy variables loaded from disk.
# NOTE(review): the response text is split on '\n' and re-joined with "",
# which drops the '\n' characters — presumably this relies on the remote
# files using '\r\n' line endings; confirm before re-enabling.

# import requests # pip install requests
# from io import StringIO

# edges_dummy_url = 'https://pastebin.com/raw/1pqauPwz'
# nodes_dummy_url = 'https://pastebin.com/raw/eNp61ptx'

# contents = requests.get(edges_dummy_url).text.split('\n')
# edges_dummy2 = pd.read_csv(StringIO("".join(contents)),dtype='category')

# contents = requests.get(nodes_dummy_url).text.split('\n')
# nodes_dummy2 = pd.read_csv(StringIO("".join(contents)),dtype='category',index_col="index")

## Explicitly choose the dimensions and categories in each dimension to perform the analysis

In [6]:
# Dimensions to analyse, in the order passed to the inference routines.
dimensions_list = ["number", "color", "shape"]
num_dimensions = len(dimensions_list)

# Categories considered within each dimension (all given as strings).
shape_list = ["Circle", "Square"]
color_list = ["Blue", "Red"]
number_list = [str(value) for value in range(1, 7)]  # "1" ... "6"

# Lookup from dimension name to its list of categories.
all_attributes_dict = dict(
    shape=shape_list,
    color=color_list,
    number=number_list,
)

## Convert the loaded data to adequate input
We add the node attributes to the edges dataframe (as `source ...` / `target ...` columns) so that the inference functions can process it.

In [7]:
# Build the inputs for the inference functions; `dimensions` names the node
# attribute columns to carry over.
node_dimensions = ["shape", "color", "number"]
nodes_input, edges_input = data_loader.build_nodes_edges_input_df(
    nodes_dummy,
    edges_dummy,
    dimensions=node_dimensions,
)

In [8]:
# Display the edge table, which now carries source/target attribute columns.
edges_input

Unnamed: 0,source,target,source shape,target shape,source color,target color,source number,target number
0,0,1,Square,Circle,Blue,Blue,3,3
1,0,23,Square,Circle,Blue,Blue,3,3
2,0,41,Square,Square,Blue,Blue,3,3
3,0,63,Square,Square,Blue,Red,3,3
4,0,103,Square,Circle,Blue,Blue,3,3
...,...,...,...,...,...,...,...,...
986,252,221,Circle,Square,Blue,Blue,6,5
987,252,232,Circle,Circle,Blue,Blue,6,6
988,252,248,Circle,Square,Blue,Blue,6,4
989,253,31,Circle,Circle,Red,Red,3,3


## Infer latent one-dimensional preferences and full multidimensional preferences (for reference)

In [9]:
# Type of aggregation function; one of {and, or, mean}.
aggregation_type = "and"
results_1d_dct = wrappers.infer_latent_preferences_1dSimple(
    nodes_input,
    edges_input,
    dimensions_list,
    all_attributes_dict,
    type_p=aggregation_type,
)

Likelihood maximization convergence result: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH


In [10]:
# Full multidimensional inference, kept as a reference for the 1D results.
multidimensional_args = (
    nodes_input,
    edges_input,
    dimensions_list,
    all_attributes_dict,
)
res_dct_multi = wrappers.infer_latent_preferences_Multidimensional(*multidimensional_args)

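The dictionary of results contains:
- The multidimensional group sizes (`multidimensional_population`)
- The number of links across multidimensional groups (`multidimensional_links`)
- The one-dimensional group sizes (`one_dimensional_population`)
- The number of links across one-dimensional groups (`one_dimensional_links`)
- The inferred 1D latent preferences (`h_inferred_latent_preferences`)
- The diagonal-normalized 1D latent preferences (`h_normalized_inferred_latent_preferences`)
- The estimated multidimensional preferences computed with the aggregation function (`H_multidimensional_preferences`)
- The model performance metrics (`Likelihood`, `AIC`, `BIC`)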

In [11]:
# List the entries available in the 1D inference results.
results_1d_dct.keys()

dict_keys(['multidimensional_population', 'multidimensional_links', 'one_dimensional_population', 'one_dimensional_links', 'H_multidimensional_preferences', 'Likelihood', 'AIC', 'BIC', 'h_inferred_latent_preferences', 'h_normalized_inferred_latent_preferences'])

In [12]:
# Link counts between multidimensional groups, shaded by magnitude.
links_multidimensional = results_1d_dct["multidimensional_links"]
links_multidimensional.style.background_gradient(cmap="viridis")

Unnamed: 0_level_0,Unnamed: 1_level_0,number,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6
Unnamed: 0_level_1,Unnamed: 1_level_1,color,Blue,Blue,Red,Red,Blue,Blue,Red,Red,Blue,Blue,Red,Red,Blue,Blue,Red,Red,Blue,Blue,Red,Red,Blue,Blue,Red,Red
Unnamed: 0_level_2,Unnamed: 1_level_2,shape,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square
number,color,shape,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3
1,Blue,Circle,10,8,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,0,0,0,0,1,0,0
1,Blue,Square,4,10,1,0,0,0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Red,Circle,0,0,8,2,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1
1,Red,Square,0,0,2,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
2,Blue,Circle,1,2,1,0,36,7,3,4,7,11,0,1,2,3,1,0,2,0,1,0,0,0,0,0
2,Blue,Square,0,1,0,0,3,7,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0
2,Red,Circle,0,0,2,1,1,0,8,10,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Red,Square,0,0,0,1,6,0,9,40,0,1,6,4,1,0,7,2,0,0,1,0,0,0,4,0
3,Blue,Circle,0,3,0,0,6,2,0,0,36,21,0,2,4,1,0,0,6,2,0,1,4,3,0,0
3,Blue,Square,0,3,0,0,1,0,0,0,16,35,0,1,1,4,0,0,2,4,0,0,4,3,0,0


In [14]:
# Entry 0 of the normalized preferences is the "number" dimension.
h_norm_number = results_1d_dct["h_normalized_inferred_latent_preferences"][0]
h_norm_number.style.background_gradient(cmap="viridis", vmin=0, vmax=1)

number,1,2,3,4,5,6
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1.0,0.077015,0.105561,0.048261,0.01541,0.049469
2,0.107688,1.0,0.23333,0.122006,0.054738,0.039144
3,0.065953,0.073068,1.0,0.161222,0.136459,0.154399
4,0.0,0.065914,0.144913,1.0,0.139048,0.078766
5,0.0,0.009841,0.081295,0.1573,1.0,0.214753
6,0.010012,0.0,0.081949,0.053307,0.06865,1.0


In [17]:
# Entry 1 of the normalized preferences is the "color" dimension.
h_norm_color = results_1d_dct["h_normalized_inferred_latent_preferences"][1]
h_norm_color.style.background_gradient(cmap="viridis", vmin=0, vmax=1)

color,Blue,Red
color,Unnamed: 1_level_1,Unnamed: 2_level_1
Blue,1.0,0.062339
Red,0.050738,1.0


In [16]:
# Entry 2 of the normalized preferences is the "shape" dimension.
h_norm_shape = results_1d_dct["h_normalized_inferred_latent_preferences"][2]
h_norm_shape.style.background_gradient(cmap="viridis", vmin=0, vmax=1)

shape,Circle,Square
shape,Unnamed: 1_level_1,Unnamed: 2_level_1
Circle,1.0,0.6978
Square,0.623038,1.0


## Randomize the network with MRQAP to obtain p-values
The result includes the averages and standard deviations of the latent preferences (raw and normalized) across the randomized networks, as well as the p-values of the inferred preferences.

In [13]:
# MRQAP settings: iteration count and aggregation function.
# NOTE(review): no random seed is set in the visible cells, so these results
# presumably differ between runs — confirm whether a seed can be supplied.
mrqap_settings = {"mrqap_iter": 100, "aggr_fun": "and"}
mrqap_res = MRQAP.MRQAP_1dSimple(
    nodes_input,
    edges_input,
    dimensions_list,
    all_attributes_dict,
    **mrqap_settings,
)

  0%|          | 0/100 [00:00<?, ?it/s]

In [14]:
# Show the full MRQAP result dictionary (each key holds a list of per-dimension tables).
mrqap_res

{'MRQAP_av_h': [number         1         2         3         4         5         6
  number                                                            
  1       0.269677  0.264555  0.260974  0.266027  0.260962  0.250954
  2       0.263924  0.262173  0.263723  0.262632  0.261059  0.251613
  3       0.263749  0.260554  0.274385  0.267630  0.263283  0.272737
  4       0.276745  0.266958  0.270165  0.267415  0.266855  0.265859
  5       0.267023  0.264183  0.269617  0.263971  0.265459  0.254412
  6       0.273540  0.256789  0.268035  0.265507  0.262199  0.257247,
  color      Blue       Red
  color                    
  Blue   0.274819  0.274026
  Red    0.275367  0.276934,
  shape     Circle    Square
  shape                     
  Circle  0.276067  0.277066
  Square  0.273737  0.272368],
 'MRQAP_std_h': [number         1         2         3         4         5         6
  number                                                            
  1       0.092863  0.069014  0.067780  0.055737 

The averages of the normalized latent preferences naturally tend to be close to 1: randomizing the network removes the inter-group dependencies, so all preferences are close to neutral.

In [15]:
# Average normalized preferences over the randomized networks; entry 0 is the "number" dimension.
av_h_norm_number = mrqap_res["MRQAP_av_h_norm"][0]
av_h_norm_number.style.background_gradient(cmap="viridis")

number,1,2,3,4,5,6
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1.0,1.124079,1.137321,1.15028,1.12965,1.091111
2,1.060461,1.0,1.051496,1.049617,1.043475,1.012812
3,1.015128,0.998062,1.0,1.028,1.008889,1.049233
4,1.077186,1.043178,1.062487,1.0,1.043039,1.036111
5,1.082399,1.078064,1.105799,1.074574,1.0,1.038286
6,1.159287,1.081078,1.131845,1.131882,1.117348,1.0
