# How to infer latent preferences directly from data

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
from netin.multidim.infer import data_loader
from netin.multidim.infer import wrappers
from netin.multidim.infer import MRQAP

## Load the data
The nodes have three properties (shape, color, number) and their connections are stored in the edges dataframe.

In [4]:
# Offline alternative: uncomment to read the same two tables from local CSV
# files instead of downloading them in the next cell.
# nodes_dummy = pd.read_csv("nodes_dummy.csv",index_col="index",dtype='category')
# edges_dummy = pd.read_csv("edges_dummy.csv",dtype='category')

In [5]:
import requests # pip install requests
from io import StringIO

edges_dummy_url = 'https://pastebin.com/raw/1pqauPwz'
nodes_dummy_url = 'https://pastebin.com/raw/eNp61ptx'

# Download each CSV and hand the response text to pandas unchanged.
# Splitting the text on '\n' and re-joining the pieces with "" drops the line
# breaks, so parsing would only succeed when the server happens to send
# '\r\n' line endings (the leftover '\r' then acts as the row separator).
response = requests.get(edges_dummy_url, timeout=30)
response.raise_for_status()  # fail loudly instead of parsing an HTML error page
edges_dummy = pd.read_csv(StringIO(response.text), dtype='category')

response = requests.get(nodes_dummy_url, timeout=30)
response.raise_for_status()
nodes_dummy = pd.read_csv(StringIO(response.text), dtype='category', index_col="index")

In [6]:
nodes_dummy

Unnamed: 0_level_0,shape,color,number
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Square,Blue,3
1,Circle,Blue,3
2,Square,Red,5
3,Square,Blue,3
4,Square,Red,1
...,...,...,...
249,Square,Red,4
250,Square,Red,1
251,Square,Red,6
252,Circle,Blue,6


In [7]:
edges_dummy

Unnamed: 0,source,target
0,0,1
1,0,23
2,0,41
3,0,63
4,0,103
...,...,...
986,252,221
987,252,232
988,252,248
989,253,31


## Explicitly choose the dimensions and categories in each dimension to perform the analysis

In [8]:
# Categories to consider in each node attribute. Numbers are written as
# strings to match the data, which was loaded with dtype='category'.
shape_list = ["Circle", "Square"]
color_list = ["Blue", "Red"]
number_list = [str(value) for value in range(1, 7)]

# Attribute name -> list of categories analysed for that attribute.
all_attributes_dict = dict(
    shape=shape_list,
    color=color_list,
    number=number_list,
)

# Dimensions to analyse, in the order they are passed to the inference calls.
dimensions_list = ["number", "color", "shape"]
num_dimensions = len(dimensions_list)

## Convert the loaded data to the required input format
We add the node attributes to the edges dataframe so that the inference functions can process it.

In [9]:
# Attach the attributes of both endpoints to every edge; the result has one
# "source <dimension>" and one "target <dimension>" column per dimension.
nodes_input, edges_input = data_loader.build_nodes_edges_input_df(
    nodes_dummy,
    edges_dummy,
    dimensions=["shape", "color", "number"],
)

In [10]:
edges_input

Unnamed: 0,source,target,source shape,target shape,source color,target color,source number,target number
0,0,1,Square,Circle,Blue,Blue,3,3
1,0,23,Square,Circle,Blue,Blue,3,3
2,0,41,Square,Square,Blue,Blue,3,3
3,0,63,Square,Square,Blue,Red,3,3
4,0,103,Square,Circle,Blue,Blue,3,3
...,...,...,...,...,...,...,...,...
986,252,221,Circle,Square,Blue,Blue,6,5
987,252,232,Circle,Circle,Blue,Blue,6,6
988,252,248,Circle,Square,Blue,Blue,6,4
989,253,31,Circle,Circle,Red,Red,3,3


## Infer latent one-dimensional preferences and full multidimensional preferences (for reference)

In [11]:
# type_p selects the type of aggregation function: one of {and, or, mean}.
results_1d_dct = wrappers.infer_latent_preferences_1dSimple(
    nodes_input, edges_input, dimensions_list, all_attributes_dict,
    type_p="and",
)

Likelihood maximization convergence result: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH


In [12]:
# Full multidimensional inference, computed as a reference for the 1D model.
res_dct_multi = wrappers.infer_latent_preferences_Multidimensional(
    nodes_input, edges_input, dimensions_list, all_attributes_dict
)

The dictionary of results returned by the one-dimensional inference (`results_1d_dct`) contains:
- The multidimensional group sizes
- The number of links across multidimensional groups
- The one-dimensional group sizes
- The number of links across one-dimensional groups
- The inferred 1D latent preferences
- The diagonal-normalized 1D latent preferences
- The estimated multidimensional preferences computed with the aggregation function
- The model performance metrics (likelihood, AIC, BIC)

In [13]:
results_1d_dct.keys()

dict_keys(['multidimensional_population', 'multidimensional_links', 'one_dimensional_population', 'one_dimensional_links', 'H_multidimensional_preferences', 'Likelihood', 'AIC', 'BIC', 'h_inferred_latent_preferences', 'h_normalized_inferred_latent_preferences'])

In [14]:
# Number of links across multidimensional groups, shaded by count.
results_1d_dct["multidimensional_links"].style.background_gradient(cmap="viridis")

Unnamed: 0_level_0,Unnamed: 1_level_0,number,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6
Unnamed: 0_level_1,Unnamed: 1_level_1,color,Blue,Blue,Red,Red,Blue,Blue,Red,Red,Blue,Blue,Red,Red,Blue,Blue,Red,Red,Blue,Blue,Red,Red,Blue,Blue,Red,Red
Unnamed: 0_level_2,Unnamed: 1_level_2,shape,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square,Circle,Square
number,color,shape,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3
1,Blue,Circle,10,8,0,0,0,1,0,0,1,1,0,0,2,0,0,0,0,0,0,0,0,1,0,0
1,Blue,Square,4,10,1,0,0,0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Red,Circle,0,0,8,2,1,0,1,1,0,0,1,1,0,0,0,1,0,0,0,1,0,0,0,1
1,Red,Square,0,0,2,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0
2,Blue,Circle,1,2,1,0,36,7,3,4,7,11,0,1,2,3,1,0,2,0,1,0,0,0,0,0
2,Blue,Square,0,1,0,0,3,7,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0
2,Red,Circle,0,0,2,1,1,0,8,10,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Red,Square,0,0,0,1,6,0,9,40,0,1,6,4,1,0,7,2,0,0,1,0,0,0,4,0
3,Blue,Circle,0,3,0,0,6,2,0,0,36,21,0,2,4,1,0,0,6,2,0,1,4,3,0,0
3,Blue,Square,0,3,0,0,1,0,0,0,16,35,0,1,1,4,0,0,2,4,0,0,4,3,0,0


In [14]:
# Normalized 1D latent preferences for entry 0 of dimensions_list ("number");
# the color scale is pinned to [0, 1].
results_1d_dct["h_normalized_inferred_latent_preferences"][0].style.background_gradient(cmap="viridis",vmin=0,vmax=1)

number,1,2,3,4,5,6
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1.0,0.077015,0.105561,0.048261,0.01541,0.049469
2,0.107688,1.0,0.23333,0.122006,0.054738,0.039144
3,0.065953,0.073068,1.0,0.161222,0.136459,0.154399
4,0.0,0.065914,0.144913,1.0,0.139048,0.078766
5,0.0,0.009841,0.081295,0.1573,1.0,0.214753
6,0.010012,0.0,0.081949,0.053307,0.06865,1.0


In [15]:
# Normalized 1D latent preferences for entry 1 of dimensions_list ("color").
results_1d_dct["h_normalized_inferred_latent_preferences"][1].style.background_gradient(cmap="viridis",vmin=0,vmax=1)

color,Blue,Red
color,Unnamed: 1_level_1,Unnamed: 2_level_1
Blue,1.0,0.062339
Red,0.050738,1.0


In [16]:
# Normalized 1D latent preferences for entry 2 of dimensions_list ("shape").
results_1d_dct["h_normalized_inferred_latent_preferences"][2].style.background_gradient(cmap="viridis",vmin=0,vmax=1)

shape,Circle,Square
shape,Unnamed: 1_level_1,Unnamed: 2_level_1
Circle,1.0,0.6978
Square,0.623038,1.0


## Randomize the network with MRQAP to obtain p-values
The result includes the averages and standard deviations of the latent preferences (raw and normalized) in the randomized networks, as well as the p-values of the inferred preferences.

In [17]:
# Repeat the 1D inference on randomized networks to obtain reference values
# and p-values for the inferred preferences.
# NOTE(review): no random seed is set, so the numbers presumably change from
# run to run -- confirm whether MRQAP_1dSimple can be seeded.
mrqap_res = MRQAP.MRQAP_1dSimple(
    nodes_input, edges_input, dimensions_list, all_attributes_dict,
    mrqap_iter=100,  # presumably the number of randomizations (the progress bar counts to 100)
    aggr_fun="and",
)

  0%|          | 0/100 [00:00<?, ?it/s]

In [18]:
mrqap_res

{'MRQAP_av_h': [number         1         2         3         4         5         6
  number                                                            
  1       0.266793  0.256156  0.271929  0.253783  0.269508  0.263489
  2       0.267609  0.260459  0.263717  0.259248  0.265892  0.263932
  3       0.273902  0.272553  0.273146  0.259049  0.263237  0.265516
  4       0.267232  0.265786  0.263825  0.254801  0.264604  0.269697
  5       0.279588  0.255692  0.262557  0.259350  0.260371  0.260081
  6       0.278668  0.256739  0.271621  0.264283  0.267555  0.267418,
  color      Blue       Red
  color                    
  Blue   0.275721  0.273665
  Red    0.276919  0.273332,
  shape     Circle    Square
  shape                     
  Circle  0.272088  0.280294
  Square  0.272391  0.276047],
 'MRQAP_std_h': [number         1         2         3         4         5         6
  number                                                            
  1       0.087304  0.052480  0.057143  0.055529 

The averages of the normalized latent preferences naturally tend to be around 1: randomizing the network removes the inter-group dependencies, so all preferences end up close to neutrality.

In [20]:
# Average normalized latent preferences over the randomized networks for
# entry 0 of dimensions_list ("number").
mrqap_res["MRQAP_av_h_norm"][0].style.background_gradient(cmap="viridis")

number,1,2,3,4,5,6
number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1.0,1.095793,1.135064,1.053481,1.130349,1.111009
2,1.086283,1.0,1.068898,1.049767,1.073346,1.074293
3,1.05803,1.043212,1.0,0.994435,1.023254,1.029095
4,1.075136,1.083533,1.074296,1.0,1.075254,1.101121
5,1.145788,1.058102,1.093059,1.065831,1.0,1.079267
6,1.123991,1.022844,1.100252,1.081637,1.07013,1.0
