In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
sns.set()

import networkx as nx

In [2]:
subjects = ["soggetto_1","soggetto_2","soggetto_3","soggetto_6","soggetto_7","soggetto_8"]

In [3]:
df_weights_dict = {}
for subject in subjects:
    df_weights_dict[subject] = pd.read_csv("Datasets/{}_weights_o.csv".format(subject))
    df_weights_dict[subject].columns = ["TrRi","RiTr","Plu","Ttra","Ttrb","Tr","MeTr","MoTr","RiRi","MoFo","Mmof","MoRi","MeRi","Ri","Comp"]
    df_weights_dict[subject].index = df_weights_dict[subject].columns
    df_weights_dict[subject] = df_weights_dict[subject].T

In [4]:
df_weights_dict["soggetto_1"]

Unnamed: 0,TrRi,RiTr,Plu,Ttra,Ttrb,Tr,MeTr,MoTr,RiRi,MoFo,Mmof,MoRi,MeRi,Ri,Comp
TrRi,-0.770991,-0.247862,0.296357,0.18306,0.184627,-0.10107,0.283836,-0.350846,-0.124972,0.23545,0.137425,-0.531965,0.264298,0.166439,-0.10905
RiTr,-0.751945,-0.213791,0.143637,0.483585,0.094576,0.339746,-0.172645,-0.11804,-0.444297,0.209074,0.092889,-0.012311,0.233756,0.050772,-0.083759
Plu,-0.003196,-0.263307,-0.274596,0.501303,0.255244,0.10413,0.084332,0.095562,-0.265006,0.323443,0.318363,-0.268569,-0.447416,0.243699,-0.026358
Ttra,-0.475967,-0.45279,0.401257,0.105587,0.079634,0.175691,-0.112213,0.11543,-0.028899,0.554424,0.219523,0.330389,0.093097,0.464091,0.069246
Ttrb,-0.288003,-0.036305,-0.513654,-0.26976,0.433292,0.178291,-0.095082,-0.201036,0.315952,0.071989,0.547407,-0.07766,0.216503,-0.664574,0.217016
Tr,-0.319896,-0.265057,-0.032019,0.243317,0.108078,0.192165,-0.118429,0.017604,-0.310293,0.374774,0.242819,-0.33965,-0.126303,-0.043444,0.013585
MeTr,-0.237846,-0.040311,-0.60653,0.46555,0.718556,-0.148726,0.377106,0.278993,-0.491082,0.079382,-0.216052,0.582619,0.145657,-0.663608,0.802352
MoTr,-0.667909,-0.216138,0.466011,0.118819,0.207032,0.298971,-0.212882,-0.181154,-0.395986,0.243083,0.133172,-0.161037,-0.140548,0.324216,-0.157806
RiRi,-0.311236,0.195006,0.234262,0.741436,0.218471,0.257728,0.259866,-0.201408,-0.170462,0.017236,0.197097,0.152464,-0.464821,0.192691,-0.134792
MoFo,-0.313909,-0.285269,0.095813,0.158044,0.145419,0.369778,-0.511182,0.201035,-0.281086,0.42754,0.252716,-0.123318,-0.092667,0.153888,0.135851


In [5]:
# for each subject's dataframe create two numpy arrays, one with positive values and one for negatives
df_weights_pos_dict = {}
df_weights_neg_dict = {}
df_ini_dict = {}
for subject in subjects:
    df_weights_pos_dict[subject] = df_weights_dict[subject].where(df_weights_dict[subject] >= 0)
    df_weights_neg_dict[subject] = df_weights_dict[subject].where(df_weights_dict[subject] < 0)
    # put the other values to zeros
    df_weights_pos_dict[subject] = df_weights_pos_dict[subject].fillna(0)
    df_weights_neg_dict[subject] = df_weights_neg_dict[subject].fillna(0)
    # transform the negative values of df_n_dict[subject] in positives
    df_weights_neg_dict[subject] = df_weights_neg_dict[subject] * (-1)

In [6]:
df_weights_pos_dict["soggetto_1"]

Unnamed: 0,TrRi,RiTr,Plu,Ttra,Ttrb,Tr,MeTr,MoTr,RiRi,MoFo,Mmof,MoRi,MeRi,Ri,Comp
TrRi,0.0,0.0,0.296357,0.18306,0.184627,0.0,0.283836,0.0,0.0,0.23545,0.137425,0.0,0.264298,0.166439,0.0
RiTr,0.0,0.0,0.143637,0.483585,0.094576,0.339746,0.0,0.0,0.0,0.209074,0.092889,0.0,0.233756,0.050772,0.0
Plu,0.0,0.0,0.0,0.501303,0.255244,0.10413,0.084332,0.095562,0.0,0.323443,0.318363,0.0,0.0,0.243699,0.0
Ttra,0.0,0.0,0.401257,0.105587,0.079634,0.175691,0.0,0.11543,0.0,0.554424,0.219523,0.330389,0.093097,0.464091,0.069246
Ttrb,0.0,0.0,0.0,0.0,0.433292,0.178291,0.0,0.0,0.315952,0.071989,0.547407,0.0,0.216503,0.0,0.217016
Tr,0.0,0.0,0.0,0.243317,0.108078,0.192165,0.0,0.017604,0.0,0.374774,0.242819,0.0,0.0,0.0,0.013585
MeTr,0.0,0.0,0.0,0.46555,0.718556,0.0,0.377106,0.278993,0.0,0.079382,0.0,0.582619,0.145657,0.0,0.802352
MoTr,0.0,0.0,0.466011,0.118819,0.207032,0.298971,0.0,0.0,0.0,0.243083,0.133172,0.0,0.0,0.324216,0.0
RiRi,0.0,0.195006,0.234262,0.741436,0.218471,0.257728,0.259866,0.0,0.0,0.017236,0.197097,0.152464,0.0,0.192691,0.0
MoFo,0.0,0.0,0.095813,0.158044,0.145419,0.369778,0.0,0.201035,0.0,0.42754,0.252716,0.0,0.0,0.153888,0.135851


In [7]:
df_weights_neg_dict["soggetto_1"]

Unnamed: 0,TrRi,RiTr,Plu,Ttra,Ttrb,Tr,MeTr,MoTr,RiRi,MoFo,Mmof,MoRi,MeRi,Ri,Comp
TrRi,0.770991,0.247862,-0.0,-0.0,-0.0,0.10107,-0.0,0.350846,0.124972,-0.0,-0.0,0.531965,-0.0,-0.0,0.10905
RiTr,0.751945,0.213791,-0.0,-0.0,-0.0,-0.0,0.172645,0.11804,0.444297,-0.0,-0.0,0.012311,-0.0,-0.0,0.083759
Plu,0.003196,0.263307,0.274596,-0.0,-0.0,-0.0,-0.0,-0.0,0.265006,-0.0,-0.0,0.268569,0.447416,-0.0,0.026358
Ttra,0.475967,0.45279,-0.0,-0.0,-0.0,-0.0,0.112213,-0.0,0.028899,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0
Ttrb,0.288003,0.036305,0.513654,0.26976,-0.0,-0.0,0.095082,0.201036,-0.0,-0.0,-0.0,0.07766,-0.0,0.664574,-0.0
Tr,0.319896,0.265057,0.032019,-0.0,-0.0,-0.0,0.118429,-0.0,0.310293,-0.0,-0.0,0.33965,0.126303,0.043444,-0.0
MeTr,0.237846,0.040311,0.60653,-0.0,-0.0,0.148726,-0.0,-0.0,0.491082,-0.0,0.216052,-0.0,-0.0,0.663608,-0.0
MoTr,0.667909,0.216138,-0.0,-0.0,-0.0,-0.0,0.212882,0.181154,0.395986,-0.0,-0.0,0.161037,0.140548,-0.0,0.157806
RiRi,0.311236,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,0.201408,0.170462,-0.0,-0.0,-0.0,0.464821,-0.0,0.134792
MoFo,0.313909,0.285269,-0.0,-0.0,-0.0,-0.0,0.511182,-0.0,0.281086,-0.0,-0.0,0.123318,0.092667,-0.0,-0.0


In [8]:
# for every weight matrix, create a directed graph
# create one node for each column
# create an edge for each non-zero value with the weight as the edge weight given by the matrix

G_dict_pos = {}
G_dict_neg = {}
for subject in subjects:
    # add the nodes
    G_dict_pos[subject] = nx.DiGraph()
    G_dict_neg[subject] = nx.DiGraph()
    G_dict_pos[subject].add_nodes_from(df_weights_pos_dict[subject].columns)
    G_dict_neg[subject].add_nodes_from(df_weights_neg_dict[subject].columns)

    # add the edges with the weights
    for i in range(len(df_weights_pos_dict[subject].columns)):
        for j in range(len(df_weights_pos_dict[subject].columns)):
            if df_weights_pos_dict[subject].iloc[i,j] != 0:
                G_dict_pos[subject].add_edge(df_weights_pos_dict[subject].columns[i],df_weights_pos_dict[subject].columns[j],weight=df_weights_pos_dict[subject].iloc[i,j])
            if df_weights_neg_dict[subject].iloc[i,j] != 0:
                G_dict_neg[subject].add_edge(df_weights_neg_dict[subject].columns[i],df_weights_neg_dict[subject].columns[j],weight=df_weights_neg_dict[subject].iloc[i,j])

In [10]:
G_dict_pos["soggetto_1"].edges(data=True)

OutEdgeDataView([('TrRi', 'Plu', {'weight': 0.29635653}), ('TrRi', 'Ttra', {'weight': 0.18305951}), ('TrRi', 'Ttrb', {'weight': 0.18462712}), ('TrRi', 'MeTr', {'weight': 0.28383553}), ('TrRi', 'MoFo', {'weight': 0.23544988}), ('TrRi', 'Mmof', {'weight': 0.13742514}), ('TrRi', 'MeRi', {'weight': 0.2642979}), ('TrRi', 'Ri', {'weight': 0.16643852}), ('RiTr', 'Plu', {'weight': 0.14363717}), ('RiTr', 'Ttra', {'weight': 0.48358506}), ('RiTr', 'Ttrb', {'weight': 0.09457647}), ('RiTr', 'Tr', {'weight': 0.33974558}), ('RiTr', 'MoFo', {'weight': 0.20907383}), ('RiTr', 'Mmof', {'weight': 0.09288936}), ('RiTr', 'MeRi', {'weight': 0.23375629}), ('RiTr', 'Ri', {'weight': 0.05077204}), ('Plu', 'Ttra', {'weight': 0.5013026}), ('Plu', 'Ttrb', {'weight': 0.25524414}), ('Plu', 'Tr', {'weight': 0.10413005}), ('Plu', 'MeTr', {'weight': 0.08433197}), ('Plu', 'MoTr', {'weight': 0.09556225}), ('Plu', 'MoFo', {'weight': 0.32344252}), ('Plu', 'Mmof', {'weight': 0.31836274}), ('Plu', 'Ri', {'weight': 0.24369873}

In [11]:
G_dict_neg["soggetto_1"].edges(data=True)

OutEdgeDataView([('TrRi', 'TrRi', {'weight': 0.77099067}), ('TrRi', 'RiTr', {'weight': 0.24786153}), ('TrRi', 'Tr', {'weight': 0.101070374}), ('TrRi', 'MoTr', {'weight': 0.35084584}), ('TrRi', 'RiRi', {'weight': 0.124972}), ('TrRi', 'MoRi', {'weight': 0.5319655}), ('TrRi', 'Comp', {'weight': 0.109049834}), ('RiTr', 'TrRi', {'weight': 0.75194544}), ('RiTr', 'RiTr', {'weight': 0.21379124}), ('RiTr', 'MeTr', {'weight': 0.17264487}), ('RiTr', 'MoTr', {'weight': 0.11804}), ('RiTr', 'RiRi', {'weight': 0.4442972}), ('RiTr', 'MoRi', {'weight': 0.012310727}), ('RiTr', 'Comp', {'weight': 0.08375879}), ('Plu', 'TrRi', {'weight': 0.0031957452}), ('Plu', 'RiTr', {'weight': 0.26330683}), ('Plu', 'Plu', {'weight': 0.27459618}), ('Plu', 'RiRi', {'weight': 0.2650064}), ('Plu', 'MoRi', {'weight': 0.26856896}), ('Plu', 'MeRi', {'weight': 0.447416}), ('Plu', 'Comp', {'weight': 0.0263581}), ('Ttra', 'TrRi', {'weight': 0.47596684}), ('Ttra', 'RiTr', {'weight': 0.4527903}), ('Ttra', 'MeTr', {'weight': 0.1122

In [13]:
# for each graph, compute the PageRank and organize the results in a dataframe
# the dataframe will have the following columns:
# - node: the name of the node
# - page_rank: the PageRank value of the node
# - subject: the subject to which the node belongs

df_page_rank_dict_pos = {}
df_page_rank_dict_neg = {}
for subject in subjects:
    df_page_rank_dict_pos[subject] = pd.DataFrame(list(nx.pagerank( G_dict_pos[subject], max_iter=100).items()),columns=["node","page_rank"])
    df_page_rank_dict_neg[subject] = pd.DataFrame(list(nx.pagerank( G_dict_neg[subject], max_iter=100).items()),columns=["node","page_rank"])

In [14]:
df_page_rank_dict_pos["soggetto_1"]

Unnamed: 0,node,page_rank
0,TrRi,0.01
1,RiTr,0.028557
2,Plu,0.064951
3,Ttra,0.123073
4,Ttrb,0.105468
5,Tr,0.099804
6,MeTr,0.052258
7,MoTr,0.034065
8,RiRi,0.031185
9,MoFo,0.134568


In [15]:
df_page_rank_dict_neg["soggetto_1"]

Unnamed: 0,node,page_rank
0,TrRi,0.218288
1,RiTr,0.094254
2,Plu,0.043364
3,Ttra,0.036618
4,Ttrb,0.015153
5,Tr,0.036472
6,MeTr,0.044892
7,MoTr,0.084514
8,RiRi,0.115733
9,MoFo,0.012968


In [36]:
df_page_rank_dict_difference = {}
for subject in subjects:
    df_page_rank_dict_difference[subject] = pd.merge(df_page_rank_dict_pos[subject],df_page_rank_dict_neg[subject],on="node",suffixes=("_pos","_neg"))
    df_page_rank_dict_difference[subject]["difference"] = df_page_rank_dict_difference[subject]["page_rank_pos"] - df_page_rank_dict_difference[subject]["page_rank_neg"]
    df_page_rank_dict_difference[subject].set_index("node",inplace=True)

In [42]:
# create a dataframe with the std of the difference of the page rank for each node
df_page_rank_std = pd.DataFrame()
for subject in subjects:
    df_page_rank_std = df_page_rank_std.append(df_page_rank_dict_difference[subject].std().to_frame().T)
df_page_rank_std.index = subjects
df_page_rank_std.drop(columns=["page_rank_pos","page_rank_neg"],inplace=True)
df_page_rank_std


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Unnamed: 0,difference
soggetto_1,0.090028
soggetto_2,0.071766
soggetto_3,0.053338
soggetto_6,0.04216
soggetto_7,0.05213
soggetto_8,0.042332


In [43]:
# plot df_page_rank_std with plotly express using a bar chart
fig = px.bar(df_page_rank_std, x=df_page_rank_std.index, y="difference", color="difference", color_continuous_scale=px.colors.sequential.RdBu)
fig.show()