In [1]:
import pandas as pd
import networkx as nx
import igraph as ig
import random as rand
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [2]:
'''
Two dataframes are directly created from the original files.
'''

epinions_df = pd.read_csv("Data/soc-sign-epinions.txt",sep = '\t', comment = '#', 
                          names = ['FromNodeId', 'ToNodeId', 'Sign'], index_col = False)

slashdot_df = pd.read_csv("Data/soc-sign-Slashdot090221.txt",sep = '\t', comment = '#', 
                          names = ['FromNodeId', 'ToNodeId', 'Sign'], index_col = False)

In [3]:
slashdot_df

Unnamed: 0,FromNodeId,ToNodeId,Sign
0,0,1,1
1,0,2,1
2,0,3,1
3,0,4,1
4,0,5,1
...,...,...,...
549197,82140,81612,1
549198,82141,82129,1
549199,82141,82142,1
549200,82143,81974,1


In [4]:
'''
Cell with code used to parse the wiki data.
'''

filename = "Data/wikiElec.ElecBs3.txt"

with open(filename, errors = 'ignore') as f:
    lines = f.readlines()
    
'''
After opening the file, a list made of the lines of the file is generated, and subsequently treated to have only the lines
featuring the voted person and the people that vote.
'''
    
lines = [x.strip() for x in lines]

splines = [i.split('\t') for i in lines]

parslines = [x for x in splines if (x[0] == 'U' or x[0] == 'V' or x[0] == 'E')]

'''
Then a for loop passes through the whole list, extracting and putting the data of interest in several lists.
'''

fromlist = []
tolist = []
signlist = []
voted_user_id = ''

for line in parslines:
    
    if line[0] == 'U':
        voted_user_id = line[1]
    if ((line[0] == 'V') and (line[1] != '0')):
        fromlist.append(line[2])
        tolist.append(voted_user_id)
        signlist.append(line[1])
        
'''
The dataframe is generated using the aforementioned lists, converted them from string to numeric ones.
'''

wikipedia_df = pd.DataFrame(columns = ['FromNodeId', 'ToNodeId', 'Sign'])

wikipedia_df.FromNodeId = list(map(int,fromlist))
wikipedia_df.ToNodeId = list(map(int,tolist))
wikipedia_df.Sign = list(map(int,signlist))

In [5]:
'''
Cell to do the needed modifications on wikipedia_df.
'''

wikiinv_df = wikipedia_df.reindex(index=wikipedia_df.index[::-1])
wikiinv_df = wikiinv_df.drop_duplicates(subset=['FromNodeId', 'ToNodeId']) 

In [6]:
wiki = wikiinv_df[wikiinv_df.Sign == 1]

In [7]:
wiki

Unnamed: 0,FromNodeId,ToNodeId,Sign
107072,7666,6924,1
107067,7744,8263,1
107066,7047,8263,1
107065,8053,8263,1
107061,3642,6924,1
...,...,...,...
5,7,30,1
4,6,30,1
3,5,30,1
2,4,30,1


In [8]:
'''
This cell converts every dataframe in two graph structures, a networkx and an igraph one, using the function below to
lighten up the code.
'''

def convert_to_graph(df):
    # Input:
    # -- df: a pandas dataframe
    # Output:
    # -- gnx: a networkx graph structure;
    # -- g: a igraph graph structure
    
    gnx = nx.from_pandas_edgelist(df, source = 'FromNodeId', target = 'ToNodeId', edge_attr = 'Sign', 
                                  create_using = nx.DiGraph)
    
    g = ig.Graph.TupleList(gnx.edges(), directed = True)
    
    return gnx, g

#epinions_gnx, epinions_g = convert_to_graph(epinions_df)

slashdot_gnx, slashdot_g = convert_to_graph(slashdot_df)

#wikipedia_gnx, wikipedia_g = convert_to_graph(wikiinv_df)
wikipedia_gnx, wikipedia_g = convert_to_graph(wikiinv_df)

In [9]:
def transpose_trust(G):
    
    sueg = []
    
    m = {v: i for i, v in enumerate(G)}
    
    for v in G:
        
        vnbrs = set(G.pred[v]) | set(G.succ[v])
        for u in vnbrs:
            if m[u] <= m[v]:
                continue
            neighbors = (vnbrs | set(G.succ[u]) | set(G.pred[u])) - {u, v}
            
            for w in neighbors:
                
                if (m[v] < m[w] < m[u]):
                    
                    if (G.has_edge(v, w) and G.has_edge(u, w)):
                        
                        if not(G.has_edge(v, u)):
                            
                            sueg.append([v, u, 0.5])
                        
                        if not(G.has_edge(u, v)):
                            
                            sueg.append([u, v, 0.5])

    return sueg

In [10]:
def direct_propagation(G):
    
    sueg = []
    
    m = {v: i for i, v in enumerate(G)}
    
    for v in G:
        
        vnbrs = set(G.pred[v]) | set(G.succ[v])
        for u in vnbrs:
            if m[u] <= m[v]:
                continue
            neighbors = (vnbrs | set(G.succ[u]) | set(G.pred[u])) - {u, v}
            
            for w in neighbors:
                
                if (m[v] < m[w] < m[u]):
                    
                    if (G.has_edge(v, w) and G.has_edge(w, u) and not(G.has_edge(v, u)) and not([v, u, 0.5] in sueg) and not([v, u, -0.5] in sueg)):
                        
                        if ((G[v][w]['Sign'] == 1) and (G[w][u]['Sign'] == 1)):
                            
                            sueg.append([v, u, 0.5])
                        if ((G[v][w]['Sign'] == -1) and (G[w][u]['Sign'] == -1)):
                            
                            sueg.append([v, u, -0.5])
            

    return sueg

In [11]:
'''
Transpose trust wikipedia
'''

TT = transpose_trust(wikipedia_gnx)

TT_df = pd.DataFrame(TT, columns = ['FromNodeId', 'ToNodeId', 'Sign'])

TT_wiki_df = wikiinv_df.copy()
TT_wiki_df = TT_wiki_df.append(TT_df)

TT_wiki_df

Unnamed: 0,FromNodeId,ToNodeId,Sign
107079,3404,6307,-1.0
107078,8243,6307,-1.0
107077,6885,6307,-1.0
107076,7053,6307,-1.0
107075,8045,6307,-1.0
...,...,...,...
143486,50,19,0.5
143487,153,5,0.5
143488,93,5,0.5
143489,50,5,0.5


In [12]:
'''
Direct propagation wikipedia
'''

DP = direct_propagation(wikipedia_gnx)

DP_df = pd.DataFrame(DP, columns = ['FromNodeId', 'ToNodeId', 'Sign'])

DP_wiki_df = wikiinv_df.copy()
DP_wiki_df = DP_wiki_df.append(DP_df)

DP_wiki_df

Unnamed: 0,FromNodeId,ToNodeId,Sign
107079,3404,6307,-1.0
107078,8243,6307,-1.0
107077,6885,6307,-1.0
107076,7053,6307,-1.0
107075,8045,6307,-1.0
...,...,...,...
3342,33,23,0.5
3343,33,80,0.5
3344,152,3,0.5
3345,23,19,0.5


In [13]:
'''
Direct propagation slashdot
'''

DPsla = direct_propagation(slashdot_gnx)

DPsla_df = pd.DataFrame(DPsla, columns = ['FromNodeId', 'ToNodeId', 'Sign'])

DP_sla_df = slashdot_df.copy()
DP_sla_df = DP_sla_df.append(DPsla_df)

DP_sla_df

Unnamed: 0,FromNodeId,ToNodeId,Sign
0,0,1,1.0
1,0,2,1.0
2,0,3,1.0
3,0,4,1.0
4,0,5,1.0
...,...,...,...
11875,70785,80880,0.5
11876,75867,81448,0.5
11877,78243,78247,0.5
11878,78594,81806,0.5


In [None]:
chunk_size = 80000
chunks = [x for x in range(0, slashdot_df.shape[0], chunk_size)]

df_new = pd.concat([slashdot_df.iloc[ chunks[i]:chunks[i + 1] - 1 ].pivot('FromNodeId', 'ToNodeId', 'Sign') for i in range(0, len(chunks) - 1)])

In [None]:
plt.figure(figsize = (10,10))

sla_visu = slashdot_df.pivot('FromNodeId', 'ToNodeId', 'Sign')
ax = sns.heatmap(sla_visu, cmap = "Spectral")

In [None]:
plt.figure(figsize = (10,10))

wiki_visu = wikiinv_df.pivot('FromNodeId', 'ToNodeId', 'Sign')
ax = sns.heatmap(wiki_visu, cmap = "Spectral")

In [None]:
DP2_wiki_df = wikiinv_df.copy()
DP2_wiki_df = DP2_wiki_df.append(DP_df)

In [None]:
#DP2_wiki_df = DP2_wiki_df.reset_index(drop = True)

In [None]:
DP2_wiki_df

In [None]:
plt.figure(figsize = (20,20))


direct_prop_visu = DP_wiki_df.pivot('ToNodeId','FromNodeId',  'Sign')

ax = sns.heatmap(direct_prop_visu, cmap = "Spectral", vmax = 2, vmin = -2)


In [None]:
direct_prop_visu.to_numpy().shape

In [None]:
direct_prop_visu.shape