In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import sklearn.linear_model as sk

In [2]:
def accuracy(y, pred):
    y = (y + 1) / 2
    pred = (pred + 1) / 2
    
    return 1 - np.sum(np.abs(y - pred)) / len(pred)

In [3]:
def crossValidation(X_input, y_input, standardize=True):
    
    X = X_input.copy()
    y = y_input.copy()
    
    if standardize:
        for column in X:
            X.loc[:,column] = (X[column] - X[column].mean()) / X[column].std()
    
    num_lines = len(X) // 10
    accuracies = []
    betas = np.zeros((len(X.columns), 1))
    intercept = 0
    for i in range(10):
        X_test = X[i*num_lines:(i+1)*num_lines]
        y_test = y[i*num_lines:(i+1)*num_lines]
        
        X_train = pd.concat([X[:i*num_lines], X[(i+1)*num_lines:]])
        y_train = pd.concat([y[:i*num_lines], y[(i+1)*num_lines:]])
        clf = sk.LogisticRegression(penalty='none', max_iter=10000).fit(X_train, y_train)
        accuracies.append(accuracy(y_test.values, clf.predict(X_test)))
        betas += clf.coef_.reshape(-1, 1) / 10
        intercept += clf.intercept_
        
    return np.mean(accuracies), betas, intercept

### Import data

In [4]:
PATH = 'Data/'

#Importing the epinions dataset

epinions = pd.read_csv(PATH + 'epinions_signed.txt', sep = '\t', header = 3)
epinions.rename(columns={'# FromNodeId': 'source', 'ToNodeId': 'target', 'Sign': 'sign'}, inplace=True)

#Importing the slashdot dataset

slashdot = pd.read_csv(PATH + 'slashdot_signed.txt', sep = '\t', header = 3)
slashdot.rename(columns={'# FromNodeId': 'source', 'ToNodeId': 'target', 'Sign': 'sign'}, inplace=True)

In [5]:
#The wikipedia dataset is quite old and quite chaotic, We need to parse it a little bit.

with open(PATH + 'wikipedia_signed.txt', 'r', encoding='latin-1') as f:
    lines = f.readlines()
with open(PATH + 'wikipedia_signed2.txt', 'w', encoding='latin-1') as f:
    for line in lines:
        if line.strip("\n").startswith('U') or line.strip("\n").startswith('V'):
            f.write(line)
            
wikipedia = pd.read_csv(PATH + 'wikipedia_signed2.txt', sep = '\t', encoding='latin-1', header=None,\
                   names=['user/voter', 'sign', 'source'])

wikipedia.loc[wikipedia['user/voter'] == 'U', 'source'] = wikipedia['sign']
wikipedia['target'] = 0

current_id = 0
for i, row in wikipedia.iterrows():
    if row[0] == 'U':
        current_id = row[2]
    else:
        wikipedia.at[i,'target'] = current_id
        
wikipedia = wikipedia[wikipedia['user/voter'] == 'V']
wikipedia = wikipedia[['source', 'target', 'sign']]
wikipedia['source'] = wikipedia['source'].astype(int)

#Wikipedia has edges with weight 0 when the user did not make a decision about the promotion, I remove them

wikipedia = wikipedia[wikipedia['sign'] != 0]

In [6]:
#The epinions and wikipedia dataset contains some self-loops, they are of no use for this project.

epinions = epinions[epinions['source'] != epinions['target']]
slashdot = slashdot[slashdot['source'] != slashdot['target']]
wikipedia = wikipedia[wikipedia['source'] != wikipedia['target']]

In [7]:
epinions.head()

Unnamed: 0,source,target,sign
0,0,1,-1
1,1,128552,-1
2,2,3,1
3,4,5,-1
4,4,155,-1


In [8]:
slashdot.head()

Unnamed: 0,source,target,sign
0,0,1,1
1,0,2,1
2,0,3,1
3,0,4,1
4,0,5,1


In [9]:
wikipedia.head()

Unnamed: 0,source,target,sign
1,3,30,1
2,25,30,-1
3,4,30,1
4,5,30,1
5,6,30,1


### Building the graphs

In [12]:
G_epinions = nx.from_pandas_edgelist(epinions, 'source', 'target', 'sign')
diG_epinions = nx.from_pandas_edgelist(epinions, 'source', 'target', 'sign', create_using=nx.DiGraph())

G_slashdot = nx.from_pandas_edgelist(slashdot, 'source', 'target', 'sign')
diG_slashdot = nx.from_pandas_edgelist(slashdot, 'source', 'target', 'sign', create_using=nx.DiGraph())

G_wikipedia = nx.from_pandas_edgelist(wikipedia, 'source', 'target', 'sign')
diG_wikipedia = nx.from_pandas_edgelist(wikipedia, 'source', 'target', 'sign', create_using=nx.DiGraph())

### Building the features

In [11]:
#Number of common Neighbors

epinions['embeddedness'] = epinions.apply(lambda x: len(list(nx.common_neighbors(G_epinions, x['source'],\
                                                                                     x['target']))), axis=1)

slashdot['embeddedness'] = slashdot.apply(lambda x: len(list(nx.common_neighbors(G_slashdot, x['source'],\
                                                                                     x['target']))), axis=1)

wikipedia['embeddedness'] = wikipedia.apply(lambda x: len(list(nx.common_neighbors(G_wikipedia, x['source'],\
                                                                                     #x['target']))), axis=1)

In [12]:
#Out-degree of source

epinions['out-degree'] = epinions.apply(lambda x: diG_epinions.out_degree(x['source']), axis=1)

slashdot['out-degree'] = slashdot.apply(lambda x: diG_slashdot.out_degree(x['source']), axis=1)

wikipedia['out-degree'] = wikipedia.apply(lambda x: diG_wikipedia.out_degree(x['source']), axis=1)

In [13]:
#In-degree of target

epinions['in-degree'] = epinions.apply(lambda x: diG_epinions.in_degree(x['target']), axis=1)

slashdot['in-degree'] = slashdot.apply(lambda x: diG_slashdot.in_degree(x['target']), axis=1)

wikipedia['in-degree'] = wikipedia.apply(lambda x: diG_wikipedia.in_degree(x['target']), axis=1)

In [14]:
#Positive in-degree of source

epinions_pos = epinions[epinions['sign'] == 1]
diG_epinions_pos = nx.from_pandas_edgelist(epinions_pos, 'source', 'target', 'sign', create_using=nx.DiGraph())

in_degrees = {}
for i in range(max(epinions['source'].max(), epinions['target'].max())):
    if i in list(diG_epinions_pos.nodes):
        in_degrees[i] = diG_epinions_pos.in_degree(i)
    else:
        in_degrees[i] = 0
        
in_degrees = pd.DataFrame(pd.Series(in_degrees))
in_degrees.rename(columns={0: 'source-in-degree-pos'}, inplace=True)
epinions = epinions.merge(in_degrees, left_on='source', right_index=True)



slashdot_pos = slashdot[slashdot['sign'] == 1]
diG_slashdot_pos = nx.from_pandas_edgelist(slashdot_pos, 'source', 'target', 'sign', create_using=nx.DiGraph())

in_degrees = {}
for i in range(max(slashdot['source'].max(), slashdot['target'].max())):
    if i in list(diG_slashdot_pos.nodes):
        in_degrees[i] = diG_slashdot_pos.in_degree(i)
    else:
        in_degrees[i] = 0
        
in_degrees = pd.DataFrame(pd.Series(in_degrees))
in_degrees.rename(columns={0: 'source-in-degree-pos'}, inplace=True)
slashdot = slashdot.merge(in_degrees, left_on='source', right_index=True)



wikipedia_pos = wikipedia[wikipedia['sign'] == 1]
diG_wikipedia_pos = nx.from_pandas_edgelist(wikipedia_pos, 'source', 'target', 'sign', create_using=nx.DiGraph())

in_degrees = {}
for i in range(max(wikipedia['source'].max(), wikipedia['target'].max())):
    if i in list(diG_wikipedia_pos.nodes):
        in_degrees[i] = diG_wikipedia_pos.in_degree(i)
    else:
        in_degrees[i] = 0
        
in_degrees = pd.DataFrame(pd.Series(in_degrees))
in_degrees.rename(columns={0: 'source-in-degree-pos'}, inplace=True)
wikipedia = wikipedia.merge(in_degrees, left_on='source', right_index=True)

In [15]:
#Positive out-degree of source

out_degrees = {}
for i in range(max(epinions['source'].max(), epinions['target'].max())):
    if i in list(diG_epinions_pos.nodes):
        out_degrees[i] = diG_epinions_pos.out_degree(i)
    else:
        out_degrees[i] = 0

out_degrees = pd.DataFrame(pd.Series(out_degrees))
out_degrees.rename(columns={0: 'source-out-degree-pos'}, inplace=True)
epinions = epinions.merge(out_degrees, left_on='source', right_index=True)



out_degrees = {}
for i in range(max(slashdot['source'].max(), slashdot['target'].max())):
    if i in list(diG_slashdot_pos.nodes):
        out_degrees[i] = diG_slashdot_pos.out_degree(i)
    else:
        out_degrees[i] = 0

out_degrees = pd.DataFrame(pd.Series(out_degrees))
out_degrees.rename(columns={0: 'source-out-degree-pos'}, inplace=True)
slashdot = slashdot.merge(out_degrees, left_on='source', right_index=True)



out_degrees = {}
for i in range(max(wikipedia['source'].max(), wikipedia['target'].max())):
    if i in list(diG_wikipedia_pos.nodes):
        out_degrees[i] = diG_wikipedia_pos.out_degree(i)
    else:
        out_degrees[i] = 0

out_degrees = pd.DataFrame(pd.Series(out_degrees))
out_degrees.rename(columns={0: 'source-out-degree-pos'}, inplace=True)
wikipedia = wikipedia.merge(out_degrees, left_on='source', right_index=True)

In [16]:
#Negative in-degree of target

epinions_neg = epinions[epinions['sign'] == -1]
diG_epinions_neg = nx.from_pandas_edgelist(epinions_neg, 'source', 'target', 'sign', create_using=nx.DiGraph())

in_degrees = {}
for i in range(max(epinions['source'].max(), epinions['target'].max())):
    if i in list(diG_epinions_neg.nodes):
        in_degrees[i] = diG_epinions_neg.in_degree(i)
    else:
        in_degrees[i] = 0
        
in_degrees = pd.DataFrame(pd.Series(in_degrees))
in_degrees.rename(columns={0: 'target-in-degree-neg'}, inplace=True)
epinions = epinions.merge(in_degrees, left_on='target', right_index=True)



slashdot_neg = slashdot[slashdot['sign'] == -1]
diG_slashdot_neg = nx.from_pandas_edgelist(slashdot_neg, 'source', 'target', 'sign', create_using=nx.DiGraph())

in_degrees = {}
for i in range(max(slashdot['source'].max(), slashdot['target'].max())):
    if i in list(diG_slashdot_neg.nodes):
        in_degrees[i] = diG_slashdot_neg.in_degree(i)
    else:
        in_degrees[i] = 0
        
in_degrees = pd.DataFrame(pd.Series(in_degrees))
in_degrees.rename(columns={0: 'target-in-degree-neg'}, inplace=True)
slashdot = slashdot.merge(in_degrees, left_on='target', right_index=True)



wikipedia_neg = wikipedia[wikipedia['sign'] == -1]
diG_wikipedia_neg = nx.from_pandas_edgelist(wikipedia_neg, 'source', 'target', 'sign', create_using=nx.DiGraph())

in_degrees = {}
for i in range(max(wikipedia['source'].max(), wikipedia['target'].max())):
    if i in list(diG_wikipedia_neg.nodes):
        in_degrees[i] = diG_wikipedia_neg.in_degree(i)
    else:
        in_degrees[i] = 0
        
in_degrees = pd.DataFrame(pd.Series(in_degrees))
in_degrees.rename(columns={0: 'target-in-degree-neg'}, inplace=True)
wikipedia = wikipedia.merge(in_degrees, left_on='target', right_index=True)

In [17]:
#Negative out-degree of target

out_degrees = {}
for i in range(max(epinions['source'].max(), epinions['target'].max())):
    if i in list(diG_epinions_neg.nodes):
        out_degrees[i] = diG_epinions_neg.out_degree(i)
    else:
        out_degrees[i] = 0
        
out_degrees = pd.DataFrame(pd.Series(out_degrees))
out_degrees.rename(columns={0: 'target-out-degree-neg'}, inplace=True)
epinions = epinions.merge(out_degrees, left_on='target', right_index=True)



out_degrees = {}
for i in range(max(slashdot['source'].max(), slashdot['target'].max())):
    if i in list(diG_slashdot_neg.nodes):
        out_degrees[i] = diG_slashdot_neg.out_degree(i)
    else:
        out_degrees[i] = 0
        
out_degrees = pd.DataFrame(pd.Series(out_degrees))
out_degrees.rename(columns={0: 'target-out-degree-neg'}, inplace=True)
slashdot = slashdot.merge(out_degrees, left_on='target', right_index=True)



out_degrees = {}
for i in range(max(wikipedia['source'].max(), wikipedia['target'].max())):
    if i in list(diG_wikipedia_neg.nodes):
        out_degrees[i] = diG_wikipedia_neg.out_degree(i)
    else:
        out_degrees[i] = 0
        
out_degrees = pd.DataFrame(pd.Series(out_degrees))
out_degrees.rename(columns={0: 'target-out-degree-neg'}, inplace=True)
wikipedia = wikipedia.merge(out_degrees, left_on='target', right_index=True)

In [18]:
def triad_census(G, diG, source, target):
    """Counts every type of triads between two given nodes"""
    
    paths = list(nx.all_simple_paths(G, source, target, cutoff = 2))
    
    weights = {1: 'p', -1: 'm'}
    triad_counts = {}
    
    for char1 in 'FB':
        for char2 in 'FB':
            for char3 in 'pm':
                for char4 in 'pm':
                    triad_counts[char1 + char2 + char3 + char4] = 0
    
    for path in paths:
        if len(path) == 3:
            if diG.has_edge(path[0], path[1]) and diG.has_edge(path[1], path[2]):
                triad_counts['FF' + weights[diG.get_edge_data(path[0], path[1])['sign']] + \
                                    weights[diG.get_edge_data(path[1], path[2])['sign']]] += 1
            if diG.has_edge(path[1], path[0]) and diG.has_edge(path[1], path[2]):
                triad_counts['BF' + weights[diG.get_edge_data(path[1], path[0])['sign']] + \
                                    weights[diG.get_edge_data(path[1], path[2])['sign']]] += 1
            if diG.has_edge(path[0], path[1]) and diG.has_edge(path[2], path[1]):
                triad_counts['FB' + weights[diG.get_edge_data(path[0], path[1])['sign']] + \
                                    weights[diG.get_edge_data(path[2], path[1])['sign']]] += 1
            if diG.has_edge(path[1], path[0]) and diG.has_edge(path[2], path[1]):
                triad_counts['BB' + weights[diG.get_edge_data(path[1], path[0])['sign']] + \
                                    weights[diG.get_edge_data(path[2], path[1])['sign']]] += 1
    return triad_counts

In [19]:
#Counting the triads

epinions = epinions.join(epinions.apply(lambda x: pd.Series(triad_census(G_epinions, diG_epinions,\
                                                                        x['source'],x['target'])), axis=1))

slashdot = slashdot.join(slashdot.apply(lambda x: pd.Series(triad_census(G_slashdot, diG_slashdot,\
                                                                        x['source'],x['target'])), axis=1))

wikipedia = wikipedia.join(wikipedia.apply(lambda x: pd.Series(triad_census(G_wikipedia, diG_wikipedia,\
                                                                        x['source'],x['target'])), axis=1))

### Training the model

Minimal Embededness of 0

In [15]:
epinions_neg = epinions[epinions['sign'] == -1]
num_neg = len(epinions_neg)
epinions_pos = epinions[epinions['sign'] == 1].sample(n=num_neg)
epinions_0 = pd.concat([epinions_pos, epinions_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
epinions_0 = epinions_0.sample(frac=1)


slashdot_neg = slashdot[slashdot['sign'] == -1]
num_neg = len(slashdot_neg)
slashdot_pos = slashdot[slashdot['sign'] == 1].sample(n=num_neg)
slashdot_0 = pd.concat([slashdot_pos, slashdot_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
slashdot_0 = slashdot_0.sample(frac=1)


wikipedia_neg = wikipedia[wikipedia['sign'] == -1]
num_neg = len(wikipedia_neg)
wikipedia_pos = wikipedia[wikipedia['sign'] == 1].sample(n=num_neg)
wikipedia_0 = pd.concat([wikipedia_pos, wikipedia_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
wikipedia_0 = wikipedia_0.sample(frac=1)

In [16]:
epinions_y = epinions_0['sign']
slashdot_y = slashdot_0['sign']
wikipedia_y = wikipedia_0['sign']

Trying with only the degree information

In [17]:
epinions_degree = epinions_0[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

slashdot_degree = slashdot_0[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

wikipedia_degree = wikipedia_0[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

In [18]:
crossValidation(epinions_degree, epinions_y)[0]

0.8396215735424922

In [19]:
crossValidation(slashdot_degree, slashdot_y)[0]

0.7954402642391042

In [20]:
crossValidation(wikipedia_degree, wikipedia_y)[0]

0.8113207547169811

Trying only with the triads

In [21]:
epinions_triads = epinions_0.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

slashdot_triads = slashdot_0.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

wikipedia_triads = wikipedia_0.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

In [22]:
crossValidation(epinions_triads, epinions_y)[0]

0.8407576615185575

In [23]:
crossValidation(slashdot_triads, slashdot_y)[0]

0.6637637960203013

In [24]:
crossValidation(wikipedia_triads, wikipedia_y)[0]

0.7880720017349815

Trying to use everything

In [25]:
epinions_all = epinions_0.drop(columns = ['source', 'target', 'sign'])

slashdot_all = slashdot_0.drop(columns = ['source', 'target', 'sign'])

wikipedia_all = wikipedia_0.drop(columns = ['source', 'target', 'sign'])

In [26]:
crossValidation(epinions_all, epinions_y)[0]

0.8769224549203525

In [27]:
crossValidation(slashdot_all, slashdot_y)[0]

0.8139732538467734

In [28]:
crossValidation(wikipedia_all, wikipedia_y)[0]

0.8264150943396226

Minimal embeddedness of 10

In [29]:
epinions_neg = epinions[(epinions['sign'] == -1) & (epinions['embeddedness'] >= 10)]
num_neg = len(epinions_neg)
epinions_pos = epinions[(epinions['sign'] == 1) & (epinions['embeddedness'] >= 10)].sample(n=num_neg)
epinions_10 = pd.concat([epinions_pos, epinions_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
epinions_10 = epinions_10.sample(frac=1)


slashdot_neg = slashdot[(slashdot['sign'] == -1) & (slashdot['embeddedness'] >= 10)]
num_neg = len(slashdot_neg)
slashdot_pos = slashdot[(slashdot['sign'] == 1) & (slashdot['embeddedness'] >= 10)].sample(n=num_neg)
slashdot_10 = pd.concat([slashdot_pos, slashdot_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
slashdot_10 = slashdot_10.sample(frac=1)


wikipedia_neg = wikipedia[(wikipedia['sign'] == -1) & (wikipedia['embeddedness'] >= 10)]
num_neg = len(wikipedia_neg)
wikipedia_pos = wikipedia[(wikipedia['sign'] == 1) & (wikipedia['embeddedness'] >= 10)].sample(n=num_neg)
wikipedia_10 = pd.concat([wikipedia_pos, wikipedia_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
wikipedia_10 = wikipedia_10.sample(frac=1)

In [30]:
epinions_y = epinions_10['sign']
slashdot_y = slashdot_10['sign']
wikipedia_y = wikipedia_10['sign']

Trying with only degree information

In [31]:
epinions_degree = epinions_10[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

slashdot_degree = slashdot_10[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

wikipedia_degree = wikipedia_10[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

In [32]:
crossValidation(epinions_degree, epinions_y)[0]

0.8860858257477243

In [33]:
crossValidation(slashdot_degree, slashdot_y)[0]

0.8579330422125182

In [34]:
crossValidation(wikipedia_degree, wikipedia_y)[0]

0.7987227248536455

Trying with only triads

In [35]:
epinions_triads = epinions_10.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

slashdot_triads = slashdot_10.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

wikipedia_triads = wikipedia_10.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

In [36]:
crossValidation(epinions_triads, epinions_y)[0]

0.934084669845398

In [37]:
crossValidation(slashdot_triads, slashdot_y)[0]

0.9014556040756914

In [38]:
crossValidation(wikipedia_triads, wikipedia_y)[0]

0.81149547631719

Trying with everything

In [39]:
epinions_all = epinions_10.drop(columns = ['source', 'target', 'sign'])

slashdot_all = slashdot_10.drop(columns = ['source', 'target', 'sign'])

wikipedia_all = wikipedia_10.drop(columns = ['source', 'target', 'sign'])

In [40]:
crossValidation(epinions_all, epinions_y)[0]

0.9354862014159803

In [41]:
crossValidation(slashdot_all, slashdot_y)[0]

0.9078602620087336

In [42]:
crossValidation(wikipedia_all, wikipedia_y)[0]

0.8215007982969664

Minimal embeddedness of 25

In [43]:
epinions_neg = epinions[(epinions['sign'] == -1) & (epinions['embeddedness'] >= 25)]
num_neg = len(epinions_neg)
epinions_pos = epinions[(epinions['sign'] == 1) & (epinions['embeddedness'] >= 25)].sample(n=num_neg)
epinions_25 = pd.concat([epinions_pos, epinions_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
epinions_25 = epinions_25.sample(frac=1)


slashdot_neg = slashdot[(slashdot['sign'] == -1) & (slashdot['embeddedness'] >= 25)]
num_neg = len(slashdot_neg)
slashdot_pos = slashdot[(slashdot['sign'] == 1) & (slashdot['embeddedness'] >= 25)].sample(n=num_neg)
slashdot_25 = pd.concat([slashdot_pos, slashdot_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
slashdot_25 = slashdot_25.sample(frac=1)


wikipedia_neg = wikipedia[(wikipedia['sign'] == -1) & (wikipedia['embeddedness'] >= 25)]
num_neg = len(wikipedia_neg)
wikipedia_pos = wikipedia[(wikipedia['sign'] == 1) & (wikipedia['embeddedness'] >= 25)].sample(n=num_neg)
wikipedia_25 = pd.concat([wikipedia_pos, wikipedia_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
wikipedia_25 = wikipedia_25.sample(frac=1)

In [15]:
wikipedia_neg = wikipedia[(wikipedia['sign'] == -1) & (wikipedia['embeddedness'] >= 25)]
num_neg = len(wikipedia_neg)
wikipedia_pos = wikipedia[(wikipedia['sign'] == 1) & (wikipedia['embeddedness'] >= 25)].sample(n=num_neg)
wikipedia_25 = pd.concat([wikipedia_pos, wikipedia_neg])
#We shuffle all the rows so we avoid the problem of having test sets consisting of only positives or negatives
wikipedia_25 = wikipedia_25.sample(frac=1)

In [44]:
epinions_y = epinions_25['sign']
slashdot_y = slashdot_25['sign']
wikipedia_y = wikipedia_25['sign']

Trying with only degree information

In [45]:
epinions_degree = epinions_25[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

slashdot_degree = slashdot_25[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

wikipedia_degree = wikipedia_25[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg']]

In [46]:
crossValidation(epinions_degree, epinions_y)[0]

0.8830985915492958

In [47]:
crossValidation(slashdot_degree, slashdot_y)[0]

0.8948529411764706

In [48]:
crossValidation(wikipedia_degree, wikipedia_y)[0]

0.7707610146862484

Trying with only triad information

In [49]:
epinions_triads = epinions_25.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

slashdot_triads = slashdot_25.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

wikipedia_triads = wikipedia_25.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign'], axis=1)

In [50]:
crossValidation(epinions_triads, epinions_y)[0]

0.9361226180613091

In [51]:
crossValidation(slashdot_triads, slashdot_y)[0]

0.9352941176470588

In [52]:
crossValidation(wikipedia_triads, wikipedia_y)[0]

0.7979973297730306

Trying with all information

In [53]:
epinions_all = epinions_25.drop(columns = ['source', 'target', 'sign'])

slashdot_all = slashdot_25.drop(columns = ['source', 'target', 'sign'])

wikipedia_all = wikipedia_25.drop(columns = ['source', 'target', 'sign'])

In [54]:
crossValidation(epinions_all, epinions_y)[0]

0.9359016846175091

In [55]:
crossValidation(slashdot_all, slashdot_y)[0]

0.9363970588235293

In [56]:
crossValidation(wikipedia_all, wikipedia_y)[0]

0.802136181575434

Defining the Quadriads

In [57]:
def quadriad_census(G, diG, source, target):
    """Counts every type of triads between two given nodes"""
    
    paths = list(nx.all_simple_paths(G, source, target, cutoff = 3))
    
    weights = {1: 'p', -1: 'm'}
    triad_counts = {}
    
    for char1 in 'FB':
        for char2 in 'FB':
            for char3 in 'FB':
                for char4 in 'pm':
                    for char5 in 'pm':
                        for char6 in 'pm':
                            triad_counts[char1 + char2 + char3 + char4 + char5 + char6] = 0
    
    for path in paths:
        if len(path) == 4:
            if diG.has_edge(path[0], path[1]) and diG.has_edge(path[1], path[2]):
                if diG.has_edge(path[2], path[3]):
                    triad_counts['FFF' + weights[diG.get_edge_data(path[0], path[1])['sign']] + \
                                         weights[diG.get_edge_data(path[1], path[2])['sign']] + \
                                         weights[diG.get_edge_data(path[2], path[3])['sign']]] += 1
                if diG.has_edge(path[3], path[2]):
                    triad_counts['FFB' + weights[diG.get_edge_data(path[0], path[1])['sign']] + \
                                         weights[diG.get_edge_data(path[1], path[2])['sign']] + \
                                         weights[diG.get_edge_data(path[3], path[2])['sign']]] += 1
                    
                    
            if diG.has_edge(path[1], path[0]) and diG.has_edge(path[1], path[2]):
                if diG.has_edge(path[2], path[3]):
                    triad_counts['BFF' + weights[diG.get_edge_data(path[1], path[0])['sign']] + \
                                         weights[diG.get_edge_data(path[1], path[2])['sign']] + \
                                         weights[diG.get_edge_data(path[2], path[3])['sign']]] += 1
                if diG.has_edge(path[3], path[2]):
                    triad_counts['BFB' + weights[diG.get_edge_data(path[1], path[0])['sign']] + \
                                         weights[diG.get_edge_data(path[1], path[2])['sign']] + \
                                         weights[diG.get_edge_data(path[3], path[2])['sign']]] += 1
                    
                    
            if diG.has_edge(path[0], path[1]) and diG.has_edge(path[2], path[1]):
                if diG.has_edge(path[2], path[3]):
                    triad_counts['FBF' + weights[diG.get_edge_data(path[0], path[1])['sign']] + \
                                         weights[diG.get_edge_data(path[2], path[1])['sign']] + \
                                         weights[diG.get_edge_data(path[2], path[3])['sign']]] += 1
                if diG.has_edge(path[3], path[2]):
                    triad_counts['FBB' + weights[diG.get_edge_data(path[0], path[1])['sign']] + \
                                         weights[diG.get_edge_data(path[2], path[1])['sign']] + \
                                         weights[diG.get_edge_data(path[3], path[2])['sign']]] += 1
                    
                    
            if diG.has_edge(path[1], path[0]) and diG.has_edge(path[2], path[1]):
                if diG.has_edge(path[2], path[3]):
                    triad_counts['BBF' + weights[diG.get_edge_data(path[1], path[0])['sign']] + \
                                         weights[diG.get_edge_data(path[2], path[1])['sign']] + \
                                         weights[diG.get_edge_data(path[2], path[3])['sign']]] += 1
                if diG.has_edge(path[3], path[2]):
                    triad_counts['BBB' + weights[diG.get_edge_data(path[1], path[0])['sign']] + \
                                         weights[diG.get_edge_data(path[2], path[1])['sign']] + \
                                         weights[diG.get_edge_data(path[3], path[2])['sign']]] += 1
    return triad_counts

In [69]:
slashdot_25 = slashdot_25.join(slashdot_25.apply(lambda x: pd.Series(quadriad_census(G_slashdot, diG_slashdot,\
                                                                        x['source'], x['target'])), axis=1))

In [72]:
wikipedia_25 = wikipedia_25.join(wikipedia_25.apply(lambda x: pd.Series(quadriad_census(G_wikipedia, diG_wikipedia,\
                                                                        x['source'], x['target'])), axis=1))

In [15]:
slashdot_quad = slashdot_25.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign', 'FFpp', 'FFpm', 'FFmp', 'FFmm', 'FBpp',\
                                         'FBpm', 'FBmp', 'FBmm', 'BFpp', 'BFpm', 'BFmp', 'BFmm', 'BBpp',\
                                         'BBpm', 'BBmp', 'BBmm'], axis=1)

wikipedia_quad = wikipedia_25.drop(columns=['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign', 'FFpp', 'FFpm', 'FFmp', 'FFmm', 'FBpp',\
                                         'FBpm', 'FBmp', 'FBmm', 'BFpp', 'BFpm', 'BFmp', 'BFmm', 'BBpp',\
                                         'BBpm', 'BBmp', 'BBmm'], axis=1)

In [19]:
slashdot_quad_all = slashdot_25.drop(columns=['source', 'target', 'sign', 'FFpp', 'FFpm', 'FFmp', 'FFmm', 'FBpp',\
                                         'FBpm', 'FBmp', 'FBmm', 'BFpp', 'BFpm', 'BFmp', 'BFmm', 'BBpp',\
                                         'BBpm', 'BBmp', 'BBmm'])
wikipedia_quad_all = wikipedia_25.drop(columns=['source', 'target', 'sign', 'FFpp', 'FFpm', 'FFmp', 'FFmm', 'FBpp',\
                                         'FBpm', 'FBmp', 'FBmm', 'BFpp', 'BFpm', 'BFmp', 'BFmm', 'BBpp',\
                                         'BBpm', 'BBmp', 'BBmm'])

In [16]:
slashdot_y_25 = slashdot_25['sign']
wikipedia_y_25 = wikipedia_25['sign']

In [45]:
_, betas, intercept = crossValidation(slashdot_quad, slashdot_y_25)

In [46]:
quad_betas = list(zip(np.squeeze(betas), slashdot_quad.columns.values))

In [47]:
quad_betas.sort(key=lambda tup: tup[0])

In [48]:
quad_betas

[(-11.52769480681479, 'FBFmmm'),
 (-5.268095417389494, 'FFBmpp'),
 (-5.100020686389061, 'FBFmpp'),
 (-4.754343144668427, 'FFFppp'),
 (-2.667038335833678, 'FBBppp'),
 (-2.2471899195228904, 'FFFmpm'),
 (-1.650257442978822, 'FBBppm'),
 (-1.548162004230935, 'BBFppp'),
 (-1.244324248989964, 'BFFppm'),
 (-1.233383800052531, 'BFFmpp'),
 (-1.1664099240446437, 'BBBmmm'),
 (-0.854520780623408, 'BFFmmm'),
 (-0.80951402057718, 'FFBmmm'),
 (-0.643237710736469, 'FBFppm'),
 (-0.5669334455642563, 'BFFpmp'),
 (-0.5555912090668387, 'BFBmmm'),
 (-0.518588341302396, 'BBFmpm'),
 (-0.5084015416417441, 'BBFpmm'),
 (-0.4138019770416442, 'FFFpmp'),
 (-0.39585217743959683, 'BBBppp'),
 (-0.32913903598396826, 'BFBppm'),
 (-0.30279728695599817, 'BBBppm'),
 (-0.25483835095500346, 'BFBpmp'),
 (-0.24916592254629294, 'BFFmmp'),
 (-0.20797713202999576, 'FFBmpm'),
 (-0.2029479481749145, 'BFBmpm'),
 (-0.1676418455649119, 'FBFmmp'),
 (-0.13700753877854252, 'FBFpmp'),
 (-0.12807551903749625, 'FFFpmm'),
 (-0.112101720096475

In [18]:
crossValidation(wikipedia_quad, wikipedia_y_25)[0]

0.8148197596795728

In [20]:
crossValidation(slashdot_quad_all, slashdot_y_25)[0]

0.9411764705882353

In [22]:
crossValidation(wikipedia_quad_all, wikipedia_y_25)[0]

0.8160213618157544

### Reduced Models

#### Balance Model

In [6]:
def triad_census_balance(G, source, target):
    
    paths = list(nx.all_simple_paths(G, source, target, cutoff = 2))
    
    weights = {1: 'p', -1: 'm'}
    triad_counts = {}
    
    for char1 in 'pm':
        for char2 in 'pm':
            triad_counts[char1 + char2] = 0
    
    for path in paths:
        if len(path) == 3:
            triad_counts[weights[G.get_edge_data(path[0], path[1])['sign']] + \
                         weights[G.get_edge_data(path[1], path[2])['sign']]] += 1
    return triad_counts

In [7]:
def quadriad_census_balance(G, source, target):
    
    paths = list(nx.all_simple_paths(G, source, target, cutoff = 3))
    
    weights = {1: 'p', -1: 'm'}
    quadriad_counts = {}
    
    for char1 in 'pm':
        for char2 in 'pm':
            for char3 in 'pm':
                quadriad_counts[char1 + char2 + char3] = 0
    
    for path in paths:
        if len(path) == 4:
            quadriad_counts[weights[G.get_edge_data(path[0], path[1])['sign']] + \
                         weights[G.get_edge_data(path[1], path[2])['sign']] + \
                         weights[G.get_edge_data(path[2], path[3])['sign']]] += 1
    return quadriad_counts

In [39]:
epinions_reduced = epinions_25[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign']]

slashdot_reduced = slashdot_25[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign']]

wikipedia_reduced = wikipedia_25[['embeddedness', 'out-degree', 'in-degree', 'source-in-degree-pos',\
                              'source-out-degree-pos', 'target-in-degree-neg', 'target-out-degree-neg', 'source',\
                                           'target', 'sign']]

In [40]:
epinions_reduced = epinions_reduced.join(epinions_reduced.apply(lambda x: \
                                    pd.Series(triad_census_balance(G_epinions,x['source'], x['target'])), axis=1))

In [14]:
slashdot_reduced = slashdot_reduced.join(slashdot_reduced.apply(lambda x: \
                                    pd.Series(triad_census_balance(G_slashdot,x['source'], x['target'])), axis=1))

wikipedia_reduced = wikipedia_reduced.join(slashdot_reduced.apply(lambda x: \
                                    pd.Series(triad_census_balance(G_wikipedia,x['source'], x['target'])), axis=1))

In [30]:
wikipedia_reduced = wikipedia_reduced.join(wikipedia_reduced.apply(lambda x: \
                                    pd.Series(triad_census_balance(G_wikipedia,x['source'], x['target'])), axis=1))

In [33]:
wikipedia_reduced = wikipedia_reduced.join(wikipedia_reduced.apply(lambda x: \
                                    pd.Series(quadriad_census_balance(G_wikipedia,x['source'], x['target'])), axis=1))

In [13]:
slashdot_reduced = slashdot_reduced.join(slashdot_reduced.apply(lambda x: \
                                    pd.Series(quadriad_census_balance(G_slashdot,x['source'], x['target'])), axis=1))

wikipedia_reduced = wikipedia_reduced.join(wikipedia_reduced.apply(lambda x: \
                                    pd.Series(quadriad_census_balance(G_wikipedia,x['source'], x['target'])), axis=1))

In [43]:
epinions_reduced_triads = epinions_reduced[['pp', 'pm', 'mp', 'mm']]
slashdot_reduced_triads = slashdot_reduced[['pp', 'pm', 'mp', 'mm']]
wikiepdia_reduced_triads = wikipedia_reduced[['pp', 'pm', 'mp', 'mm']]

slashdot_reduced_quadriads = slashdot_reduced[['ppp', 'ppm', 'pmp', 'mpp', 'pmm', 'mpm', 'mmp', 'mmm']]
wikipedia_reduced_quadriads = wikipedia_reduced[['ppp', 'ppm', 'pmp', 'mpp', 'pmm', 'mpm', 'mmp', 'mmm']]

In [44]:
epinions_y_reduced = epinions_reduced['sign']
slashdot_y_reduced = slashdot_reduced['sign']
wikipedia_y_reduced = wikipedia_reduced['sign']

In [45]:
_, betas, intercept = crossValidation(epinions_reduced_triads, epinions_y_reduced)

In [46]:
intercept

array([-7.34683297])

In [47]:
betas

array([[ 2.39943877],
       [-1.78952175],
       [-4.7763327 ],
       [-0.01169819]])

In [48]:
_, betas, intercept = crossValidation(slashdot_reduced_triads, slashdot_y_reduced)

In [49]:
intercept

array([-19.04516573])

In [50]:
betas

array([[ 1.25232399],
       [-2.19057836],
       [-5.13172741],
       [-0.05457162]])

In [51]:
_, betas, intercept = crossValidation(wikiepdia_reduced_triads, wikipedia_y_reduced)

In [52]:
intercept

array([-0.67439586])

In [53]:
betas

array([[ 1.22439761],
       [-1.04324411],
       [-1.08419836],
       [-0.07007986]])

In [54]:
_, betas, intercept = crossValidation(slashdot_reduced_quadriads, slashdot_y_reduced)

In [55]:
intercept

array([-30.57173967])

In [56]:
betas

array([[ 1.06861821],
       [-1.05265836],
       [ 0.07814483],
       [-2.65696839],
       [ 0.47711665],
       [ 0.30796233],
       [ 0.18640541],
       [-5.3681705 ]])

In [57]:
_, betas, intercept = crossValidation(wikipedia_reduced_quadriads, wikipedia_y_reduced)

In [58]:
intercept

array([-2.43682983])

In [59]:
betas

array([[ 1.45227612],
       [-1.26664667],
       [-0.84612195],
       [-1.27534776],
       [ 0.84796681],
       [ 1.03308246],
       [ 0.96518773],
       [-2.28582031]])

#### Status model

In [74]:
diG_epinions_status = nx.DiGraph()
diG_slashdot_status = nx.DiGraph()
diG_wikipedia_status = nx.DiGraph()

In [75]:
for edge in diG_epinions.edges:
    if diG_epinions.get_edge_data(edge[0], edge[1])['sign'] == 1:
        diG_epinions_status.add_edge(edge[0], edge[1])
    else:
        diG_epinions_status.add_edge(edge[1], edge[0])

In [76]:
for edge in diG_slashdot.edges:
    if diG_slashdot.get_edge_data(edge[0], edge[1])['sign'] == 1:
        diG_slashdot_status.add_edge(edge[0], edge[1])
    else:
        diG_slashdot_status.add_edge(edge[1], edge[0])

In [77]:
for edge in diG_wikipedia.edges:
    if diG_wikipedia.get_edge_data(edge[0], edge[1])['sign'] == 1:
        diG_wikipedia_status.add_edge(edge[0], edge[1])
    else:
        diG_wikipedia_status.add_edge(edge[1], edge[0])

In [94]:
def triad_census_status(G, diG, source, target):
    
    paths = list(nx.all_simple_paths(G, source, target, cutoff = 2))
    
    triad_counts = {}
    
    for char1 in 'FB':
        for char2 in 'FB':
            triad_counts[char1 + char2] = 0
    
    for path in paths:
        if len(path) == 3:
            if diG.has_edge(path[0], path[1]):
                if diG.has_edge(path[1], path[2]):
                    triad_counts['FF'] += 1
                if diG.has_edge(path[2], path[1]):
                    triad_counts['FB'] += 1
            if diG.has_edge(path[1], path[0]):
                if diG.has_edge(path[1], path[2]):
                    triad_counts['BF'] += 1
                if diG.has_edge(path[2], path[1]):
                    triad_counts['BB'] += 1
                    
    return triad_counts

In [103]:
def quadriad_census_status(G, diG, source, target):
    
    paths = list(nx.all_simple_paths(G, source, target, cutoff = 3))
    
    triad_counts = {}
    
    for char1 in 'FB':
        for char2 in 'FB':
            for char3 in 'FB':
                triad_counts[char1 + char2 + char3] = 0
    
    for path in paths:
        if len(path) == 4:
            if diG.has_edge(path[0], path[1]):
                if diG.has_edge(path[1], path[2]):
                    if diG.has_edge(path[2], path[3]):
                        triad_counts['FFF'] += 1
                    if diG.has_edge(path[3], path[2]):
                        triad_counts['FFB'] += 1
                        
                if diG.has_edge(path[2], path[1]):
                    if diG.has_edge(path[2], path[3]):
                        triad_counts['FBF'] += 1
                    if diG.has_edge(path[3], path[2]):   
                        triad_counts['FBB'] += 1
                        
            if diG.has_edge(path[1], path[0]):
                if diG.has_edge(path[1], path[2]):
                    if diG.has_edge(path[2], path[3]):
                        triad_counts['BFF'] += 1
                    if diG.has_edge(path[3], path[2]):
                        triad_counts['BFB'] += 1
                if diG.has_edge(path[2], path[1]):
                    if diG.has_edge(path[2], path[3]):
                        triad_counts['BBF'] += 1
                    if diG.has_edge(path[3], path[2]):
                        triad_counts['BBB'] += 1
                    
    return triad_counts

In [95]:
epinions_reduced = epinions_reduced.join(epinions_reduced.apply(lambda x: \
                                    pd.Series(triad_census_status(G_epinions,diG_epinions_status,x['source'], \
                                                                  x['target'])), axis=1))

slashdot_reduced = slashdot_reduced.join(slashdot_reduced.apply(lambda x: \
                                    pd.Series(triad_census_status(G_slashdot,diG_slashdot_status,x['source'], \
                                                                  x['target'])), axis=1))

wikipedia_reduced = wikipedia_reduced.join(wikipedia_reduced.apply(lambda x: \
                                    pd.Series(triad_census_status(G_wikipedia,diG_wikipedia_status,x['source'], \
                                                                  x['target'])), axis=1))

In [111]:
slashdot_reduced = slashdot_reduced.join(slashdot_reduced.apply(lambda x: \
                                    pd.Series(quadriad_census_status(G_slashdot,diG_slashdot_status,x['source'],\
                                                                  x['target'])), axis=1))

wikipedia_reduced = wikipedia_reduced.join(wikipedia_reduced.apply(lambda x: \
                                    pd.Series(quadriad_census_status(G_wikipedia,diG_wikipedia_status,x['source'],\
                                                                  x['target'])), axis=1))

In [120]:
epinions_reduced_triads_stat = epinions_reduced[['FF', 'FB', 'BF', 'BB']]
slashdot_reduced_triads_stat = slashdot_reduced[['FF', 'FB', 'BF', 'BB']]
wikipedia_reduced_triads_stat = wikipedia_reduced[['FF', 'FB', 'BF', 'BB']]

slashdot_reduced_quadriads_stat = slashdot_reduced[['FFF', 'FFB', 'FBF', 'BFF', 'FBB', 'BFB', 'BBF', 'BBB']]
wikipedia_reduced_quadriads_stat = wikipedia_reduced[['FFF', 'FFB', 'FBF', 'BFF', 'FBB', 'BFB', 'BBF', 'BBB']]

In [121]:
_, betas, intercept = crossValidation(epinions_reduced_triads_stat, epinions_y_reduced)

In [122]:
intercept

array([3.87080113])

In [123]:
betas

array([[ 2.62825473],
       [ 1.82120463],
       [-0.44302065],
       [-2.1014204 ]])

In [124]:
_, betas, intercept = crossValidation(slashdot_reduced_triads_stat, slashdot_y_reduced)

In [125]:
intercept

array([2.90321853])

In [126]:
betas

array([[ 0.58240284],
       [ 2.79108266],
       [ 1.0467067 ],
       [-1.77516237]])

In [127]:
_, betas, intercept = crossValidation(wikipedia_reduced_triads_stat, wikipedia_y_reduced)

In [128]:
intercept

array([-0.42242072])

In [129]:
betas

array([[ 0.17759964],
       [ 0.73529805],
       [ 0.6648401 ],
       [-1.34679417]])

In [130]:
_, betas, intercept = crossValidation(slashdot_reduced_quadriads_stat, slashdot_y_reduced)

In [131]:
intercept

array([2.47429066])

In [132]:
betas

array([[-3.95196216],
       [ 5.84998175],
       [ 4.5395544 ],
       [ 5.27456788],
       [-3.46645429],
       [-1.64647627],
       [-4.85253849],
       [ 0.81412854]])

In [133]:
_, betas, intercept = crossValidation(wikipedia_reduced_quadriads_stat, wikipedia_y_reduced)

In [134]:
betas

array([[-1.20507387],
       [ 2.29558945],
       [ 1.86995889],
       [ 1.1729688 ],
       [-2.46693081],
       [ 1.29576231],
       [-1.16574279],
       [-1.88395997]])

In [135]:
intercept

array([-0.86324494])

In [74]:
#slashdot_25.to_pickle(PATH + 'slashdot_quad.pkl')
#wikipedia_25.to_pickle(PATH + 'wikipedia_quad.pkl')

In [20]:
#epinions.to_pickle(PATH + 'epinion.pkl')
#slashdot.to_pickle(PATH + 'slashdot.pkl')
#wikipedia.to_pickle(PATH + 'wikipedia.pkl')

In [136]:
#slashdot_reduced.to_pickle(PATH + 'slashdot_quad_status.pkl')
#wikipedia_reduced.to_pickle(PATH + 'wikipedia_quad_status.pkl')

In [37]:
epinions = pd.read_pickle(PATH + 'epinion.pkl')
slashdot = pd.read_pickle(PATH + 'slashdot.pkl')
wikipedia = pd.read_pickle(PATH + 'wikipedia.pkl')

In [27]:
slashdot_25 = pd.read_pickle(PATH + 'slashdot_quad.pkl')
wikipedia_25 = pd.read_pickle(PATH + 'wikipedia_quad.pkl')