# Embedding Controversy

In [8]:
import networkx as nx
import pandas as pd
import numpy as np
import csv
import datetime as dt
import re
import os
import json
import random
import time,pickle
import scipy.sparse as sp
from operator import itemgetter
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
from scipy.sparse import spdiags, coo_matrix
import scipy as sp
import sys
import math
import collections

In [3]:
## Now the layout function
def forceatlas2_layout(G, iterations=10, linlog=False, pos=None, nohubs=False,
                       kr=0.001, k=None, dim=2):
    """
    Compute a force-directed layout of ``G``.

    The update loop is adapted from the sparse Fruchterman-Reingold layout
    in networkx, with two optional changes to the repulsion term
    (``nohubs`` and ``linlog``).

    Parameters
    ----------
    G : networkx graph
        The graph to lay out.  As a side effect every node gets the
        attributes ``prevcs`` and ``currcs`` set to 0.
    iterations : int
        Number of position updates to perform.
    linlog : bool
        If true, the repulsion term ``r`` is replaced by ``log(r + 1)``.
    pos : array-like of shape (number_of_nodes, dim), optional
        Initial positions, one row per node in the iteration order of ``G``.
        If None, positions are drawn uniformly at random from [0, 1).
        The input is copied, never modified in place.
    nohubs : bool
        If true, a node's repulsion term is divided by
        (sum of its adjacency row + 1).
    kr : float
        Not used by this implementation; kept so existing calls keep working.
    k : float, optional
        Distance scale used by both the repulsive and the attractive term.
        Defaults to ``sqrt(1 / number_of_nodes)``.
    dim : int
        Dimension of the layout.

    Returns
    -------
    dict
        Maps each node of ``G`` to its position (a row of a float32 array
        with ``dim`` entries).  Empty if ``G`` has no nodes.
    """
    # Attributes meant to hold the current and previous convergence speed.
    # Nothing below reads them; they are still set so the graph ends up
    # annotated exactly as before.
    for n in G:
        G.nodes[n]['prevcs'] = 0
        G.nodes[n]['currcs'] = 0

    # Nothing to place: return early instead of failing in the adjacency
    # conversion or dividing by zero when deriving k.
    if len(G) == 0:
        print("Force Atlas done")
        return {}

    # networkx 3.0 removed to_scipy_sparse_matrix; prefer its replacement
    # when available and fall back to the old name on older releases.
    to_sparse = getattr(nx, "to_scipy_sparse_array", None)
    if to_sparse is None:
        to_sparse = nx.to_scipy_sparse_matrix
    # Going through coo_matrix normalises whatever networkx returned to a
    # scipy sparse *matrix*, so the LIL row view below is always available.
    A = coo_matrix(to_sparse(G, dtype='f')).tolil()
    nnodes, _ = A.shape

    if pos is None:
        pos = np.asarray(np.random.random((nnodes, dim)), dtype=A.dtype)
    else:
        # astype copies, so the caller's array is left untouched.
        pos = np.asarray(pos).astype(A.dtype)
    if k is None:
        k = np.sqrt(1.0 / nnodes)

    # The initial "temperature" is about .1 of the domain area (=1x1);
    # this is the largest step allowed in the dynamics.
    t = 0.1
    # Simple cooling scheme: step down linearly on each iteration so the
    # last iteration still moves by one cooling step.
    cooling_step = t / float(iterations + 1)
    displacement = np.zeros((dim, nnodes))
    for iteration in range(iterations):
        displacement *= 0
        # loop over rows
        for i in range(A.shape[0]):
            # difference between this row's node position and all others
            delta = (pos[i] - pos).T
            # distance between points
            distance = np.sqrt((delta ** 2).sum(axis=0))
            # enforce minimum distance of 0.01
            distance = np.where(distance < 0.01, 0.01, distance)
            # the adjacency matrix row, as a dense 1 x nnodes array
            Ai = np.asarray(A.getrowview(i).toarray())
            # repulsive part of the displacement "force"
            Dist = k * k / distance ** 2
            if nohubs:
                # Sum the whole (single-row) array to get a true scalar;
                # float() on a 1-element 1-d array is deprecated in NumPy.
                Dist = Dist / float(Ai.sum() + 1)
            if linlog:
                Dist = np.log(Dist + 1)
            # repulsion minus attraction along existing edges
            displacement[:, i] += \
                (delta * (Dist - Ai * distance / k)).sum(axis=1)
        # update positions: move every node by at most t along its force
        length = np.sqrt((displacement ** 2).sum(axis=0))
        length = np.where(length < 0.01, 0.01, length)
        pos += (displacement * t / length).T
        # cool temperature
        t -= cooling_step
    print("Force Atlas done")
    return dict(zip(G, pos))

In [5]:
# Directory the input edge lists are read from.
path = "C:/MyPath/Graphs/"
# Directory the computed layout positions are written to.
savepath = "C:/MyPath/Forceatlas/"

In [None]:
# Read the comma-separated weighted edge list as an undirected graph.
edgelist_file = path + "guncontrol_edgelist.txt"
FW = nx.read_weighted_edgelist(edgelist_file, create_using=nx.Graph(), delimiter=",")

# Lay the graph out (plain repulsion, no hub correction, 1000 iterations).
positions = forceatlas2_layout(FW, iterations=1000, linlog=False, nohubs=False)

# Output file for the positions; it is left open for the cell that writes them.
positions_file = savepath + "guncontrol_reddit" + "_positions.txt"
out = open(positions_file, "w")

In [None]:
# Write one "<node>\t<x>,<y>" line per node.
# A failure on a single node is reported and skipped (best effort), but
# KeyboardInterrupt / SystemExit are no longer swallowed.  The file is
# closed afterwards so every line is flushed to disk before the positions
# file is read back later in the notebook.
try:
    for node, coords in positions.items():
        try:
            out.write(node + "\t" + str(coords[0]) + "," + str(coords[1]) + "\n")
        except Exception:
            print("ERROR")
finally:
    out.close()

In [None]:
# Re-read the edge list (edge weights parsed by networkx).
G = nx.read_weighted_edgelist(path + "guncontrol_edgelist.txt",
                              delimiter=",")
# Community membership files; these handles stay open because their
# contents are read later in the notebook.
f1 = open("C:/MyPath/Guncontrol_reddit_community0.csv")
f2 = open("C:/MyPath/Guncontrol_reddit_community1.csv")
# The positions file is read in full right here, so close it immediately
# instead of leaking the handle.
with open(savepath + "guncontrol_reddit" + "_positions.txt") as f3:
    lines3 = f3.readlines()

In [None]:
# Build the membership lookups: one key per non-blank line of each
# community file, with surrounding whitespace stripped.  The value 1 is a
# placeholder; only the keys are used.
# Blank lines are detected after stripping, so a whitespace-only line no
# longer becomes an empty-string member.  Each handle is closed once it has
# been read.

# Members listed in the community0 file (the "left" side).
try:
    lines1 = [line for line in f1.readlines() if line.strip()]
finally:
    f1.close()
dict_left = {line.strip(): 1 for line in lines1}

# Members listed in the community1 file (the "right" side).
try:
    lines2 = [line for line in f2.readlines() if line.strip()]
finally:
    f2.close()
dict_right = {line.strip(): 1 for line in lines2}

In [None]:
def getDistance(pointa, pointb):
    """Return the Euclidean distance between two points in the plane.

    Only the first two coordinates (index 0 and 1) of each point are used.
    """
    dx = pointa[0] - pointb[0]
    dy = pointa[1] - pointb[1]
    return math.sqrt(dx ** 2 + dy ** 2)

In [None]:
# Parse the "<node>\t<x>,<y>" lines of the positions file into
# {node: [x, y]}.  Lines that begin with a tab (empty node name) are skipped.
dict_positions = {}
for raw_line in lines3:
    if raw_line.startswith('\t'):
        continue
    fields = raw_line.strip().split("\t")
    coords = fields[1].split(",")
    dict_positions[fields[0]] = [float(coords[0]), float(coords[1])]

# Keep only the left-community users that have a position, in their
# original order.
left_list = [user for user in dict_left.keys() if user in dict_positions]

total_lib_lib = 0.0
count_lib_lib = 0.0
avg_lib_lib = 0.0  # average liberal to liberal distance

# Accumulate the distance over every unordered pair of left-community users.
for idx, first_user in enumerate(left_list):
    first_pos = dict_positions[first_user]
    for second_user in left_list[idx + 1:]:
        total_lib_lib += getDistance(first_pos, dict_positions[second_user])
        count_lib_lib += 1.0

avg_lib_lib = total_lib_lib / count_lib_lib
avg_lib_lib

In [None]:
# Keep only the right-community users that have a position, in their
# original order.
right_list = [user for user in dict_right.keys() if user in dict_positions]

total_cons_cons = 0.0
count_cons_cons = 0.0
avg_cons_cons = 0.0  # average conservative to conservative distance

# Accumulate the distance over every unordered pair of right-community users.
for idx, first_user in enumerate(right_list):
    first_pos = dict_positions[first_user]
    for second_user in right_list[idx + 1:]:
        total_cons_cons += getDistance(first_pos, dict_positions[second_user])
        count_cons_cons += 1.0

avg_cons_cons = total_cons_cons / count_cons_cons

total_both = 0.0
count_both = 0.0
avg_both = 0.0  # average distance between a left and a right user

# Accumulate the distance over every (left user, right user) pair.
for left_user in left_list:
    left_pos = dict_positions[left_user]
    for right_user in right_list:
        total_both += getDistance(left_pos, dict_positions[right_user])
        count_both += 1.0

avg_both = total_both / count_both

In [None]:
# Embedding score: 1 minus the ratio of the summed within-community average
# distances to twice the between-community average distance, rounded to
# four decimals.
within_total = avg_lib_lib + avg_cons_cons
between_doubled = 2 * avg_both
score = round(1 - (within_total / between_doubled), 4)
print("Embedding score: {}".format(score))