In [180]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pyvis.network import Network

import networkx as nx
# Output path prefix: each network's HTML file name is appended to this string.
file_path = "./Wihardja/wihardja_"

In [181]:
# Load the consolidated forum CSV; rows from different forums are told apart by the `Source` column.
compiled_df = pd.read_csv("../Clean Data/clean_consolidated_wihardja (forum).csv") 

In [182]:
# Inspect which columns the consolidated frame provides.
compiled_df.columns

Index(['Unnamed: 0', 'Author', 'Body', 'Reply to', 'Message Replying to',
       'Permalink', 'Score', 'TimeStamp', 'Year', 'Source'],
      dtype='object')

In [183]:
# List the distinct forum names present in the `Source` column.
compiled_df["Source"].unique().tolist()


['Singapore Expat', 'HWZ']

In [184]:
# Split the consolidated posts into one frame per forum.
# A source label that does not occur in the data simply yields an empty frame.
source_column = compiled_df["Source"]
hwz_df = compiled_df[source_column == "HWZ"]
reddit_df = compiled_df[source_column == "Reddit"]
expat_df = compiled_df[source_column == "Singapore Expat"]


In [185]:
# Print (rows, columns) for each per-source frame, in HWZ / Reddit / Expat order.
for per_source_df in (hwz_df, reddit_df, expat_df):
    print(per_source_df.shape)

(6, 10)
(0, 10)
(1, 10)


In [186]:
# How often each user appears as a reply target in the HWZ posts.
hwz_df["Reply to"].value_counts()


Mr.Canberra      1
The_King         1
karagiselle      1
americanhippo    1
Name: Reply to, dtype: int64

In [187]:
# Start an empty networkx graph for the HWZ reply network.
G = nx.Graph()

In [188]:
def color(size):
    """Map a node's post-count score to a display color.

    Returns "red" when ``size`` is above 4, "green" when it is above 2
    (but not above 4), and "blue" otherwise.
    """
    if size > 4:
        return "red"
    return "green" if size > 2 else "blue"

# --- HWZ reply network: node and edge preparation ---
# Candidate nodes are every poster plus every reply target; entries that are
# not plain strings are discarded.
candidates = list(hwz_df["Author"].values) + list(hwz_df["Reply to"].values)
nodes = [name for name in candidates if type(name) is str]

# One (author, reply target) pair per post, kept only when both ends are strings.
reply_pairs = [tuple(row) for row in hwz_df[["Author", "Reply to"]].values]
edges = [pair for pair in reply_pairs if all(type(end) is str for end in pair)]

# Every distinct directed pair starts at weight 1. When a later pair is the
# reverse of an earlier one, the later pair absorbs it (weight + 1) and the
# earlier pair is zeroed so it can be dropped below.
pairwise_weight = dict.fromkeys(edges, 1)
ordered_edges = list(pairwise_weight)
for position, earlier in enumerate(ordered_edges):
    for later in ordered_edges[position + 1:]:
        if earlier == later[::-1]:
            pairwise_weight[later] += 1
            pairwise_weight[earlier] = 0

pairwise_weight = {edge: weight for edge, weight in pairwise_weight.items() if weight > 0}
# Edge weight grows exponentially (base 2) with the pair weight.
weighted_edges = [
    (source, target, 2**weight)
    for (source, target), weight in pairwise_weight.items()
]

# Node score: 1 for being present, plus 1 for every post the user authored.
degrees = dict.fromkeys(nodes, 1)
for author in hwz_df["Author"].values:
    if author in degrees:
        degrees[author] += 1

# Attach the display attributes consumed when the graph is rendered.
nodes = [
    (name, {"size": 2 * degrees[name], "color": color(degrees[name])})
    for name in nodes
]



In [189]:
# Register the nodes (with their size/color attributes) and the weighted edges on the graph.
G.add_nodes_from(nodes)
G.add_weighted_edges_from(weighted_edges)

In [190]:
# Render the HWZ graph with pyvis on a 1000px x 1000px canvas.
nt = Network(height='1000px', width='1000px')

# from_nx populates the pyvis nodes and edges data structures from the networkx graph
nt.from_nx(G)

hwz_output_html = file_path + 'hwz_network.html'
nt.show(hwz_output_html)


In [191]:
# How often each user appears as a reply target in the Reddit posts.
reddit_df["Reply to"].value_counts()

Series([], Name: Reply to, dtype: int64)

In [192]:
# Replace G with a new empty graph so the Reddit network starts clean.
G= nx.Graph()

In [193]:
def color(size):
    """Pick the node color for a post-count score.

    Thresholds are checked from highest to lowest: above 4 gives "red",
    above 2 gives "green", anything else gives "blue".
    """
    for threshold, shade in ((4, "red"), (2, "green")):
        if size > threshold:
            return shade
    return "blue"

# --- Reddit reply network: node and edge preparation ---
# Candidate nodes are every poster plus every reply target. A name is kept only
# when it is a string AND is not the "-" placeholder. The type check must come
# first and be joined with `and`: with `or`, non-string entries (presumably NaN
# for missing values) reach .strip() and raise, while "-" is never filtered.
nodes = [author for author in reddit_df["Author"].values] + [reply_to for reply_to in reddit_df["Reply to"].values]
nodes = [node for node in nodes if type(node) == str and node.strip() != "-"]

# One (author, reply target) pair per post, kept only when both ends are real usernames.
edges = [(author, reply_to) for author, reply_to in reddit_df[["Author", "Reply to"]].values]
edges = [edge for edge in edges if (type(edge[0]) == str and type(edge[1]) == str and edge[0].strip() != "-" and edge[1].strip() != "-")]

# Every distinct directed pair starts at weight 1. When a pair's reverse occurs
# later in insertion order, the later pair absorbs it (weight + 1) and the
# earlier pair is zeroed so it is dropped below. The position lookup finds the
# reverse directly instead of scanning all pairs for each pair.
pairwise_weight = {edge: 1 for edge in edges}
edge_position = {edge: index for index, edge in enumerate(pairwise_weight)}
for edge, index in edge_position.items():
    reverse = edge[::-1]
    # A self-loop is its own reverse at the same position, so it is left alone.
    if edge_position.get(reverse, -1) > index:
        pairwise_weight[reverse] += 1
        pairwise_weight[edge] = 0

pairwise_weight = {key: pairwise_weight[key] for key in pairwise_weight if pairwise_weight[key] > 0}
# Edge weight grows exponentially (base 10) with the pair weight.
weighted_edges = [(key[0], key[1], 10**pairwise_weight[key] ) for key in pairwise_weight]

# Node score: 1 for being present, plus 1 for every post the user authored.
degrees = {node : 1 for node in nodes}

# Count posts from the Reddit frame itself (this previously read hwz_df, so
# Reddit node sizes and colors reflected HWZ activity).
for author in reddit_df["Author"].values:
    if author in degrees:
        degrees[author] += 1

# Attach the display attributes consumed when the graph is rendered.
nodes = [(node, {"size" : 2*degrees[node], "color" : color(degrees[node])}) for node in nodes]

In [194]:
# Register the nodes (with their size/color attributes) and the weighted edges on the graph.
G.add_nodes_from(nodes)
G.add_weighted_edges_from(weighted_edges)

In [195]:
# Render the Reddit graph with pyvis on an 800px x 800px canvas.
nt = Network(height='800px', width='800px')

# from_nx populates the pyvis nodes and edges data structures from the networkx graph
nt.from_nx(G)

reddit_output_html = file_path + 'reddit_network.html'
nt.show(reddit_output_html)

In [196]:
# How often each user appears as a reply target in the Singapore Expat posts.
expat_df["Reply to"].value_counts()

Toystory    1
Name: Reply to, dtype: int64

In [197]:
# Replace G with a new empty graph so the Singapore Expat network starts clean.
G= nx.Graph()

In [198]:
def color(size):
    """Translate a post-count score into a node color.

    "red" for scores above 4, "green" for scores above 2, "blue" for the rest.
    """
    return "red" if size > 4 else ("green" if size > 2 else "blue")

# --- Singapore Expat reply network: node and edge preparation ---
# Candidate nodes are every poster plus every reply target. A name is kept only
# when it is a string AND is not the "-" placeholder. The type check must come
# first and be joined with `and`: with `or`, non-string entries (presumably NaN
# for missing values) reach .strip() and raise, while "-" is never filtered.
nodes = [author for author in expat_df["Author"].values] + [reply_to for reply_to in expat_df["Reply to"].values]
nodes = [node for node in nodes if type(node) == str and node.strip() != "-"]

# One (author, reply target) pair per post, kept only when both ends are real
# usernames. The "-" exclusion mirrors the node filter above; without it an edge
# would silently re-add "-" as a node when the edges are added to the graph.
edges = [(author, reply_to) for author, reply_to in expat_df[["Author", "Reply to"]].values]
edges = [edge for edge in edges if (type(edge[0]) == str and type(edge[1]) == str and edge[0].strip() != "-" and edge[1].strip() != "-")]

# Every distinct directed pair starts at weight 1. When a pair's reverse occurs
# later in insertion order, the later pair absorbs it (weight + 1) and the
# earlier pair is zeroed so it is dropped below. The position lookup finds the
# reverse directly instead of scanning all pairs for each pair.
pairwise_weight = {edge: 1 for edge in edges}
edge_position = {edge: index for index, edge in enumerate(pairwise_weight)}
for edge, index in edge_position.items():
    reverse = edge[::-1]
    # A self-loop is its own reverse at the same position, so it is left alone.
    if edge_position.get(reverse, -1) > index:
        pairwise_weight[reverse] += 1
        pairwise_weight[edge] = 0

pairwise_weight = {key: pairwise_weight[key] for key in pairwise_weight if pairwise_weight[key] > 0}
# Edge weight grows exponentially (base 10) with the pair weight.
weighted_edges = [(key[0], key[1], 10**pairwise_weight[key] ) for key in pairwise_weight]

# Node score: 1 for being present, plus 1 for every post the user authored.
degrees = {node : 1 for node in nodes}

# Count posts from the Expat frame itself (this previously read hwz_df, so
# Expat node sizes and colors reflected HWZ activity).
for author in expat_df["Author"].values:
    if author in degrees:
        degrees[author] += 1

# Attach the display attributes consumed when the graph is rendered.
nodes = [(node, {"size" : 2*degrees[node], "color" : color(degrees[node])}) for node in nodes]

In [199]:
# Register the nodes (with their size/color attributes) and the weighted edges on the graph.
G.add_nodes_from(nodes)
G.add_weighted_edges_from(weighted_edges)

In [200]:
# Render the Singapore Expat graph with pyvis on an 800px x 800px canvas.
nt = Network(height='800px', width='800px')

# from_nx populates the pyvis nodes and edges data structures from the networkx graph
nt.from_nx(G)

expat_output_html = file_path + 'expat_network.html'
nt.show(expat_output_html)