In [131]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from pyvis.network import Network

import networkx as nx
# Common path prefix for the exported network pages; the export cells below
# append "<forum>_network.html" to this string.
file_path = "./Castlery/castlery_"

In [132]:
# Load the cleaned, consolidated forum posts (the "Source" column tells the forums apart).
compiled_df = pd.read_csv("../Clean Data/clean_consolidated_castlery (forum).csv") 

In [133]:
# Inspect which columns the consolidated frame provides.
compiled_df.columns

Index(['Unnamed: 0', 'Author', 'Body', 'Reply to', 'Message Replying to',
       'Permalink', 'Score', 'Subpage/Subreddit', 'TimeStamp', 'Year',
       'Source'],
      dtype='object')

In [134]:
# Distinct forum names present in the "Source" column, in order of first appearance.
compiled_df["Source"].unique().tolist()


['Reddit', 'Singapore Expat', 'HardwareZone']

In [135]:
# Split the consolidated posts into one frame per forum.
hwz_df, reddit_df, expat_df = (
    compiled_df[compiled_df["Source"] == forum]
    for forum in ("HardwareZone", "Reddit", "Singapore Expat")
)


In [136]:
# (rows, columns) per forum, printed in the order HardwareZone, Reddit, Singapore Expat.
for forum_df in (hwz_df, reddit_df, expat_df):
    print(forum_df.shape)

(96, 11)
(31, 11)
(1, 11)


In [137]:
# How many HardwareZone posts reply to each user (most replied-to first).
hwz_df["Reply to"].value_counts()


Gixxerfied             20
purpleberry             6
pacificstorm            4
d3n                     3
snowydust               3
DarthGW                 2
ironut                  2
jacko123                2
hwaa                    2
Asakura                 2
donnatona               2
popdod                  2
Elfblade                1
Bunny2016               1
orbvius                 1
cybercom8               1
BizarreLoveTriangle     1
chewchun                1
AspireX                 1
Ivanlee80               1
trepies                 1
AnTzX_                  1
kaypohchee              1
Niccolo Machiavelli     1
KnightDroid             1
gnoes85                 1
walkofwinners           1
Repsol_2                1
mero_cwj                1
dilphinus               1
_Dave_                  1
Name: Reply to, dtype: int64

In [138]:
# Empty undirected graph that will hold the HardwareZone reply network.
G = nx.Graph()

In [139]:
def color(size):
    """Pick the display colour for a node from its activity score.

    Returns "red" when ``size`` is above 4, "green" when it is above 2
    (up to and including 4), and "blue" for everything else.
    """
    return "red" if size > 4 else "green" if size > 2 else "blue"

# --- Build the HardwareZone reply network -----------------------------------

# Every poster and every reply target becomes a node; missing values (NaN,
# i.e. non-strings) are dropped. Duplicates are kept on purpose: adding the
# same node to the graph twice is harmless.
nodes = [
    name
    for name in [*hwz_df["Author"].values, *hwz_df["Reply to"].values]
    if isinstance(name, str)
]

# (author, reply_to) pairs for which both ends are known.
edges = [
    (author, reply_to)
    for author, reply_to in hwz_df[["Author", "Reply to"]].values
    if isinstance(author, str) and isinstance(reply_to, str)
]

# Keying by the pair collapses repeated replies in the same direction into a
# single entry of weight 1.
pairwise_weight = {edge: 1 for edge in edges}

# Merge reciprocal pairs: when both (a, b) and (b, a) are present, the one
# that appears first is zeroed out and the later one is bumped to weight 2.
# Looking the reverse pair up by position avoids scanning every pair of edges.
edge_position = {edge: index for index, edge in enumerate(pairwise_weight)}
for edge, index in edge_position.items():
    reverse = (edge[1], edge[0])
    if edge_position.get(reverse, -1) > index:
        pairwise_weight[reverse] += 1
        pairwise_weight[edge] = 0

# Drop the zeroed-out halves of reciprocal pairs.
pairwise_weight = {edge: weight for edge, weight in pairwise_weight.items() if weight > 0}
# Edge weight is exponential in the merged count: 2 for one-way, 4 for reciprocal.
weighted_edges = [
    (source, target, 2 ** weight)
    for (source, target), weight in pairwise_weight.items()
]

# Activity score per user: 1 as a baseline plus one for every post they authored.
degrees = {node: 1 for node in nodes}
for author in hwz_df["Author"].values:
    if author in degrees:
        degrees[author] += 1

# Attach the display attributes (size and colour) derived from the activity score.
nodes = [
    (node, {"size": 2 * degrees[node], "color": color(degrees[node])})
    for node in nodes
]



In [140]:
# Add the nodes with their size/colour attributes, then the weighted reply edges.
G.add_nodes_from(nodes)
G.add_weighted_edges_from(weighted_edges)

In [141]:
# pyvis network sized 1000px by 1000px.
nt = Network('1000px', '1000px')
# Copy the nodes and edges of the networkx graph G into the pyvis network.

nt.from_nx(G)
# Export to "<file_path>hwz_network.html".
nt.show(file_path+'hwz_network.html')


In [142]:
# How many Reddit posts reply to each user (most replied-to first).
reddit_df["Reply to"].value_counts()

-    31
Name: Reply to, dtype: int64

In [143]:
# Rebind G to a fresh, empty graph so the Reddit network starts without the previous nodes.
G= nx.Graph()

In [144]:
def color(size):
    """Translate a node's activity score into a colour name.

    The first threshold that ``size`` strictly exceeds decides the colour:
    above 4 gives "red", above 2 gives "green"; otherwise the result is "blue".
    """
    for threshold, shade in ((4, "red"), (2, "green")):
        if size > threshold:
            return shade
    return "blue"

# --- Build the Reddit reply network -----------------------------------------

# Every poster and every reply target becomes a node. A name is kept only if
# it is a real string AND not the "-" placeholder (presumably marking posts
# with no reply target - confirm against the cleaning step). Both conditions
# must hold: checking the type first also keeps .strip() away from NaN values.
nodes = [
    name
    for name in [*reddit_df["Author"].values, *reddit_df["Reply to"].values]
    if isinstance(name, str) and name.strip() != "-"
]

# (author, reply_to) pairs for which both ends are real user names.
edges = [
    (author, reply_to)
    for author, reply_to in reddit_df[["Author", "Reply to"]].values
    if isinstance(author, str)
    and isinstance(reply_to, str)
    and author.strip() != "-"
    and reply_to.strip() != "-"
]

# Keying by the pair collapses repeated replies in the same direction into a
# single entry of weight 1.
pairwise_weight = {edge: 1 for edge in edges}

# Merge reciprocal pairs: when both (a, b) and (b, a) are present, the one
# that appears first is zeroed out and the later one is bumped to weight 2.
# Looking the reverse pair up by position avoids scanning every pair of edges.
edge_position = {edge: index for index, edge in enumerate(pairwise_weight)}
for edge, index in edge_position.items():
    reverse = (edge[1], edge[0])
    if edge_position.get(reverse, -1) > index:
        pairwise_weight[reverse] += 1
        pairwise_weight[edge] = 0

# Drop the zeroed-out halves of reciprocal pairs.
pairwise_weight = {edge: weight for edge, weight in pairwise_weight.items() if weight > 0}
# Edge weight is exponential in the merged count: 10 for one-way, 100 for reciprocal.
weighted_edges = [
    (source, target, 10 ** weight)
    for (source, target), weight in pairwise_weight.items()
]

# Activity score per user: 1 as a baseline plus one for every Reddit post
# they authored (counted from reddit_df, the frame this network is built on).
degrees = {node: 1 for node in nodes}
for author in reddit_df["Author"].values:
    if author in degrees:
        degrees[author] += 1

# Attach the display attributes (size and colour) derived from the activity score.
nodes = [
    (node, {"size": 2 * degrees[node], "color": color(degrees[node])})
    for node in nodes
]

In [145]:
# Add the nodes with their size/colour attributes, then the weighted reply edges.
G.add_nodes_from(nodes)
G.add_weighted_edges_from(weighted_edges)

In [146]:
# pyvis network sized 800px by 800px.
nt = Network('800px', '800px')
# Copy the nodes and edges of the networkx graph G into the pyvis network.

nt.from_nx(G)
# Export to "<file_path>reddit_network.html".
nt.show(file_path+'reddit_network.html')

In [147]:
# How many Singapore Expat posts reply to each user (most replied-to first).
expat_df["Reply to"].value_counts()

ropentie     1
Name: Reply to, dtype: int64

In [148]:
# Rebind G to a fresh, empty graph so the Singapore Expat network starts without the previous nodes.
G= nx.Graph()

In [149]:
def color(size):
    """Choose a node colour from its activity score.

    Starts at "blue", upgrades to "green" once ``size`` exceeds 2 and to
    "red" once it exceeds 4.
    """
    shade = "blue"
    if size > 2:
        shade = "green"
    if size > 4:
        shade = "red"
    return shade

# --- Build the Singapore Expat reply network --------------------------------

# Every poster and every reply target becomes a node. A name is kept only if
# it is a real string AND not the "-" placeholder (presumably marking posts
# with no reply target - confirm against the cleaning step). Both conditions
# must hold: checking the type first also keeps .strip() away from NaN values.
nodes = [
    name
    for name in [*expat_df["Author"].values, *expat_df["Reply to"].values]
    if isinstance(name, str) and name.strip() != "-"
]

# (author, reply_to) pairs for which both ends are real user names. The "-"
# placeholder is excluded here as well, otherwise adding the edge would
# re-create the node that the filter above removed.
edges = [
    (author, reply_to)
    for author, reply_to in expat_df[["Author", "Reply to"]].values
    if isinstance(author, str)
    and isinstance(reply_to, str)
    and author.strip() != "-"
    and reply_to.strip() != "-"
]

# Keying by the pair collapses repeated replies in the same direction into a
# single entry of weight 1.
pairwise_weight = {edge: 1 for edge in edges}

# Merge reciprocal pairs: when both (a, b) and (b, a) are present, the one
# that appears first is zeroed out and the later one is bumped to weight 2.
# Looking the reverse pair up by position avoids scanning every pair of edges.
edge_position = {edge: index for index, edge in enumerate(pairwise_weight)}
for edge, index in edge_position.items():
    reverse = (edge[1], edge[0])
    if edge_position.get(reverse, -1) > index:
        pairwise_weight[reverse] += 1
        pairwise_weight[edge] = 0

# Drop the zeroed-out halves of reciprocal pairs.
pairwise_weight = {edge: weight for edge, weight in pairwise_weight.items() if weight > 0}
# Edge weight is exponential in the merged count: 10 for one-way, 100 for reciprocal.
weighted_edges = [
    (source, target, 10 ** weight)
    for (source, target), weight in pairwise_weight.items()
]

# Activity score per user: 1 as a baseline plus one for every Singapore Expat
# post they authored (counted from expat_df, the frame this network is built on).
degrees = {node: 1 for node in nodes}
for author in expat_df["Author"].values:
    if author in degrees:
        degrees[author] += 1

# Attach the display attributes (size and colour) derived from the activity score.
nodes = [
    (node, {"size": 2 * degrees[node], "color": color(degrees[node])})
    for node in nodes
]

In [150]:
# Add the nodes with their size/colour attributes, then the weighted reply edges.
G.add_nodes_from(nodes)
G.add_weighted_edges_from(weighted_edges)

In [151]:
# pyvis network sized 800px by 800px.
nt = Network('800px', '800px')
# Copy the nodes and edges of the networkx graph G into the pyvis network.

nt.from_nx(G)
# Export to "<file_path>expat_network.html".
nt.show(file_path+'expat_network.html')