# Workshop 7: SNA Measures

COSC2671 Social Media and Network Analytics

Jeffrey Chan, RMIT University, 2022


In [1]:
# Enable IPython's autoreload extension. Mode 1 reloads only the modules
# registered with %aimport before each cell executes, so edits to the local
# redditClient module are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 1
%aimport redditClient

In [2]:
import networkx as nx

from redditClient import redditClient

import matplotlib.pyplot as plt

In [3]:
# GraphML file name for this workshop.
# NOTE(review): not referenced by any of the cells visible here (the graph is
# written under a different name below) - confirm which file later cells expect.
sFileName = "workshop7.graphml"

Set up the Reddit client.

In [4]:
# Reddit client, built by the local redditClient helper module.
client = redditClient()

# Handle to the subreddit we are interested in - 'python'.
subreddit = client.subreddit('python')

# Directed graph that will hold the reply network.
replyGraph = nx.DiGraph()

# Tracks the ids of submissions and comments (and who wrote them) so that the
# links in the graph can be constructed.
dSubCommentId = {}

# Sanity check: your own username should be printed.
print(client.user.me())

LuKrO99


Construct the reply graph from the Python subreddit.

In [5]:
# Build the reply graph from the subreddit's hot submissions.
#   nodes: author names; attribute 'subNum' counts the submissions by that author
#   edges: replier -> author being replied to; attribute 'replyNum' counts replies
for submission in subreddit.hot():
    print(submission)

    submissionId = submission.name

    # Per-submission map of id (submission.name / comment.name) -> author name.
    # It lets us resolve a comment's parent_id to the author being replied to.
    threadAuthors = {}
    dSubCommentId[submissionId] = threadAuthors

    # The author is None when the submission has been deleted. In that case we
    # add no node and do not register the submission id, so its top-level
    # comments are handled like replies to a deleted parent (no edge is added).
    if submission.author is not None:
        submitter = submission.author.name

        # bump the submission count of a known user, otherwise create the node
        # with 1 associated submission
        if submitter in replyGraph:
            replyGraph.nodes[submitter]['subNum'] += 1
        else:
            replyGraph.add_node(submitter, subNum=1)

        threadAuthors[submissionId] = submitter

    # for the current submission, retrieve all the associated comments
    submission.comments.replace_more(limit=None)
    for comment in submission.comments.list():

        # skip deleted comments (no author); the 'ExternalUserError' name check
        # is kept from the original code
        if comment.author is None or comment.author.name == 'ExternalUserError':
            continue

        replier = comment.author.name
        threadAuthors[comment.name] = replier

        # if we have not seen the comment's parent, the parent has been deleted
        # and there is nobody to link this reply to
        if comment.parent_id not in threadAuthors:
            continue

        parentAuthor = threadAuthors[comment.parent_id]

        # if the edge exists, increment replyNum, otherwise add a new edge
        if replyGraph.has_edge(replier, parentAuthor):
            replyGraph[replier][parentAuthor]['replyNum'] += 1
        else:
            # make sure both endpoints exist and carry a subNum attribute
            # (0 submissions seen so far) before the edge is added
            if replier not in replyGraph:
                replyGraph.add_node(replier, subNum=0)

            if parentAuthor not in replyGraph:
                replyGraph.add_node(parentAuthor, subNum=0)

            replyGraph.add_edge(replier, parentAuthor, replyNum=1)

# Save the graph to file in GraphML format.
# NOTE(review): sFileName ("workshop7.graphml") is defined in an earlier cell but
# is not used here - confirm which file name later cells expect to read.
filename = "replayGraph.graphml"
nx.write_graphml(replyGraph, filename)
print("Graph stored as " + filename)


x58m2d
x8kp0x
x8k1dh
x8ag01
x7y0xg
x8fppy
x8fqg4
x8njrc
x8nilx
x8gxhg
x8rmwj
x84av4
x8c9lz
x87oyq
x89bf5
x8s7so
x8rrk3
x7mb2s
x8ddc4
x8nzs3
x8n597
x86gok
x8gpr6
x7f9r9
x78yuc
x86pes
x76i8u
x7jofr
x7vjuv
x75qzr
x7kudf
x7o2q4
x7y5s7
x76a10
x7pxz6
x80gij
x7l6ya
x7f0dp
x6njsq
x788ac
x7f0zp
x74g4h
x7b8wq
x77ddy
x6y5s7
x6dokf
x7vpz2
x7f3oj
x73gy8
x7ctqr
x74wjm
x6hfqv
x6wugh
x6h4mw
x6nfh9
x6jsnq
x73iyd
x6mz05
x6n9d1
x70edr
x682gs
x6bt9z
x6vdb4
x5op7z
x6m6hz
x61lfg
x5wyvp
x5gjpv
x6ipae
x6hynp
x6ljtx
x6fgvn
x63nlg
x6gb21
x6cn0f
x6b659
x5yrlx
x4u928
x5nusn
x5p62u
x4xlvh
x5rdrz
x4qpf5
x5omlj
x518ju
x5myqz
x5ryg8
x5lm9a
x51q3z
x5hr9r
x4srau
x581xo
x532hc
x4gpuj
x5e8qg
x4ej6c
x50h2n
x521we
x4hq6e
x50q9x
Graph stored as replayGraph.graphml
