In [2]:
import praw
import numpy as np
import pandas as pd
import networkx as nx


In [3]:
# reddit parameters for using api
reddit = praw.Reddit(
    client_id = "Ml0WipdEEPOxQpUkz1IpXA",
    client_secret = "060b8giYxRkM1MnSlGWyM5iaxH861A",
    user_agent = "Medicine Analysis Bot"
)
    

In [4]:
# creation of access to subreddit posts using api
subreddit_posts = reddit.subreddit("AskDocs").top(time_filter = "month", limit = 10)

In [4]:
# grabbing list of flairs to be modified later for easy analysis
flairs = set()
for post in subreddit_posts:
    if post.author_flair_text:
        flairs.add(post.author_flair_text)
    
    post.comments.replace_more(limit=None)
      
    for comment in post.comments.list():
        if comment.author_flair_text:
            flairs.add(comment.author_flair_text)

print(flairs)
    

{'Physician | Top Contributor', 'Layperson/not verified as healthcare professional', 'Physician - Cardiology ', 'Physician - Oral and Maxillofacial Surgery', 'Registered Nurse', 'Physician - Neurology', 'Physician | Moderator | Top Contributor', 'Medical Student', 'Physician | Heme/Onc', 'Registered Midwife', 'Physician - Neurosurgery', 'Physician - Pediatrics', 'Physician Assistant', 'This user has not yet been verified.', 'Physician - Family Medicine', 'Physician - Cardiology/Electrophysiology', 'Physician/Neurosurgeon', 'Physician - Critical Care', 'Pharmacy Technician', 'Physician - Anesthesiology', 'Physician', 'Physician - Internal Medicine', 'Nurse Practitioner', 'Layperson/not verified as healthcare professional.', 'Paramedic', 'Physician - Ob/Gyn', 'Midwife', 'Pharmacist', 'Physician | Emergency Medicine', 'B.S., Medical Lab Sciences'}


In [6]:
# creation of directed graph
graph = nx.DiGraph()

In [7]:
def get_flair(post_type):
    """
    extracts reddit flair title from the user that posted 
    """
    flair = post_type.author_flair_text
    if flair is None:
        flair = "No Flair"
    return flair

In [8]:
def add_edge(author, poster, score):
    """
    adds edge to the directed graph or checks if it exists to add score
    """
    if graph.has_edge(author,poster):
        graph[author][poster]["weight"] += score
    else:
        graph.add_edge(author,poster,weight = score)


In [9]:
# accessing subreddit posts
subreddit_posts = reddit.subreddit("AskDocs").top(time_filter = "month", limit = 10)

# main code to create graph
for post in subreddit_posts:
    
    # grabs initial poster and adds a node with their flair in tact
    initial_poster = str(post.author)
    if initial_poster == "None":
        continue   
    init_post_flair = get_flair(post)
    graph.add_node(initial_poster, flair = init_post_flair)

    #allows grabbing of every comment in the thread
    post.comments.replace_more(limit=None)
    for comment in post.comments.list():

        # grabs the author of a comment for each comment in the thread with their flair and score(upvotes)
        author = str(comment.author)
        score = comment.score
        if author == "None":
            continue
        author_flair = get_flair(comment)
        graph.add_node(author, flair = author_flair)

        # adds edge between author and poster
        add_edge(author,initial_poster,score)


        # grabs each parent comment and adds with flair
        parent_comment = comment.parent()
        parent_commenter = str(parent_comment.author)
        if parent_commenter == "None":
            continue
        parent_flair = get_flair(parent_comment)
        graph.add_node(parent_commenter, flair = parent_flair)

        # adds edge between author and parent commentor
        add_edge(author, parent_commenter, score)

        # removes AutoModerator from all discussions
        for user in list(graph.nodes):
            if user == "AutoModerator":
                graph.remove_node(user)


print(f"Nodes: {len(graph.nodes())}")
print(f"Edges: {len(graph.edges())}")

Nodes: 609
Edges: 1261


In [10]:
flair_map = {
    "Midwife": "Midwife",
    "Registered Nurse": "Nurse",
    "Physician - Family Medicine": "Physician",
    "Physician": "Physician",
    "Pharmacist": "Pharmacist",
    "Licensed Alcohol and Drug Counselor": "Counselor",
    "Physician - Oral and Maxillofacial Surgery": "Physician",
    "Nurse Practitioner": "Nurse",
    "Physician/Neurosurgeon": "Physician",
    "Physician - Internal Medicine": "Physician",
    "Layperson/not verified as healthcare professional": "Layperson",
    "This user has not yet been verified.": "Layperson",
    "B.S., Medical Lab Sciences": "Scientist",
    "Physician | Moderator | Top Contributor": "Physician",
    "Physician - Neurology": "Physician",
    "Physician | Moderator": "Physician",
    "Physician - Cardiology/Electrophysiology": "Physician",
    "Physician Assistant": "Nurse",
    "Physician - Pulmonology/critical care": "Physician",
    "Medical Imaging Technologist": "Technologist",
    "Paramedic": "Paramedic",
    "Medical Student": "MedStudent",
    "Pharmacy Technician": "Technologist",
    "Physician - Pediatrics": "Physician",
    "Layperson/not verified as healthcare professional.": "Layperson",
    "Respiratory Therapist": "Therapist",
    "Physician - Cardiology ": "Physician",
    "Physician - Neurosurgery": "Physician",
    "Physician - Critical Care": "Physician"
}
 
    

In [11]:
for node, attribute in graph.nodes(data=True):
    old_flair = attribute["flair"]
    clean_flair = flair_map.get(old_flair,"No Flair")
    graph.nodes[node]["clean_flair"] = clean_flair

In [12]:
nx.write_graphml(graph, "Doctor_Advice.graphml")
print("Graph Successfully Exported to Doctor_Advice.graphml")

Graph Successfully Exported to Doctor_Advice.graphml


In [19]:
graph = nx.read_graphml("Doctor_Advice.graphml")
print("Doctor_advice.graphml Succesfully imported")

print(f"Nodes: {len(graph.nodes())}")
print(f"Edges: {len(graph.edges())}")

Doctor_advice.graphml Succesfully imported
Nodes: 609
Edges: 1261


In [20]:
top_count = 10

In [21]:
def centrality_metrics(list_of_users, type_of_centrality):
    
    for reddit_user in sorted(type_of_centrality, key = type_of_centrality.get, reverse = True)[:top_count]:
        flair = graph.nodes[reddit_user]["clean_flair"]
        centrality = type_of_centrality[reddit_user]
    
        list_of_users.append({
            "Flair": flair,
            "measure": centrality,
        })
    return list_of_users


In [22]:
centrality_deg = nx.degree_centrality(graph)
print(f"\n Top {top_count} Reddit Users by degree Centrality")

centrality_user = []
centrality_df = pd.DataFrame(centrality_metrics(centrality_user, centrality_deg))
centrality_df = centrality_df.rename(columns={"measure": "Degree Centrality"})
centrality_df.head(10)


 Top 10 Reddit Users by degree Centrality


Unnamed: 0,Flair,Degree Centrality
0,Layperson,0.366776
1,Layperson,0.241776
2,Layperson,0.15625
3,Layperson,0.15625
4,Layperson,0.111842
5,Layperson,0.070724
6,Layperson,0.069079
7,Layperson,0.059211
8,Layperson,0.052632
9,Layperson,0.047697


In [23]:
centrality_pr = nx.pagerank(graph, weight = "weight")
print(f"\n Top {top_count} Reddit Users by Pagerank")

page_rank_user = []
pagerank_df = pd.DataFrame(centrality_metrics(page_rank_user,centrality_pr))
pagerank_df = pagerank_df.rename(columns={"measure": "Pagerank"})
pagerank_df.head(10)


 Top 10 Reddit Users by Pagerank


Unnamed: 0,Flair,Pagerank
0,Layperson,0.165788
1,Layperson,0.106774
2,Layperson,0.073564
3,Layperson,0.059867
4,Layperson,0.039245
5,Layperson,0.032657
6,Layperson,0.028933
7,Layperson,0.026862
8,Layperson,0.024879
9,Physician,0.02382


In [24]:
centrality_btw = nx.betweenness_centrality(graph, k = 20)
print(f"\n Top {top_count} Reddit Users by Betweenness")

betweeness_user = []
betweenness_df = pd.DataFrame(centrality_metrics(betweeness_user, centrality_btw))
betweenness_df = betweenness_df.rename(columns={"measure": "Betweenness"})
betweenness_df.head(10)


 Top 10 Reddit Users by Betweenness


Unnamed: 0,Flair,Betweenness
0,Layperson,0.180915
1,Layperson,0.114198
2,Nurse,0.104115
3,Layperson,0.095058
4,Layperson,0.082942
5,Layperson,0.053894
6,Layperson,0.041064
7,Layperson,0.039425
8,Layperson,0.031659
9,Layperson,0.026584
