# Building the Senator Graph

This notebook provides the code for integrating the senator, Twitter, roll call, contribution, and named-entity data into a Neo4j graph.

In [7]:
import os

import neo4j
import pandas as pd

In [2]:
# Connection settings for the local Neo4j DBMS. Every value can be overridden
# through an environment variable, so credentials and machine-specific paths do
# not have to be edited in the notebook; the fallbacks are the original values
# (a Neo4j Desktop DBMS created with password "senate").
driver = neo4j.GraphDatabase.driver(
    uri=os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "senate"),
    ),
)

# Directory the CSV files are written to before being read back with LOAD CSV.
# The default was copied from the Neo4j Desktop browser for this DBMS.
neo4j_import_dir = os.environ.get(
    "NEO4J_IMPORT_DIR",
    "/Users/zacharywallace/Library/Application Support/Neo4j Desktop/Application/relate-data/dbmss/dbms-3ae47622-2d3d-4cb6-8765-69856981af92/import",
)

In [3]:

def run(query):
    """Execute a Cypher query through the module-level ``driver``.

    Args:
        query: Cypher statement, passed unchanged to ``session.run``.

    Returns:
        A ``pandas.DataFrame`` with one row per returned record, built from
        each record's ``data()`` dictionary. Empty when the query returns
        no records.
    """
    with driver.session() as session:
        # Collect the records inside the `with` block, before the session closes.
        rows = [record.data() for record in session.run(query)]
    return pd.DataFrame(rows)

In [4]:
# Wipe the database: match every node and delete it together with its relationships.
clear_graph_query = """
MATCH (n)
DETACH DELETE n
"""
run(clear_graph_query)

## Load senator nodes

In [5]:
# Raw senator table; create_senator reads its Name, party_code, state_abbrev,
# icpsr and Twitter_username columns.
senators = pd.read_csv("all_senate_data.csv")

# Numeric party codes from the `party_code` column; any code not listed here
# is labelled "other" by create_senator.
code_to_party = {100: "democrat", 200: "republican"}


def create_senator(senator):
    """Map one row of the senators table to the attributes of a Senator node.

    Args:
        senator: Row (mapping or ``pandas.Series``) providing ``Name``,
            ``party_code``, ``state_abbrev``, ``icpsr`` and
            ``Twitter_username``.

    Returns:
        A ``pandas.Series`` with the keys ``name``, ``party``, ``state``,
        ``icpsr`` and ``twitter``. ``party`` is looked up in
        ``code_to_party`` and falls back to ``"other"``.
    """
    return pd.Series(
        {
            "name": senator["Name"],
            "party": code_to_party.get(senator["party_code"], "other"),
            "state": senator["state_abbrev"],
            "icpsr": senator["icpsr"],
            "twitter": senator["Twitter_username"],
        }
    )
# Build one row of node attributes per senator.
senator_nodes = senators.apply(create_senator, axis=1)
# index=False keeps pandas' positional index out of the file; otherwise it is
# written as an extra column with an empty header that the load query never reads.
senator_nodes.to_csv(f"{neo4j_import_dir}/senator_nodes.csv", index=False)

In [6]:
# Load senators: one Senator node per ICPSR id.
# The node is merged on its key (`id`) only and the other attributes are SET
# afterwards. MERGE rejects null property values, so merging on the whole
# property map aborted the load whenever a field such as the Twitter handle was
# empty, and it created a second node if any attribute differed on a re-run.
run("""
LOAD CSV WITH HEADERS FROM "file:///senator_nodes.csv" as row
MERGE (s:Senator {id: toInteger(row.icpsr)})
SET s.name = row.name,
    s.party = row.party,
    s.state = row.state,
    s.twitter_username = row.twitter
""")

## Load roll call nodes

In [8]:

# Raw roll call table; create_rollcall reads its rollnumber, date, yea_count,
# nay_count, vote_result, vote_desc and bill_number columns.
rollcalls = pd.read_csv("S117_rollcalls.csv")

def create_rollcall(rollcall):
    """Map one row of the roll call table to the attributes of a Rollcall node.

    Args:
        rollcall: Row (mapping or ``pandas.Series``) providing ``rollnumber``,
            ``date``, ``yea_count``, ``nay_count``, ``vote_result``,
            ``vote_desc`` and ``bill_number``.

    Returns:
        A ``pandas.Series`` with the keys ``rollnumber``, ``date``,
        ``yea_count``, ``nea_count``, ``margin``, ``result``, ``desc`` and
        ``bill_number``. ``margin`` is the absolute difference between the
        yea and nay counts.
    """
    yeas = rollcall["yea_count"]
    nays = rollcall["nay_count"]
    return pd.Series(
        {
            "rollnumber": rollcall["rollnumber"],
            "date": rollcall["date"],
            "yea_count": yeas,
            # Spelled "nea_count" on purpose: the Rollcall load query reads this column name.
            "nea_count": nays,
            "margin": abs(yeas - nays),
            "result": rollcall["vote_result"],
            "desc": rollcall["vote_desc"],
            "bill_number": rollcall["bill_number"],
        }
    )

# Build one row of node attributes per roll call.
rollcall_nodes = rollcalls.apply(create_rollcall, axis=1)
# Missing bill numbers become the literal "N/A" rather than an empty CSV field.
rollcall_nodes["bill_number"] = rollcall_nodes["bill_number"].fillna("N/A")
# index=False keeps pandas' positional index out of the file; otherwise it is
# written as an extra column with an empty header that the load query never reads.
rollcall_nodes.to_csv(f"{neo4j_import_dir}/rollcall_nodes.csv", index=False)

In [9]:

# Load all roll calls that have a description: one Rollcall node per roll number.
# The node is merged on its key (`rollnumber`) only and the other attributes are
# SET afterwards. MERGE rejects null property values, so merging on the whole
# property map aborted the load on any empty field and duplicated nodes when an
# attribute differed on a re-run. The `desc is not null` filter is kept so the
# set of roll calls that get a node is unchanged.
run("""
LOAD CSV WITH HEADERS FROM "file:///rollcall_nodes.csv" as row
WITH row WHERE row.desc is not null
MERGE (r:Rollcall {rollnumber: toInteger(row.rollnumber)})
SET r.date = row.date,
    r.yea_count = toInteger(row.yea_count),
    r.nea_count = toInteger(row.nea_count),
    r.margin = toInteger(row.margin),
    r.result = row.result,
    r.desc = row.desc,
    r.bill_number = row.bill_number
""")

## Load senator - roll call voting results as relationships

In [10]:
# Individual votes; create_vote_rel reads icpsr, rollnumber and cast_code from each row.
votes = pd.read_csv("S117_votes.csv")
# cast_code values with a dedicated label; every other code is labelled
# "other" by create_vote_rel.
vote_types = {1: "yea", 6: "nay"}


def create_vote_rel(vote):
    """Map one row of the votes table to a senator -> roll call vote record.

    Args:
        vote: Row (mapping or ``pandas.Series``) providing ``icpsr``,
            ``rollnumber`` and ``cast_code``.

    Returns:
        A ``pandas.Series`` with ``senator_id`` and ``rollnumber`` converted
        with ``int`` and ``vote_type`` set to ``"yea"``, ``"nay"`` or
        ``"other"``.
    """
    relationship = {
        "senator_id": int(vote["icpsr"]),
        "rollnumber": int(vote["rollnumber"]),
        "vote_type": vote_types.get(vote["cast_code"], "other"),
    }
    return pd.Series(relationship)
# One (senator_id, rollnumber, vote_type) row per row of the votes table.
vote_rels = votes.apply(create_vote_rel, axis=1)
# index=False keeps pandas' positional index out of the file; otherwise it is
# written as an extra column with an empty header that the load query never reads.
vote_rels.to_csv(f"{neo4j_import_dir}/vote_relationships.csv", index=False)


In [11]:
# Create a Voted relationship from each senator to each roll call they voted on,
# with the vote type stored as a relationship property. Rows whose senator id or
# roll number matches no existing node produce no relationship.
vote_relationship_query = """
LOAD CSV WITH HEADERS FROM "file:///vote_relationships.csv" as row
MATCH
  (s:Senator {id: toInteger(row.senator_id)}),
  (r:Rollcall {rollnumber: toInteger(row.rollnumber)})
MERGE (s)-[v:Voted {vote_type: row.vote_type}]->(r)
"""
run(vote_relationship_query)

## Load Tweet nodes

In [12]:
# Raw tweet table. The load queries read its Tweet, Retweets, Replies, Likes,
# Quotes and TwitterID columns.
tweets = pd.read_csv("direct_tweet_attribs.csv")
# Sequential 0-based id used as the key of each Tweet node.
tweets["id"] = list(range(len(tweets)))
# index=False keeps pandas' positional index out of the file; otherwise it is
# written as an extra column with an empty header that the load queries never read.
tweets.to_csv(f"{neo4j_import_dir}/tweets.csv", index=False)

In [13]:
# Load tweets as nodes: one Tweet node per tweet id.
# The node is merged on its key (`id`) only and the other attributes are SET
# afterwards. MERGE rejects null property values, so merging on the whole
# property map aborted the load on any empty field (e.g. a missing count) and
# duplicated nodes when an attribute differed on a re-run.
run("""
LOAD CSV WITH HEADERS FROM "file:///tweets.csv" as row
MERGE (t:Tweet {id: toInteger(row.id)})
SET t.text = row.Tweet,
    t.retweets = toInteger(row.Retweets),
    t.replies = toInteger(row.Replies),
    t.likes = toInteger(row.Likes),
    t.quotes = toInteger(row.Quotes)
""")

In [14]:
# Connect each tweet to the senator whose twitter_username equals the row's
# TwitterID, using a Tweeted relationship from the senator to the tweet.
tweeted_relationship_query = """
LOAD CSV WITH HEADERS FROM "file:///tweets.csv" as row
MATCH
  (t:Tweet {id: toInteger(row.id)}),
  (s:Senator {twitter_username: row.TwitterID})
MERGE (s)-[tw:Tweeted]->(t)
"""
run(tweeted_relationship_query)

## Load tweet and roll call description named entities as nodes, create relationships

In [15]:
# Named entities found in tweets. The entity text is lower-cased so that
# mentions differing only in capitalisation share one name.
tweet_ners = pd.read_csv("direct_tweet_ners.csv")
tweet_ners["Text"] = tweet_ners["Text"].str.lower()

# Named entities found in roll call descriptions, normalised the same way.
roll_ners = pd.read_csv("rollcall_ners.csv")
roll_ners["Text"] = roll_ners["Text"].str.lower()

# index=False keeps pandas' positional index out of the files; otherwise it is
# written as an extra column with an empty header that the load queries never read.
tweet_ners.to_csv(f"{neo4j_import_dir}/tweet_ners.csv", index=False)
roll_ners.to_csv(f"{neo4j_import_dir}/roll_ners.csv", index=False)

In [16]:
# Create one Entity node per distinct (type, name) pair. The same MERGE is run
# over both NER files: tweet entities first, then roll call entities as
# additional nodes.
entity_node_queries = (
    """
LOAD CSV WITH HEADERS FROM "file:///tweet_ners.csv" as row
MERGE (e:Entity {type: row.NER_Lable, name: row.Text})
""",
    """
LOAD CSV WITH HEADERS FROM "file:///roll_ners.csv" as row
MERGE (e:Entity {type: row.NER_Lable, name: row.Text})
""",
)
for entity_node_query in entity_node_queries:
    run(entity_node_query)

In [17]:
# Load relationships between tweets and the named entities they mention.
# Entity nodes are keyed by (type, name), so the entity is matched on both.
# Matching on the name alone also linked the tweet to every same-named entity
# of a different type.
run("""
LOAD CSV WITH HEADERS FROM "file:///tweet_ners.csv" as row
MATCH
  (t:Tweet {text: row.Tweet}),
  (e:Entity {type: row.NER_Lable, name: row.Text})
MERGE (t)-[m:Mentions]->(e)
""")

In [18]:
# Load relationships between roll call descriptions and the named entities they mention.
# Entity nodes are keyed by (type, name), so the entity is matched on both.
# Matching on the name alone also linked the roll call to every same-named
# entity of a different type.
run("""
LOAD CSV WITH HEADERS FROM "file:///roll_ners.csv" as row
MATCH
  (t:Rollcall {desc: row.vote_desc}),
  (e:Entity {type: row.NER_Lable, name: row.Text})
MERGE (t)-[m:Mentions]->(e)
""")

## Load contributions to senators as nodes, create relationships

In [19]:
# Contribution rows; any row with a missing value in any column is dropped.
contributions = pd.read_csv("contributions_with_icpsr.csv")
contributions = contributions.dropna()
# Normalise contributor names (lower-case, no surrounding whitespace) so that
# spelling variants of one contributor share a name.
contributions["contributor"] = contributions["contributor"].str.lower().str.strip()
# index=False keeps pandas' positional index out of the file; otherwise it is
# written as an extra column with an empty header that the load queries never read.
contributions.to_csv(f"{neo4j_import_dir}/contribution_relationships.csv", index=False)

In [20]:
# Create one Contributor node per distinct contributor name.
contributor_node_query = """
LOAD CSV WITH HEADERS FROM "file:///contribution_relationships.csv" as row
MERGE (c:Contributor {name: row.contributor})
"""
run(contributor_node_query)

# Connect each contributor to the senator with the row's ICPSR id. The
# DonatedTo relationship stores the row's total, pac and individual values
# (as integers) and its race value.
donation_relationship_query = """
LOAD CSV WITH HEADERS FROM "file:///contribution_relationships.csv" as row
MATCH
  (c:Contributor {name: row.contributor}),
  (s:Senator {id: ToInteger(row.icpsr)})
MERGE (c)-[d:DonatedTo {total: ToInteger(row.total), pac: ToInteger(row.pac), individual: ToInteger(row.individual), race:row.race}]->(s)
"""
run(donation_relationship_query)