In [97]:
import pandas as pd
import neo4j
import math

In [98]:
# Created neo4j desktop dbms with password "senate"
driver = neo4j.GraphDatabase.driver(uri="bolt://localhost:7687", auth=("neo4j", "senate"))

# Got import dir from neo4j desktop browser
neo4j_import_dir = "/Users/aidankeogh/Library/Application Support/Neo4j Desktop/Application/relate-data/dbmss/dbms-dffef9db-3d5a-4ba6-aebc-aadc8b8f5988/import"

In [99]:

def run(query):
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            print(record)

In [100]:
run("""
MATCH (n)
DETACH DELETE n
""")

In [101]:
senators = pd.read_csv("all_senate_data.csv")

code_to_party = {100: "democrat", 200: "republican"}
def create_senator(senator):
    node_attrs = {
        "name": senator["Name"],
        "party": code_to_party.get(senator["party_code"], "other"),
        "state": senator["state_abbrev"],
        "icpsr": senator["icpsr"],
        "twitter": senator["Twitter_username"]
    }
    return pd.Series(node_attrs)
senator_nodes = senators.apply(create_senator, axis=1)
senator_nodes.to_csv(f"{neo4j_import_dir}/senator_nodes.csv")

In [102]:
# Load senators
run("""
LOAD CSV WITH HEADERS FROM "file:///senator_nodes.csv" as row
MERGE (s:Senator {name:row.name, party:row.party, state: row.state, twitter_username:row.twitter, id: toInteger(row.icpsr)})
""")

In [103]:
# Find senators who aren't republicans or democrats
run("""
MATCH (s: Senator {party: "other"})
RETURN s.name, s.party, s.id
""")

<Record s.name='Angus King' s.party='other' s.id=41300>
<Record s.name='Bernie Sanders' s.party='other' s.id=29147>


In [104]:

rollcalls = pd.read_csv("S117_rollcalls.csv")

def create_rollcall(rollcall):
    node_attrs = {
        "rollnumber": rollcall["rollnumber"],
        "date": rollcall["date"],
        "yea_count": rollcall["yea_count"],
        "nea_count": rollcall["nay_count"],
        "margin": abs(rollcall["yea_count"] - rollcall["nay_count"]),
        "result": rollcall["vote_result"],
        "desc": rollcall["vote_desc"],
        "bill_number": rollcall["bill_number"],
    }
    return pd.Series(node_attrs)

rollcall_nodes = rollcalls.apply(create_rollcall, axis=1)
rollcall_nodes["bill_number"] = rollcall_nodes["bill_number"].fillna("N/A")
rollcall_nodes.to_csv(f"{neo4j_import_dir}/rollcall_nodes.csv")

In [105]:

# Load all rollcalls
run("""
LOAD CSV WITH HEADERS FROM "file:///rollcall_nodes.csv" as row
WITH row WHERE row.desc is not null
MERGE (r:Rollcall {rollnumber: toInteger(row.rollnumber), date: row.date, yea_count: toInteger(row.yea_count), 
    nea_count: toInteger(row.nea_count), result: row.result, desc: row.desc, bill_number: row.bill_number})
""")

In [106]:
# Get all rollcalls with over 90 yeas
run("""
MATCH (r: Rollcall)
WHERE r.yea_count > 90
RETURN r.rollnumber, r.desc, r.yea_count
ORDER by r.yea_count DESC
""")

<Record r.rollnumber=16 r.desc='To establish a deficit-neutral reserve fund relating to prohibiting legislation that would increase taxes on small businesses during any period in which a national emergency has been declared with respect to a pandemic.' r.yea_count=100>
<Record r.rollnumber=21 r.desc='To establish a deficit-neutral reserve fund relating to COVID-19 vaccine administration and a public awareness campaign.' r.yea_count=100>
<Record r.rollnumber=27 r.desc='To establish a deficit-neutral reserve fund relating to improving services and interventions relating to sexual assault, family violence, domestic violence, dating violence, and child abuse.' r.yea_count=100>
<Record r.rollnumber=35 r.desc='To establish a deficit-neutral fund relating to funding the police.' r.yea_count=100>
<Record r.rollnumber=272 r.desc='A bill to deposit certain funds into the Crime Victims Fund, to waive matching requirements, and for other purposes.' r.yea_count=100>
<Record r.rollnumber=659 r.desc=

In [107]:
votes = pd.read_csv("S117_votes.csv")
vote_types = {
    1: "yea",
    6: "nay",
}
def create_vote_rel(vote):
    icpsr = vote["icpsr"]
    rollnumber = vote["rollnumber"]
    vote_type = vote_types.get(vote["cast_code"], "other")

    return pd.Series({
        "senator_id": int(icpsr),
        "rollnumber": int(rollnumber),
        "vote_type": vote_type
    })
vote_rels = votes.apply(create_vote_rel, axis=1)
vote_rels.to_csv(f"{neo4j_import_dir}/vote_relationships.csv")


In [108]:
# Load relationships between votes
run("""
LOAD CSV WITH HEADERS FROM "file:///vote_relationships.csv" as row
MATCH
  (s:Senator {id: toInteger(row.senator_id)}),
  (r:Rollcall {rollnumber: toInteger(row.rollnumber)})
MERGE (s)-[v:Voted {vote_type: row.vote_type}]->(r)
""")

In [109]:
# (TEST) Get all bills bernie sanders voted on
run("""
MATCH (s:Senator {name: "Bernie Sanders"})-[v:Voted {vote_type: "yea"}]-(r:Rollcall)
RETURN v.vote_type, r.desc, r.bill_number
""")

<Record v.vote_type='yea' r.desc='A bill to improve health care and benefits for veterans exposed to toxic substances, and for other purposes.' r.bill_number='HR3967'>
<Record v.vote_type='yea' r.desc='Radhika Fox, of California, to be an Assistant Administrator of the Environmental Protection Agency' r.bill_number='PN444'>
<Record v.vote_type='yea' r.desc='Julia Ruth Gordon, of Maryland, to be an Assistant Secretary of Housing and Urban Development' r.bill_number='PN1523'>
<Record v.vote_type='yea' r.desc='Daniel J. Kritenbrink, of Virginia, to be an Assistant Secretary of State (East Asian and Pacific Affairs)' r.bill_number='PN494'>
<Record v.vote_type='yea' r.desc='Rachel Leland Levine, of Pennsylvania, to be an Assistant Secretary of Health and Human Services' r.bill_number='PN121'>
<Record v.vote_type='yea' r.desc='Hampton Y. Dellinger, of North Carolina, to be an Assistant Attorney General' r.bill_number='PN765'>
<Record v.vote_type='yea' r.desc='Xavier Becerra, of California, t

In [110]:
tweets = pd.read_csv("direct_tweet_attribs.csv")
tweets["id"] = list(range(0, len(tweets)))
tweets.to_csv(f"{neo4j_import_dir}/tweets.csv")

In [111]:
# Load tweets as nodes
run("""
LOAD CSV WITH HEADERS FROM "file:///tweets.csv" as row
MERGE (t:Tweet {id: toInteger(row.id), text: row.Tweet, retweets: toInteger(row.Retweets), replies: toInteger(row.Replies),
    likes: toInteger(row.Likes), quotes: toInteger(row.Quotes)})
""")

In [112]:
# Load relationships between twitter account and tweet
run("""
LOAD CSV WITH HEADERS FROM "file:///tweets.csv" as row
MATCH
  (t:Tweet {id: toInteger(row.id)}),
  (s:Senator {twitter_username: row.TwitterID})
MERGE (s)-[tw:Tweeted]->(t)
""")

In [113]:
# (TEST) Get all of Bernie Sanders' tweets, ordered by likes
run("""
MATCH (s:Senator {name: "Bernie Sanders"})-[tw:Tweeted]->(t:Tweet)
RETURN t.text, t.likes
ORDER BY t.likes DESC
""")

<Record t.text="Let's be clear. Eli Lilly should apologize for increasing the price of insulin by over 1,200% since 1996 to $275 while it costs less than $10 to manufacture. The inventors of insulin sold their patents in 1923 for $1 to save lives, not to make Eli Lilly's CEO obscenely rich." t.likes=481505>
<Record t.text='The people of Brazil have voted for democracy, workers’ rights and environmental sanity.\xa0 I congratulate @LulaOficial on his hard fought victory and look forward to a strong and prosperous relationship between the United States and Brazil.' t.likes=180877>
<Record t.text='Progressive candidates had a great election night. I want to thank everyone who got out there and helped make this happen. There will now be more strong progressives in the U.S. House than ever before. I’d like to quickly highlight a few of them.' t.likes=39733>
<Record t.text="What's going on in this country right now is class warfare. Unfortunately, the wrong side is winning. Our job now is to 

In [114]:
tweet_ners = pd.read_csv("direct_tweet_ners.csv")
tweet_ners["Text"] = tweet_ners["Text"].str.lower()

roll_ners = pd.read_csv("rollcall_ners.csv")
roll_ners["Text"] = roll_ners["Text"].str.lower()

tweet_ners.to_csv(f"{neo4j_import_dir}/tweet_ners.csv")
roll_ners.to_csv(f"{neo4j_import_dir}/roll_ners.csv")

In [115]:
# Load entities from tweets as nodes
run("""
LOAD CSV WITH HEADERS FROM "file:///tweet_ners.csv" as row
MERGE (e:Entity {type: row.NER_Lable, name: row.Text})
""")

# Load in entities of roll calls as additional nodes
run("""
LOAD CSV WITH HEADERS FROM "file:///roll_ners.csv" as row
MERGE (e:Entity {type: row.NER_Lable, name: row.Text})
""")

In [116]:
# Load relationships between tweets and named entities
run("""
LOAD CSV WITH HEADERS FROM "file:///tweet_ners.csv" as row
MATCH
  (t:Tweet {text: row.Tweet}),
  (e:Entity {name: row.Text})
MERGE (t)-[m:Mentions]->(e)
""")

In [117]:
# Load relationships between roll call descriptions and named entities
run("""
LOAD CSV WITH HEADERS FROM "file:///roll_ners.csv" as row
MATCH
  (t:Rollcall {desc: row.vote_desc}),
  (e:Entity {name: row.Text})
MERGE (t)-[m:Mentions]->(e)
""")

In [118]:
# (TEST) Get the organization entities rollcalls that Bernie Sanders voted for that are mentioned most in yea votes
run("""
MATCH (s:Senator {name: "Bernie Sanders"})-[v:Voted {vote_type: "yea"}]->(r:Rollcall)-[m:Mentions]->(e:Entity {type: "ORG"})
RETURN e.name, count(e)
ORDER BY count(e) DESC
""")

<Record e.name='state' count(e)=18>
<Record e.name='treasury' count(e)=13>
<Record e.name='the environmental protection agency' count(e)=12>
<Record e.name='defense' count(e)=12>
<Record e.name='washington' count(e)=11>
<Record e.name='the united states postal service' count(e)=10>
<Record e.name='the federal reserve system' count(e)=10>
<Record e.name='labor' count(e)=10>
<Record e.name='the superior court' count(e)=9>
<Record e.name='the ninth circuit' count(e)=9>
<Record e.name='the board of governors' count(e)=8>
<Record e.name='the national labor relations board' count(e)=7>
<Record e.name='the second circuit' count(e)=6>
<Record e.name='the office of management and budget' count(e)=6>
<Record e.name='department of homeland security' count(e)=6>
<Record e.name='housing and urban development' count(e)=5>
<Record e.name='the united states agency for international development' count(e)=5>
<Record e.name='energy' count(e)=5>
<Record e.name='department of education' count(e)=5>
<Record

In [119]:
# (TEST) Get the organization entities bernie sanders tweets about the most
run("""
MATCH (s:Senator {name: "Bernie Sanders"})-[tw:Tweeted]->(t:Tweet)-[m:Mentions]->(e:Entity {type: "ORG"})
RETURN e.name, count(e)
ORDER BY count(e) DESC
""")

<Record e.name='congress' count(e)=13>
<Record e.name='texas' count(e)=11>
<Record e.name='nevada' count(e)=3>
<Record e.name='washington' count(e)=3>
<Record e.name='senate' count(e)=3>
<Record e.name='@summerforpa' count(e)=2>
<Record e.name='social security' count(e)=2>
<Record e.name='medicare for all' count(e)=2>
<Record e.name='the u.s. house' count(e)=2>
<Record e.name='house' count(e)=2>
<Record e.name='starbucks' count(e)=2>
<Record e.name='the university of california' count(e)=1>
<Record e.name="eli lilly's" count(e)=1>
<Record e.name='eli lilly' count(e)=1>
<Record e.name='the republican party' count(e)=1>
<Record e.name='exxon' count(e)=1>
<Record e.name='medicaid' count(e)=1>
<Record e.name='medicare' count(e)=1>
<Record e.name='uaw' count(e)=1>
<Record e.name='local' count(e)=1>
<Record e.name='vermonters' count(e)=1>
<Record e.name='@nextgenamerica' count(e)=1>
<Record e.name='the u.s. congress' count(e)=1>
<Record e.name='aipac' count(e)=1>


In [120]:
# (TEST) Get the organization entities democrats tweet about the most
run("""
MATCH (s:Senator {party: "republican"})-[tw:Tweeted]->(t:Tweet)-[m:Mentions]->(e:Entity {type: "ORG"})
RETURN e.name, count(e)
ORDER BY count(e) DESC
""")

<Record e.name='biden' count(e)=421>
<Record e.name='senate' count(e)=176>
<Record e.name='congress' count(e)=99>
<Record e.name='washington' count(e)=72>
<Record e.name='gop' count(e)=47>
<Record e.name='dems' count(e)=45>
<Record e.name='fbi' count(e)=37>
<Record e.name='iowa' count(e)=35>
<Record e.name='house' count(e)=33>
<Record e.name='irs' count(e)=32>
<Record e.name='amp' count(e)=32>
<Record e.name='idaho' count(e)=32>
<Record e.name='hoosiers' count(e)=26>
<Record e.name='nato' count(e)=25>
<Record e.name='covid' count(e)=22>
<Record e.name='@senategop' count(e)=21>
<Record e.name='the white house' count(e)=19>
<Record e.name='@usmc' count(e)=18>
<Record e.name='ccp' count(e)=17>
<Record e.name='administration' count(e)=16>
<Record e.name='the biden administration' count(e)=16>
<Record e.name='@nasa' count(e)=16>
<Record e.name='warnock' count(e)=16>
<Record e.name='north dakota' count(e)=16>
<Record e.name='doj' count(e)=15>
<Record e.name='nd' count(e)=15>
<Record e.name='

In [121]:
run("""
MATCH (t:Tweet)-[m:Mentions]->(e:Entity)
RETURN e.name, count(e)
ORDER BY count(e) DESC
""")

<Record e.name='biden' count(e)=1602>
<Record e.name='today' count(e)=1036>
<Record e.name='american' count(e)=486>
<Record e.name='americans' count(e)=474>
<Record e.name='senate' count(e)=443>
<Record e.name='democrats' count(e)=404>
<Record e.name='amp' count(e)=385>
<Record e.name='washington' count(e)=348>
<Record e.name='america' count(e)=344>
<Record e.name='first' count(e)=261>
<Record e.name='congress' count(e)=253>
<Record e.name='republicans' count(e)=251>
<Record e.name='election day' count(e)=244>
<Record e.name='u.s.' count(e)=235>
<Record e.name='covid' count(e)=203>
<Record e.name='one' count(e)=186>
<Record e.name='alaska' count(e)=168>
<Record e.name='the united states' count(e)=158>
<Record e.name='utah' count(e)=138>
<Record e.name='republican' count(e)=126>
<Record e.name='tomorrow' count(e)=119>
<Record e.name='ukraine' count(e)=118>
<Record e.name='tonight' count(e)=117>
<Record e.name='5' count(e)=115>
<Record e.name='this week' count(e)=114>
<Record e.name='nev

In [122]:
contributions = pd.read_csv("contributions_with_icpsr.csv")
contributions = contributions.dropna()
contributions["contributor"] = contributions["contributor"].str.lower()
contributions["contributor"] = contributions["contributor"].str.strip()
contributions.to_csv(f"{neo4j_import_dir}/contribution_relationships.csv")

In [123]:
# Load entities for donations/contributors
run("""
LOAD CSV WITH HEADERS FROM "file:///contribution_relationships.csv" as row
MERGE (c:Contributor {name: row.contributor})
""")

run("""
LOAD CSV WITH HEADERS FROM "file:///contribution_relationships.csv" as row
MATCH
  (c:Contributor {name: row.contributor}),
  (s:Senator {id: ToInteger(row.icpsr)})
MERGE (c)-[d:DonatedTo {amount: ToInteger(row.amount), race:row.race}]->(s)
""")

In [124]:
# (TEST) Get the organization entities democrats tweet about the most
run("""
MATCH (c:Contributor)-[d:DonatedTo]->(s:Senator)
RETURN d.amount, c.name, s.name, s.party, s.state
ORDER BY d.amount DESC
""")

<Record d.amount=1084606 c.name='alphabet inc' s.name='Jon Ossoff' s.party='democrat' s.state='GA'>
<Record d.amount=721821 c.name='university of california' s.name='Jon Ossoff' s.party='democrat' s.state='GA'>
<Record d.amount=604219 c.name="emily's list" s.name='Jacky Rosen' s.party='democrat' s.state='NV'>
<Record d.amount=543528 c.name='university of michigan' s.name='Gary Peters' s.party='democrat' s.state='MI'>
<Record d.amount=512845 c.name='league of conservation voters' s.name='Jon Tester' s.party='democrat' s.state='MT'>
<Record d.amount=438722 c.name="emily's list" s.name='Maggie Hassan' s.party='democrat' s.state='NH'>
<Record d.amount=406471 c.name='democracy engine' s.name='Maggie Hassan' s.party='democrat' s.state='NH'>
<Record d.amount=405588 c.name="emily's list" s.name='Kyrsten Sinema' s.party='democrat' s.state='AZ'>
<Record d.amount=369789 c.name='microsoft corp' s.name='Jon Ossoff' s.party='democrat' s.state='GA'>
<Record d.amount=361540 c.name='apple inc' s.name='

### Possible queries to write:

(1) Get the most liked tweet by Bernie Sanders that mentions an event

(2) Get all tweets by Bernie Sanders that mention an organization also mentioned in a rollcall description for which he voted yay on

(3) Get the contributers who supported Senators that voted yes on bills mentioning "the councle of environmental quality"