In [22]:
import pandas as pd
import neo4j

In [23]:
# Created neo4j desktop dbms with password "senate"
driver = neo4j.GraphDatabase.driver(uri="bolt://localhost:7687", auth=("neo4j", "senate"))

# Got import dir from neo4j desktop browser
neo4j_import_dir = "/Users/aidankeogh/Library/Application Support/Neo4j Desktop/Application/relate-data/dbmss/dbms-dffef9db-3d5a-4ba6-aebc-aadc8b8f5988/import"

In [24]:

def run(query):
    data = []
    with driver.session() as session:
        result = session.run(query)
        for record in result:
            data.append(record.data())
    return pd.DataFrame(data)

In [25]:
run("""
MATCH (n)
DETACH DELETE n
""")

In [26]:
senators = pd.read_csv("all_senate_data.csv")

code_to_party = {100: "democrat", 200: "republican"}
def create_senator(senator):
    node_attrs = {
        "name": senator["Name"],
        "party": code_to_party.get(senator["party_code"], "other"),
        "state": senator["state_abbrev"],
        "icpsr": senator["icpsr"],
        "twitter": senator["Twitter_username"]
    }
    return pd.Series(node_attrs)
senator_nodes = senators.apply(create_senator, axis=1)
senator_nodes.to_csv(f"{neo4j_import_dir}/senator_nodes.csv")

In [27]:
# Load senators
run("""
LOAD CSV WITH HEADERS FROM "file:///senator_nodes.csv" as row
MERGE (s:Senator {name:row.name, party:row.party, state: row.state, twitter_username:row.twitter, id: toInteger(row.icpsr)})
""")

In [28]:
# Find senators who aren't republicans or democrats
run("""
MATCH (s: Senator {party: "other"})
RETURN s.name, s.party, s.id
""")

Unnamed: 0,s.name,s.party,s.id
0,Angus King,other,41300
1,Bernie Sanders,other,29147


In [29]:

rollcalls = pd.read_csv("S117_rollcalls.csv")

def create_rollcall(rollcall):
    node_attrs = {
        "rollnumber": rollcall["rollnumber"],
        "date": rollcall["date"],
        "yea_count": rollcall["yea_count"],
        "nea_count": rollcall["nay_count"],
        "margin": abs(rollcall["yea_count"] - rollcall["nay_count"]),
        "result": rollcall["vote_result"],
        "desc": rollcall["vote_desc"],
        "bill_number": rollcall["bill_number"],
    }
    return pd.Series(node_attrs)

rollcall_nodes = rollcalls.apply(create_rollcall, axis=1)
rollcall_nodes["bill_number"] = rollcall_nodes["bill_number"].fillna("N/A")
rollcall_nodes.to_csv(f"{neo4j_import_dir}/rollcall_nodes.csv")

In [30]:

# Load all rollcalls
run("""
LOAD CSV WITH HEADERS FROM "file:///rollcall_nodes.csv" as row
WITH row WHERE row.desc is not null
MERGE (r:Rollcall {rollnumber: toInteger(row.rollnumber), date: row.date, yea_count: toInteger(row.yea_count), 
    nea_count: toInteger(row.nea_count), margin: toInteger(row.margin), result: row.result, desc: row.desc, bill_number: row.bill_number})
""")

In [31]:
# Get all rollcalls with over 90 yeas
run("""
MATCH (r: Rollcall)
WHERE r.yea_count > 90
RETURN r.rollnumber, r.desc, r.yea_count
ORDER by r.yea_count DESC
""")

Unnamed: 0,r.rollnumber,r.desc,r.yea_count
0,16,To establish a deficit-neutral reserve fund re...,100
1,21,To establish a deficit-neutral reserve fund re...,100
2,27,To establish a deficit-neutral reserve fund re...,100
3,35,To establish a deficit-neutral fund relating t...,100
4,272,A bill to deposit certain funds into the Crime...,100
5,659,A bill to suspend normal trade relations treat...,100
6,660,A bill to prohibit the importation of energy p...,100
7,19,To establish a deficit-neutral reserve fund re...,99
8,26,To establish a deficit-neutral reserve fund re...,99
9,316,To prohibit enactment of the Green New Deal.,99


In [32]:
votes = pd.read_csv("S117_votes.csv")
vote_types = {
    1: "yea",
    6: "nay",
}
def create_vote_rel(vote):
    icpsr = vote["icpsr"]
    rollnumber = vote["rollnumber"]
    vote_type = vote_types.get(vote["cast_code"], "other")

    return pd.Series({
        "senator_id": int(icpsr),
        "rollnumber": int(rollnumber),
        "vote_type": vote_type
    })
vote_rels = votes.apply(create_vote_rel, axis=1)
vote_rels.to_csv(f"{neo4j_import_dir}/vote_relationships.csv")


In [33]:
# Load relationships between votes
run("""
LOAD CSV WITH HEADERS FROM "file:///vote_relationships.csv" as row
MATCH
  (s:Senator {id: toInteger(row.senator_id)}),
  (r:Rollcall {rollnumber: toInteger(row.rollnumber)})
MERGE (s)-[v:Voted {vote_type: row.vote_type}]->(r)
""")

In [34]:
# (TEST) Get all bills bernie sanders voted on
run("""
MATCH (s:Senator {name: "Bernie Sanders"})-[v:Voted {vote_type: "yea"}]-(r:Rollcall)
RETURN v.vote_type, r.desc, r.bill_number
""")

Unnamed: 0,v.vote_type,r.desc,r.bill_number
0,yea,"Michael D. Smith, of Virginia, to be Chief Exe...",PN715
1,yea,"Isobel Coleman, of New York, to be a Deputy Ad...",PN635
2,yea,A bill to authorize dedicated domestic terrori...,HR350
3,yea,"Andrew M. Luger, of Minnesota, to be United St...",PN1381
4,yea,"John Z. Lee, of Illinois, to be United States ...",PN1965
...,...,...,...
548,yea,A resolution to provide for related procedures...,SRES47
549,yea,A bill to amend the Voting Rights Act of 1965 ...,S4
550,yea,To modify the bill.,HR5376
551,yea,To establish a reserve fund relating to protec...,SCONRES14


In [35]:
tweets = pd.read_csv("direct_tweet_attribs.csv")
tweets["id"] = list(range(0, len(tweets)))
tweets.to_csv(f"{neo4j_import_dir}/tweets.csv")

In [36]:
# Load tweets as nodes
run("""
LOAD CSV WITH HEADERS FROM "file:///tweets.csv" as row
MERGE (t:Tweet {id: toInteger(row.id), text: row.Tweet, retweets: toInteger(row.Retweets), replies: toInteger(row.Replies),
    likes: toInteger(row.Likes), quotes: toInteger(row.Quotes)})
""")

In [37]:
# Load relationships between twitter account and tweet
run("""
LOAD CSV WITH HEADERS FROM "file:///tweets.csv" as row
MATCH
  (t:Tweet {id: toInteger(row.id)}),
  (s:Senator {twitter_username: row.TwitterID})
MERGE (s)-[tw:Tweeted]->(t)
""")

In [38]:
# (TEST) Get all of Bernie Sanders' tweets, ordered by likes
run("""
MATCH (s:Senator {name: "Bernie Sanders"})-[tw:Tweeted]->(t:Tweet)
RETURN t.text, t.likes
ORDER BY t.likes DESC
""")

Unnamed: 0,t.text,t.likes
0,Let's be clear. Eli Lilly should apologize for...,481505
1,"The people of Brazil have voted for democracy,...",180877
2,Progressive candidates had a great election ni...,39733
3,What's going on in this country right now is c...,25880
4,I do not think the American people should be p...,22300
...,...,...
95,RT @MoveOn: Our tour with @BernieSanders and @...,0
96,RT @BernieSanders: OUR FUTURE IS NOW: Our job ...,0
97,RT @Culinary226: 2️⃣2️⃣6️⃣👏🏾2️⃣2️⃣6️⃣👏🏿2️⃣2️⃣6...,0
98,RT @BernieSanders: LIVE FROM TEXAS: @GregCasar...,0


In [39]:
tweet_ners = pd.read_csv("direct_tweet_ners.csv")
tweet_ners["Text"] = tweet_ners["Text"].str.lower()

roll_ners = pd.read_csv("rollcall_ners.csv")
roll_ners["Text"] = roll_ners["Text"].str.lower()

tweet_ners.to_csv(f"{neo4j_import_dir}/tweet_ners.csv")
roll_ners.to_csv(f"{neo4j_import_dir}/roll_ners.csv")

In [40]:
# Load entities from tweets as nodes
run("""
LOAD CSV WITH HEADERS FROM "file:///tweet_ners.csv" as row
MERGE (e:Entity {type: row.NER_Lable, name: row.Text})
""")

# Load in entities of roll calls as additional nodes
run("""
LOAD CSV WITH HEADERS FROM "file:///roll_ners.csv" as row
MERGE (e:Entity {type: row.NER_Lable, name: row.Text})
""")

In [41]:
# Load relationships between tweets and named entities
run("""
LOAD CSV WITH HEADERS FROM "file:///tweet_ners.csv" as row
MATCH
  (t:Tweet {text: row.Tweet}),
  (e:Entity {name: row.Text})
MERGE (t)-[m:Mentions]->(e)
""")

In [42]:
# Load relationships between roll call descriptions and named entities
run("""
LOAD CSV WITH HEADERS FROM "file:///roll_ners.csv" as row
MATCH
  (t:Rollcall {desc: row.vote_desc}),
  (e:Entity {name: row.Text})
MERGE (t)-[m:Mentions]->(e)
""")

In [43]:
# (TEST) Get the organization entities rollcalls that Bernie Sanders voted for that are mentioned most in yea votes
run("""
MATCH (s:Senator {name: "Bernie Sanders"})-[v:Voted {vote_type: "yea"}]->(r:Rollcall)-[m:Mentions]->(e:Entity {type: "ORG"})
RETURN e.name, count(e)
ORDER BY count(e) DESC
""")

Unnamed: 0,e.name,count(e)
0,state,18
1,treasury,13
2,the environmental protection agency,12
3,defense,12
4,washington,11
...,...,...
133,the armed forces,1
134,idaho,1
135,international religious freedom,1
136,the maritime administration,1


In [44]:
# (TEST) Get the organization entities bernie sanders tweets about the most
run("""
MATCH (s:Senator {name: "Bernie Sanders"})-[tw:Tweeted]->(t:Tweet)-[m:Mentions]->(e:Entity {type: "ORG"})
RETURN e.name, count(e)
ORDER BY count(e) DESC
""")

Unnamed: 0,e.name,count(e)
0,congress,13
1,texas,11
2,senate,3
3,nevada,3
4,washington,3
5,the u.s. house,2
6,medicare for all,2
7,starbucks,2
8,house,2
9,social security,2


In [45]:
# (TEST) Get the organization entities democrats tweet about the most
run("""
MATCH (s:Senator {party: "republican"})-[tw:Tweeted]->(t:Tweet)-[m:Mentions]->(e:Entity {type: "ORG"})
RETURN e.name, count(e)
ORDER BY count(e) DESC
""")

Unnamed: 0,e.name,count(e)
0,biden,421
1,senate,176
2,congress,99
3,washington,72
4,gop,47
...,...,...
1229,air force academy,1
1230,28th bomb wing,1
1231,the black hills and badlands tourism associati...,1
1232,leo’s,1


In [46]:
run("""
MATCH (t:Tweet)-[m:Mentions]->(e:Entity)
RETURN e.name, count(e)
ORDER BY count(e) DESC
""")

Unnamed: 0,e.name,count(e)
0,biden,1602
1,today,1036
2,american,486
3,americans,474
4,senate,443
...,...,...
9807,lawless harley davidson,1
9808,delaware county,1
9809,circleville,1
9810,@karilake,1


In [47]:
contributions = pd.read_csv("contributions_with_icpsr.csv")
contributions = contributions.dropna()
contributions["contributor"] = contributions["contributor"].str.lower()
contributions["contributor"] = contributions["contributor"].str.strip()
contributions.to_csv(f"{neo4j_import_dir}/contribution_relationships.csv")

In [48]:
# Load entities for donations/contributors
run("""
LOAD CSV WITH HEADERS FROM "file:///contribution_relationships.csv" as row
MERGE (c:Contributor {name: row.contributor})
""")

run("""
LOAD CSV WITH HEADERS FROM "file:///contribution_relationships.csv" as row
MATCH
  (c:Contributor {name: row.contributor}),
  (s:Senator {id: ToInteger(row.icpsr)})
MERGE (c)-[d:DonatedTo {total: ToInteger(row.total), pac: ToInteger(row.pac), individual: ToInteger(row.individual), race:row.race}]->(s)
""")

In [49]:
run("""
MATCH (c:Contributor)-[d:DonatedTo]->(s:Senator)-[tw:Tweeted]->(t:Tweet)
RETURN s.name, sum(d.total), s.party, sum(t.likes)
ORDER BY sum(t.likes) DESC
""")

Unnamed: 0,s.name,sum(d.total),s.party,sum(t.likes)
0,Mitt Romney,67813200,republican,22513950
1,Bernie Sanders,19227700,other,20708720
2,Marsha Blackburn,107143100,republican,10153120
3,Marco Rubio,71373542,republican,7192900
4,Elizabeth Warren,85030800,democrat,6564280
...,...,...,...,...
71,Jeanne Shaheen,84383046,democrat,40280
72,John Boozman,66459381,republican,38100
73,Mike Crapo,62119332,republican,36220
74,Bill Cassidy,58420250,republican,25380


### Possible queries to write:

(1) Get the most liked tweet by Bernie Sanders that mentions an event

(2) Get all tweets by Bernie Sanders that mention an organization also mentioned in a rollcall description for which he voted yay on

(3) Get the contributers who supported Senators that voted yes on bills mentioning "the councle of environmental quality"