## Understanding_Py2neo
---
J.Adrian Sánchez
September 2016

In [4]:
# Show which py2neo version is installed in this environment.
!pip3 freeze | grep py2neo

py2neo==3.1.2


In [5]:
import calendar
import json
import os
import time
from datetime import datetime

import requests
from py2neo import Graph, Node, Relationship

In [6]:
# Take the Neo4j password from the NEO4J_PASSWORD environment variable when it
# is set, so the credential does not have to live in the notebook; "test1234"
# remains the fallback used when the variable is absent.
graph = Graph(password=os.environ.get("NEO4J_PASSWORD", "test1234"))

In [7]:
# Print the Graph object to check which server URI the connection uses.
print(graph)

<Graph uri='http://localhost:7474/db/data/'>


In [8]:
# One uniqueness constraint per node label, each on the property that
# identifies nodes of that label.
constraint_statements = [
    "CREATE CONSTRAINT ON (u:User) ASSERT u.username IS UNIQUE",
    "CREATE CONSTRAINT ON (t:Tweet) ASSERT t.id IS UNIQUE",
    "CREATE CONSTRAINT ON (h:Hashtag) ASSERT h.name IS UNIQUE",
    "CREATE CONSTRAINT ON (d:Timedate) ASSERT d.date IS UNIQUE",
    "CREATE CONSTRAINT ON (c:City) ASSERT c.city IS UNIQUE",
]
cursors = [graph.run(statement) for statement in constraint_statements]

# The cursor of the last statement is the value displayed by the cell.
cursors[-1]

<py2neo.database.Cursor at 0x112fbd198>

In [1]:
# Request an OAuth2 bearer token (client_credentials grant) from Twitter.
# The value after -u is redacted here; curl's -u option takes user:password,
# presumably the application's consumer key and secret (TODO confirm).
!curl -XPOST -u **************** 'https://api.twitter.com/oauth2/token?grant_type=client_credentials'

Enter host password for user 'Understanding_Py2neo.ipynb':

In [2]:
# Bearer token sent in the Authorization header of every Twitter request.
# Prefer the TWITTER_BEARER_TOKEN environment variable so the secret is not
# saved in the notebook; the literal below is only a redacted placeholder.
twitter_bearer = os.environ.get("TWITTER_BEARER_TOKEN", "***************")


# Headers shared by the search and lookup requests.
headers = dict(accept="application/json", Authorization="Bearer " + twitter_bearer)
# Default search parameters; find_tweets supplies since_id on each call.
payload = dict(count=75, result_type="mixed", lang="en", q="trump")
base_url = "https://api.twitter.com/1.1/search/tweets.json?"

# Default lookup parameters. look_tweet supplies the real id on each call, so
# the placeholder is None rather than the builtin ``id`` function.
payload_look = dict(id=None, include_my_retweet="true")
base_url_look= "https://api.twitter.com/1.1/statuses/show.json?"

In [11]:
def find_tweets(since_id):
    """Search Twitter with the module-level search settings.

    The request is built from the module-level ``payload`` (query, count,
    result type, language), ``base_url`` and ``headers``.

    Args:
        since_id: Value forwarded as the ``since_id`` query parameter.

    Returns:
        The list stored under ``"statuses"`` in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    # Copy instead of writing into the shared ``payload`` dict, so a call
    # never changes module-level state.
    params = dict(payload, since_id=since_id)

    # ``params=`` lets requests URL-encode every value; formatting them into
    # the URL by hand breaks for queries containing spaces, '#' or '&'.
    r = requests.get(base_url, params=params, headers=headers)

    # Report a failed request as an HTTP error instead of a KeyError on the
    # missing "statuses" key.
    r.raise_for_status()

    return r.json()["statuses"]

In [12]:
def date(created_at):
    """Convert a ``created_at`` timestamp into a ``day/month/year`` string.

    ``created_at`` must look like ``"Wed Aug 27 13:08:45 +0000 2008"``. Day
    and month are written without zero padding, e.g. ``"27/8/2008"``.

    Raises:
        ValueError: If ``created_at`` does not match the expected format.
    """
    parsed = datetime.strptime(created_at, '%a %b %d %H:%M:%S +0000 %Y')
    return "/".join(str(part) for part in (parsed.day, parsed.month, parsed.year))

In [13]:
def look_tweet(id):
    """Fetch a single tweet by its id.

    The request is built from the module-level ``payload_look``,
    ``base_url_look`` and ``headers``.

    Args:
        id: Id of the tweet to look up.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so an error payload is returned as-is and callers should
        read fields with ``.get``.
    """
    # Copy instead of writing into the shared ``payload_look`` dict, and let
    # requests build and encode the query string.
    params = dict(payload_look, id=id)

    r = requests.get(base_url_look, params=params, headers=headers)

    return r.json()

In [14]:
def _set_tweet_properties(node, status):
    """Copy text, creation date and counters from a status dict onto ``node``.

    Keys that are missing (or ``None``) in ``status`` are left untouched on the
    node. ``created_at`` is stored through ``date()`` so every Tweet node uses
    the same day/month/year format.
    """
    for key in ("text", "favorite_count", "retweet_count"):
        value = status.get(key)
        if value is not None:
            node[key] = value

    created_at = status.get("created_at")
    if created_at:
        node["created_at"] = date(created_at)


def upload_tweets(tweets):
    """Store a batch of tweets and their related entities in Neo4j.

    For every tweet this merges the Tweet node plus its User, Timedate,
    Hashtag, mentioned User, City, replied-to Tweet and retweeted Tweet nodes,
    and links them with POSTS, BORN, TAGS, MENTIONS, ORIGIN, REPLY_TO and
    RETWEETS relationships. Uses the module-level ``graph``.

    Args:
        tweets: Iterable of status dicts, each with at least ``id``, ``text``,
            ``created_at``, ``favorite_count``, ``retweet_count``, ``user``
            and ``entities``.
    """
    print("Uploading tweets")
    for t in tweets:
        u = t["user"]
        e = t["entities"]
        tweet_date = date(t["created_at"])

        # 1. TWEET NODE
        # Merge on the key property first, then push the remaining properties.
        tweet = Node("Tweet", id=t["id"])
        graph.merge(tweet)

        tweet["text"] = t["text"]
        tweet["created_at"] = tweet_date
        tweet["favorite_count"] = t["favorite_count"]
        tweet["retweet_count"] = t["retweet_count"]

        graph.push(tweet)

        # 2. USER NODE
        user = Node("User", username=u["screen_name"])
        graph.merge(user)

        user["name"] = u["name"]
        user["tweets"] = u["statuses_count"]
        user["location"] = u["location"]
        user["followers"] = u["followers_count"]
        user["following"] = u["friends_count"]

        graph.push(user)

        # I. First relationship: user-POSTS-tweet
        graph.merge(Relationship(user, "POSTS", tweet))

        # 3. DATE NODE
        # Keyed on ``date`` because the uniqueness constraint declared for
        # Timedate nodes is on ``date`` (not ``username``).
        timedate = Node("Timedate", date=tweet_date)
        graph.merge(timedate)

        # II. Second relationship: tweet-BORN-date
        graph.merge(Relationship(tweet, "BORN", timedate))

        # 4. HASHTAG NODES
        for h in e.get("hashtags", []):
            # Lower-cased so differently cased hashtags share one node.
            hashtag = Node("Hashtag", name=h["text"].lower())
            graph.merge(hashtag)

            # III. Third relationship: hashtag-TAGS-tweet
            graph.merge(Relationship(hashtag, "TAGS", tweet))

        # 5. USER MENTION NODES
        for m in e.get('user_mentions', []):
            mention = Node("User", username=m["screen_name"])
            graph.merge(mention)

            # IV. Fourth relationship: tweet-MENTIONS-user
            graph.merge(Relationship(tweet, "MENTIONS", mention))

        # 6. PLACE NODE
        place = t.get("place")
        if place:
            city = Node("City", city=place.get("name"))
            graph.merge(city)

            # V. Fifth relationship: tweet-ORIGIN-city
            graph.merge(Relationship(tweet, "ORIGIN", city))

        # 7. REPLY NODE
        reply = t.get("in_reply_to_status_id")
        if reply:
            reply_tweet = Node("Tweet", id=reply)
            graph.merge(reply_tweet)

            resp = look_tweet(reply)

            # Only write properties when the lookup actually returned a
            # status; otherwise keep the bare node and just link to it.
            if resp.get("text") is not None:
                _set_tweet_properties(reply_tweet, resp)
                # Properties set on the node are local until pushed.
                graph.push(reply_tweet)

            # VI. Sixth relationship: tweet-REPLY_TO-reply_tweet
            graph.merge(Relationship(tweet, "REPLY_TO", reply_tweet))

        # 8. RETWEET NODE
        ret = t.get("retweeted_status", {})
        if ret:
            retweet = Node("Tweet", id=ret.get("id"))
            graph.merge(retweet)

            _set_tweet_properties(retweet, ret)
            graph.push(retweet)

            # VII. Seventh relationship: tweet-RETWEETS-retweet
            graph.merge(Relationship(tweet, "RETWEETS", retweet))

    print("Nodes-relationships created in neo4j")


    

In [18]:
# Seconds to wait between polls of the search API (also used after an error).
POLL_SECONDS = 60

# Starting value passed as since_id on the first search.
since_id = -1

# Poll until the kernel is interrupted.
while True:

    try:
        tweets = find_tweets(since_id=since_id)

        if not tweets:
            print("No tweets found. Looking for more of them in {} seconds".format(POLL_SECONDS))
            time.sleep(POLL_SECONDS)
            continue

        # Append each non-empty batch as one JSON document per line so the
        # file can be read back line by line; ``with`` closes the file even
        # if the write fails.
        with open('tweets_test.json', 'a') as out_file:
            out_file.write(json.dumps(tweets) + "\n")

        # Take the largest id in the batch rather than assuming the first
        # element is the newest tweet.
        since_id = max(t["id"] for t in tweets)
        print("Tweets found and the upload to NEo4j will begin")
        upload_tweets(tweets)

        print("{} tweets uploaded!".format(len(tweets)))
        time.sleep(POLL_SECONDS)

    except Exception as e:
        # Deliberate best-effort: report the problem and try again after the
        # usual pause instead of stopping the collector.
        print(e)
        time.sleep(POLL_SECONDS)

Tweets found and the upload to NEo4j will begin
Uploading tweets
Nodes-relationships created in neo4j
75 tweets uploaded!


KeyboardInterrupt: 