# Neo4J Import and Queries

In [3]:
pip install neo4j==4.4.11

Collecting neo4j==4.4.11
  Downloading neo4j-4.4.11.tar.gz (87 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m87.5/87.5 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: neo4j
  Building wheel for neo4j (setup.py) ... [?25ldone
[?25h  Created wheel for neo4j: filename=neo4j-4.4.11-py3-none-any.whl size=111249 sha256=e1d95c89bf456f3604559b494c9ad275840f0f1fcc78ab80313d41c01188c4e6
  Stored in directory: /home/jovyan/.cache/pip/wheels/9b/29/51/cd9c7f08102e1afefd87d7307e45556c08f51eec519f2e6e05
Successfully built neo4j
Installing collected packages: neo4j
Successfully installed neo4j-4.4.11
Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd
import random
import csv

## Aufbau der Datenbankverbindung

In [5]:
from neo4j import GraphDatabase
class Neo4jConnection:
    
    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def query(self, query, db=None):
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try: 
            session = self.__driver.session(database=db) if db is not None else self.__driver.session() 
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally: 
            if session is not None:
                session.close()
        return response

In [12]:
conn = Neo4jConnection(uri="bolt://core1:7687", user="neo4j", pwd="password")
secondConn = Neo4jConnection(uri="bolt://core2:7688", user="neo4j", pwd="password")

In [13]:
print(conn.query(f"MATCH (n) RETURN n LIMIT 1;"))

print(secondConn.query(f"MATCH (n) RETURN n LIMIT 1;"))

[<Record n=<Node id=0 labels=frozenset({'Tweet'}) properties={'date_time': '12/01/2017 19:52', 'language': 'en', 'content': 'Is history repeating itself...?#DONTNORMALIZEHATE https://t.co/ngG11quhmK', 'tweetId': '1'}>>]
Query failed: Couldn't connect to core1:7688 (resolved to ('178.201.0.2:7688',)):
Failed to establish connection to ResolvedIPv4Address(('178.201.0.2', 7688)) (reason [Errno 111] Connection refused)
None


## Importieren der Knoten und Beziehungen

### Einlesen des "twitter_combined" Files

In [6]:
follows_file = open('twitter_combined.txt', 'r')
lines = follows_file.readlines()

follows = []
users = set()
for line in lines:
    line = line.replace('\n','')
    id1, id2 = line.split(' ')
    users.add(id1)
    users.add(id2)
    follows.append([id1, id2])

### Erstellung des CSV Files f√ºr den Import der User

In [7]:
count = 0
f = open("import/users.csv", "w")
f.write("userId,name,:Label\n")
for user in users:
    f.write(user + ",user" + str(count) + ",USER\n")
    count += 1
f.close()

### Importieren der User

In [24]:
%%time
conn.query("LOAD CSV WITH HEADERS FROM 'file:///users.csv' AS row CREATE (:User {userId: row.userId, name: row.name})")
conn.query("CREATE CONSTRAINT user_constraint IF NOT EXISTS FOR (u:User) REQUIRE u.userId IS UNIQUE")

CPU times: user 2.56 ms, sys: 274 ¬µs, total: 2.84 ms
Wall time: 2.48 s


[]

In [4]:
print(conn.query("MATCH(u: User) return u LIMIT 10"));
print(secondConn.query("MATCH(u: User) return u LIMIT 10"))

[<Record u=<Node id=52543 labels=frozenset({'User'}) properties={'name': 'user0', 'userId': '30782151'}>>, <Record u=<Node id=52544 labels=frozenset({'User'}) properties={'name': 'user1', 'userId': '399727099'}>>, <Record u=<Node id=52545 labels=frozenset({'User'}) properties={'name': 'user2', 'userId': '23618093'}>>, <Record u=<Node id=52546 labels=frozenset({'User'}) properties={'name': 'user3', 'userId': '31565013'}>>, <Record u=<Node id=52547 labels=frozenset({'User'}) properties={'name': 'user4', 'userId': '490171722'}>>, <Record u=<Node id=52548 labels=frozenset({'User'}) properties={'name': 'user5', 'userId': '190807988'}>>, <Record u=<Node id=52549 labels=frozenset({'User'}) properties={'name': 'user6', 'userId': '7980402'}>>, <Record u=<Node id=52550 labels=frozenset({'User'}) properties={'name': 'user7', 'userId': '144012142'}>>, <Record u=<Node id=52551 labels=frozenset({'User'}) properties={'name': 'user8', 'userId': '92676878'}>>, <Record u=<Node id=52552 labels=frozenset(

### Erstellung des CSV Files f√ºr den Import der Tweets anhand des "tweets" CSV Files 

In [10]:
tweets = pd.read_csv("tweets.csv")
f = open("import/tweetsDB.csv", "w")
writer = csv.writer(f)
writer.writerow(["tweetId","content","date_time","language",":Label"])
count = 1
for index, tweet in tweets.iterrows():
    writer.writerow([count,tweet["content"],tweet["date_time"],tweet["language"],"TWEET"])
    count += 1
f.close()

### Importieren der Tweets

In [23]:
%%time
conn.query("LOAD CSV WITH HEADERS FROM 'file:///tweetsDB.csv' AS row CREATE (:Tweet {tweetId: row.tweetId, content: row.content, date_time: row.date_time, language: row.language})")
conn.query("CREATE CONSTRAINT tweet_constraint IF NOT EXISTS FOR (t:Tweet) REQUIRE t.tweetId IS UNIQUE")

Query failed: {code: Neo.ClientError.Schema.ConstraintValidationFailed} {message: Node(0) already exists with label `Tweet` and property `tweetId` = '1'}
CPU times: user 652 ¬µs, sys: 2.65 ms, total: 3.3 ms
Wall time: 98.2 ms


[]

### Erstellung des CSV Files f√ºr den Import der FOLLOWS-Beziehung

In [19]:
f = open("import/follows.csv", "w")
f.write("userId,followerId,:TYPE\n")
for follow in follows:
    f.write(follow[0] + "," + follow[1] + ",FOLLOWS\n")
f.close()

### Import der FOLLOWS-Beziehungen

In [29]:
%%time
conn.query("LOAD CSV WITH HEADERS FROM 'file:///follows.csv' AS row MATCH (e:User {userId: row.userId}) MATCH (c:User {userId: row.followerId}) MERGE (e)-[:FOLLOWS]->(c)")

CPU times: user 1.13 ms, sys: 3.51 ms, total: 4.64 ms
Wall time: 57.8 s


[]

### Erstellung des CSV Files f√ºr den Import der POSTED-Beziehung
Jeder der Tweets wird zuf√§llig auf einen der 100 User mit den meisten Followern verteilt.

In [30]:
top100 = conn.query("MATCH (a)-[:FOLLOWS]->(b) RETURN b, COUNT(a) as followers ORDER BY followers DESC LIMIT 100")

In [31]:
user_ids = []
for record in top100:
    user_ids.append(record.get("b").get("userId"))

tweets = pd.read_csv("import/tweetsDB.csv")
f = open("import/posted.csv", "w")
f.write("userId,tweetId,:TYPE\n")
for index, tweet in tweets.iterrows():
    f.write(f'{random.choice(user_ids)},{tweet["tweetId"]},POSTED\n')
f.close()

### Import der POSTED-Beziehungen

In [32]:
%%time
conn.query("LOAD CSV WITH HEADERS FROM 'file:///posted.csv' AS row MATCH (u:User {userId: row.userId}) MATCH (t:Tweet {tweetId: row.tweetId}) MERGE (u)-[:POSTED]->(t)")

CPU times: user 3.85 ms, sys: 0 ns, total: 3.85 ms
Wall time: 2.18 s


[]

### Erstellung des CSV Files f√ºr den Import der LIKES-Beziehung
F√ºr jeden Tweet werden aus den Followern des Posters etwa 10 % ausgew√§hlt, welche des Post liken. Die Like-Zahlen aus dem "tweets" CSV File werden dabei nicht ber√ºcksichtigt.

In [33]:
allTweets = conn.query("Match (t: Tweet) RETURN t");
f = open("import/likes.csv","w")
f.write("userId,tweetId,:TYPE\n")

for user in top100: 
    followers = conn.query(f"MATCH (follower:User)-[:FOLLOWS]->(following:User) WHERE following.userId ='{user['b']['userId']}' RETURN follower")
    tweets = conn.query(f"MATCH (u:User {{userId: '{user['b']['userId']}'}})-[:POSTED]->(t:Tweet) RETURN t");
    for tweet in tweets: 
        for follower in followers:
            shouldLike = random.random() < 0.1
            if shouldLike:
                f.write(f"{follower['follower']['userId']},{tweet['t']['tweetId']},LIKES\n")

f.close()    

### Import der LIKES-Beziehungen

In [34]:
%%time
conn.query("LOAD CSV WITH HEADERS FROM 'file:///likes.csv' as row CALL { with row MATCH (u:User {userId: row.userId}) MATCH (t:Tweet {tweetId: row.tweetId}) MERGE (u)-[:LIKES]->(t) }  IN TRANSACTIONS OF 100000 ROWS")

CPU times: user 7.48 ms, sys: 4.32 ms, total: 11.8 ms
Wall time: 3min 14s


[]

## Queries

### 1. Alle Posts eines zuf√§lligen Users

In [35]:
random_poster = random.choice(top100)

result = conn.query(f"MATCH (u:User {{name: '{random_poster['b']['name']}'}})-[:POSTED]->(t:Tweet) RETURN t")
for tweet in result:
    print(tweet['t']['content'])
    print("")

Tonight: @BillCosby is back! Then playing a game with @TigerWoods &amp; @McIlroyRory and music from @royksopp &amp; @robynkonichiwa! #FallonTonight

Brooklynnnnnnnnn!!!!!

#Selenators this is all you guys MTV's song of the summer. http://t.co/Z3ATp13T92 http://t.co/MylSDVHWiT

Pakistan passes a law to correct a loophole that allowed perpetrators of "honor killings" to escape prosecution‚Ä¶ https://t.co/XG2MTLtwFz

I üíú seeing my babes this excited üôà yes, you'll have the real thing tomorrow pm! yes, there's still more tea for the rest of the countdown.‚òïÔ∏è

Ophelia we match!!!!!!!!!!!! https://t.co/F75c2aACKn

üåàüåàTULSA WE ARE CELEBRATING EQUALITY WITH YOU TONIGHT AT #THEPRISMATICWORLDTOUR üåàüåà

Congrats, @becolibe and @meganyousmile! You may have had terrible #VacationNightmares, but you won my contest, so clearly it was worth it.

The chaos in USA is the result of Trump's irresponsible campaigning. He is not a role model, look at this mess he created. #LoveTrumpsHate

F

### 2. Top 100 User mit den meisten Followern

In [60]:
top100 = conn.query("MATCH (a)-[:FOLLOWS]->(b) RETURN b, COUNT(a) as followers ORDER BY followers DESC LIMIT 100")

for user in top100:
    print(user['b']['name'], "hat", user['followers'], "Follower")

user67743 hat 3383 Follower
user71598 hat 3216 Follower
user72657 hat 2735 Follower
user3062 hat 2647 Follower
user30726 hat 2471 Follower
user60330 hat 2462 Follower
user38541 hat 2133 Follower
user59038 hat 2074 Follower
user30474 hat 1905 Follower
user37396 hat 1707 Follower
user35169 hat 1632 Follower
user19729 hat 1591 Follower
user26715 hat 1521 Follower
user64934 hat 1503 Follower
user14889 hat 1500 Follower
user2852 hat 1410 Follower
user1938 hat 1402 Follower
user11439 hat 1370 Follower
user3994 hat 1337 Follower
user40506 hat 1315 Follower
user56924 hat 1273 Follower
user23506 hat 1255 Follower
user57335 hat 1231 Follower
user5121 hat 1214 Follower
user44896 hat 1201 Follower
user4986 hat 1198 Follower
user4367 hat 1186 Follower
user65733 hat 1167 Follower
user76352 hat 1154 Follower
user68912 hat 1123 Follower
user41967 hat 1120 Follower
user47151 hat 1108 Follower
user28248 hat 1098 Follower
user71330 hat 1088 Follower
user30325 hat 1072 Follower
user18749 hat 1070 Follower

### 3. Top 100 User, die den meisten Usern der Top 100 folgen

In [55]:
query = """MATCH (follower:User)-[follows:FOLLOWS]->(followed:User)
WHERE followed.userId IN [""" 

count = 0
for user in top100:
    query += f"'{user['b']['userId']}'"
    if count < 99: query += ","
    count += 1

query += """]
WITH follower, COUNT(follows) AS numFollowed
ORDER BY numFollowed DESC
RETURN follower.name AS username, numFollowed
LIMIT 100
"""

result = conn.query(query)

In [56]:
for record in result:
    print(record['username'], "folgt", record['numFollowed'], 'Usern')

user30474 folgt 69 Usern
user25721 folgt 55 Usern
user735 folgt 50 Usern
user50432 folgt 49 Usern
user28248 folgt 47 Usern
user16364 folgt 46 Usern
user13760 folgt 44 Usern
user29386 folgt 41 Usern
user9913 folgt 40 Usern
user42307 folgt 39 Usern
user43513 folgt 38 Usern
user58519 folgt 38 Usern
user65949 folgt 37 Usern
user21107 folgt 37 Usern
user52394 folgt 37 Usern
user45163 folgt 36 Usern
user21975 folgt 35 Usern
user59631 folgt 35 Usern
user58811 folgt 35 Usern
user31292 folgt 35 Usern
user1489 folgt 34 Usern
user47150 folgt 34 Usern
user38986 folgt 34 Usern
user40978 folgt 34 Usern
user49162 folgt 34 Usern
user7739 folgt 34 Usern
user19703 folgt 33 Usern
user38975 folgt 33 Usern
user19729 folgt 33 Usern
user45948 folgt 33 Usern
user23925 folgt 31 Usern
user46002 folgt 31 Usern
user57612 folgt 31 Usern
user14998 folgt 31 Usern
user40958 folgt 31 Usern
user26549 folgt 30 Usern
user44896 folgt 30 Usern
user76341 folgt 30 Usern
user33997 folgt 30 Usern
user77697 folgt 30 Usern
user2

### 4. Informationen f√ºr pers√∂nliche Startseite eines zuf√§lligen Users (der Top 100 mit den meisten Followern)

In [39]:
random_user = random.choice(top100)

#### 1. Anzahl der Follower

In [40]:
result = conn.query(f"MATCH (follower:User)-[:FOLLOWS]->(user:User {{name: '{random_user['b']['name']}'}}) RETURN count(follower) AS followerCount")
print(result[0]['followerCount'])

1214


#### 2. Anzahl der verfolgten Accounts

In [41]:
result = conn.query(f"MATCH (user:User {{name: '{random_user['b']['name']}'}})-[:FOLLOWS]->(followed:User) RETURN count(followed) AS followedCount")
print(result[0]['followedCount'])

62


#### 3.1 Die 25 meistgeliketen Posts der verfolgten Accounts

In [42]:
result = conn.query(f"""
MATCH (follower:User)-[:FOLLOWS]->(followed:User)-[:POSTED]->(tweet:Tweet)
WHERE follower.name = '{random_user['b']['name']}'
WITH follower, collect(followed.name) AS followedUsers
MATCH (t:Tweet)<-[:POSTED]-(u:User)
WHERE u.name IN followedUsers
OPTIONAL MATCH (t)<-[:LIKES]-(liker:User)
RETURN t.content AS content, COUNT(liker) AS numLikes
ORDER BY numLikes DESC
LIMIT 25
""")

for tweet in result:
    print(tweet['content'])
    print("Likes: ", tweet['numLikes'])
    print()

Trump offers retired Army Lt. Gen. Michael Flynn the role of national security adviser, transition official says‚Ä¶ https://t.co/SHS0UjuhUt
Likes:  112

You won't want to miss a second of Premiere Week. Season 13 starts today. http://t.co/lLQpzpSN51
Likes:  109

Randy Jackson's leaving ‚ÄúAmerican Idol." Said he wanted to go out when the show was on top, but decided to wait another 7 years. #fallonmono
Likes:  109

@HolyErotica coming next week BUB
Likes:  106

HOW TO WEAR OLYMPIC SIVER METALS https://t.co/hGcyAXkcix
Likes:  104

Congrats Whitney on making homecoming court, you deserve it.
http://t.co/E2g8iLPd
Likes:  104

http://t.co/amRS3wkAIG
Likes:  104

Dublin... You sang beautifully and you danced your asses off. What a special night. Thanks for having‚Ä¶ http://t.co/aqjslHFDwf
Likes:  103

I'm proud being named as the top player of the http://t.co/z55vCy1G 50. Thank you http://t.co/0mq6fNNp
Likes:  103

http://t.co/C1OGbgDdLv
Likes:  101

Add your name to let the Senate know: Th

#### 3.2 Die 25 neuesten Posts der verfolgten Accounts

In [43]:
result = conn.query(f"""
MATCH (follower:User)-[:FOLLOWS]->(followed:User)-[:POSTED]->(tweet:Tweet)
WHERE follower.name = '{random_user['b']['name']}'
WITH follower, collect(followed.name) AS followedUsers
MATCH (t:Tweet)<-[:POSTED]-(u:User)
WHERE u.name IN followedUsers
RETURN t.content AS content, t.date_time AS date
ORDER BY t.date_time DESC
LIMIT 25
""")

for tweet in result:
    print(tweet['content'])
    print("Getweeted am: ", tweet['date'])
    print()

Our family group chat about our 2016 goals is giving me life!!!! #WatchOutWorld
Getweeted am:  31/12/2015 20:44

Filmmaker @FinalCutKing‚Äôs Halloween tricks result in hilariously delightful treats https://t.co/DWFz4Diu7l https://t.co/DDgEMilChw
Getweeted am:  31/10/2015 20:40

Kristen, thank u for sharing ur story w me :) Keep up the good work girl! Hope to see u in Vegas!! #WorkBxxCH XO http://t.co/aEu8bm5ycL
Getweeted am:  31/10/2013 01:58

They just told me Red sold 1.2 million albums first week. How is this real life?! You are UNREAL. I love you so much. Thanks a million ;)
Getweeted am:  31/10/2012 00:29

Have u seen the official video yet for #WhatDoYouMean? http://t.co/kuntEnaDNe
Getweeted am:  31/08/2015 14:08

Blue water, rocky coast ‚Äì California dreamin'. #TravelTuesday
https://t.co/UIYeA6xzOl
Getweeted am:  31/05/2016 21:29

No se pierdan #LaBicicleta - el nuevo sencillo de @CarlosVives y Shak - ya disponible en descarga y stream! ShakHQüö≤
https://t.co/oBtzP6iVng
Getwee

#### 4. Erstellung eines neuen Posts: Herstellen einer Cashing-Beziehung bei allen Followern

In [44]:
num_tweets = conn.query("MATCH (t:Tweet) RETURN COUNT(t)")[0]['COUNT(t)']
print(num_tweets)

52542


In [45]:
tweet = {
    "tweetId": str(num_tweets+1),
    "content": "Doing neo4j queries right now #BDEA",
    "date_time": "18/06/2023 09:48",
    "language": "en"
}

##### Tweet importieren

In [46]:
new_tweet = conn.query(f"CREATE (t:Tweet {{tweetId: '{tweet['tweetId']}', content: '{tweet['content']}', date_time: '{tweet['date_time']}', language: '{tweet['language']}'}}) RETURN t")

In [47]:
print(new_tweet)

[<Record t=<Node id=133849 labels=frozenset({'Tweet'}) properties={'date_time': '18/06/2023 09:48', 'language': 'en', 'tweetId': '52543', 'content': 'Doing neo4j queries right now #BDEA'}>>]


##### POSTED-Beziehung herstellen

In [48]:
conn.query(f"MATCH (user:User {{userId: '{random_user['b']['userId']}'}}) MATCH (tweet:Tweet {{tweetId: '{new_tweet[0]['t']['tweetId']}'}}) CREATE (user)-[:POSTED]->(tweet)")

[]

##### CACHED-Beziehung bei allen Followern des Posters herstellen

In [49]:
followers = conn.query(f"MATCH (follower:User)-[:FOLLOWS]->(user:User {{userId: '{random_user['b']['userId']}'}}) RETURN follower")
print(len(followers))

1214


In [50]:
print(random_user['b']['userId'])

14230524


In [54]:
for follower in followers:
    conn.query(f"MATCH (user:User {{userId: '{follower['follower']['userId']}'}}) MATCH (tweet:Tweet {{tweetId: '{new_tweet[0]['t']['tweetId']}'}}) CREATE (user)-[:CACHED]->(tweet)")

##### Check how many users have the tweet cached (should be the same as number followers)

In [52]:
conn.query("MATCH (u:User)-[r:CACHED]->(tweet:Tweet {tweetId: '52543'}) RETURN COUNT(u)")

[<Record COUNT(u)=1214>]

#### 5. Die 25 beliebtesten Tweets, welche die W√∂rter "hello" und "from" enthalten

In [53]:
result = conn.query("""
MATCH (t:Tweet)
WHERE ALL(word IN [' hello ', ' from '] WHERE t.content CONTAINS word)
WITH t
MATCH (t)<-[:LIKES]-(liker:User)
RETURN t.content AS content, COUNT(liker) AS numLikes
ORDER BY numLikes DESC
LIMIT 25
""")

for tweet in result:
    print(tweet['content'])
    print("Likes: ", tweet['numLikes'])
    print()

You asked: Hi Ronaldo! I love you so much! Please say hello to me! I am from Turkey!  http://t.co/xpDH5Ppb67
Likes:  219

You asked: 
Hello from Japan!
I'm Anna!I I'm a big fan! Please say me hello Anna!
 http://t.co/5Oyi7SiUhY
Likes:  208

You asked: hello from morocco 
 http://t.co/gbuEyP5953
Likes:  144

#JoanneVibes say hello from backstage at...??? https://t.co/m9PI2oUUo8
Likes:  112

Say hello to Himanshu Yadav and all my friends from #India. I will come shortly with more news. http://t.co/60xEW67c
Likes:  78

Why did @Adele cross the road? She wanted to say hello from the other side.
Likes:  75

You asked: Can you say hello to me?  My name is Narin and I am from Germany http://t.co/lx2CRb3xwh
Likes:  75

You asked: hello from France, champion ! :) im so proud of you http://t.co/AhDl5iMGzF
Likes:  69

You asked: Can you say hello to me ,please? I am from Bulgaria!!! http://t.co/dyNj55sAq4
Likes:  57

