In [2]:
from pyspark.sql import SparkSession
from graphframes import GraphFrame

In [3]:
spark = SparkSession.builder.getOrCreate()

In [4]:
vertexesPath = "Spark/Exercises/Ex_54/vertexes.csv"
edgesPath = "Spark/Exercises/Ex_54/edges.csv"
outputPath = "Spark/Exercises/Ex_54/res"

In [5]:
vertexesDF = spark.read.load(path=vertexesPath , format="csv", header=True, inferSchema=True)
vertexesDF.show()

+---+-----+---+
| id| name|age|
+---+-----+---+
| u1|Alice| 34|
| u2|  Bob| 36|
| u3| John| 30|
| u4|David| 29|
| u5| Paul| 32|
| u6| Adel| 36|
| u7| Eddy| 60|
+---+-----+---+



In [6]:
edgesDF = spark.read.load(path=edgesPath , format="csv", header=True, inferSchema=True)
edgesDF.show()

+---+---+--------+
|src|dst|linktype|
+---+---+--------+
| u1| u2|  friend|
| u1| u5|  friend|
| u2| u3|  follow|
| u3| u2|  follow|
| u4| u1|  friend|
| u4| u5|  friend|
| u5| u1|  friend|
| u5| u4|  friend|
| u5| u6|  follow|
| u6| u3|  follow|
| u7| u6|  follow|
+---+---+--------+



In [7]:
g = GraphFrame(vertexesDF, edgesDF)

In [17]:
motifs = g.find("(v1)-[e1]->(v2) ; !(v2)-[]->(v1)")
motifs.show()

+---------------+----------------+---------------+
|             v1|              e1|             v2|
+---------------+----------------+---------------+
| [u7, Eddy, 60]|[u7, u6, follow]| [u6, Adel, 36]|
|[u1, Alice, 34]|[u1, u2, friend]|  [u2, Bob, 36]|
| [u5, Paul, 32]|[u5, u6, follow]| [u6, Adel, 36]|
|[u4, David, 29]|[u4, u1, friend]|[u1, Alice, 34]|
| [u6, Adel, 36]|[u6, u3, follow]| [u3, John, 30]|
+---------------+----------------+---------------+



In [18]:
motifsFriendNoFriend = motifs.filter("e1.linktype == 'friend'")
motifsFriendNoFriend.show()

+---------------+----------------+---------------+
|             v1|              e1|             v2|
+---------------+----------------+---------------+
|[u1, Alice, 34]|[u1, u2, friend]|  [u2, Bob, 36]|
|[u4, David, 29]|[u4, u1, friend]|[u1, Alice, 34]|
+---------------+----------------+---------------+



In [21]:
finalDF = motifsFriendNoFriend.selectExpr("v1.id as IdFriend","v2.id as IdNotFriend")
finalDF.show()

+--------+-----------+
|IdFriend|IdNotFriend|
+--------+-----------+
|      u1|         u2|
|      u4|         u1|
+--------+-----------+



In [23]:
finalDF.write.csv(outputPath, header=True)