### Counting Triangles

In [None]:
# !apt-get install openjdk-8-jdk-headless -qq > /dev/null
# !wget -q https://archive.apache.org/dist/spark/spark-3.2.0/spark-3.2.0-bin-hadoop3.2.tgz
# !tar xf /content/spark-3.2.0-bin-hadoop3.2.tgz
# !pip install -q findspark

In [None]:
# import os
# os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
# os.environ["SPARK_HOME"] = "/content/spark-3.2.0-bin-hadoop3.2"

# import findspark
# findspark.init()

In [None]:
from pyspark import SparkContext
from itertools import combinations
from operator import add, sub

In [None]:
# Reuse the already-running SparkContext when there is one, so re-running this
# cell does not try to create a second context.
sc = SparkContext.getOrCreate()
# Read the edge list as an RDD of raw text lines (one string per line).
rdd0 = sc.textFile("triangle.txt")
print(rdd0.collect())

['0, 1', '0, 2', '1, 2', '1, 3', '2, 3']


In [None]:
def parse(element):
    """Turn one comma-separated text line into a tuple of integers.

    Args:
        element: A string such as ``"0, 1"``; whitespace around each number
            is tolerated by ``int``.

    Returns:
        A tuple with one ``int`` per comma-separated field.

    Raises:
        ValueError: If any field is not a valid integer literal.
    """
    fields = element.split(",")
    return tuple(map(int, fields))

# Convert every raw text line into a tuple of integer vertex ids.
rdd1 = rdd0.map(parse)
# Pull the parsed edges back to the driver so they can be inspected.
values = rdd1.collect()
print(values)

[(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)]


In [None]:
def neighbours(element):
    """Return an edge in both orientations.

    Args:
        element: An indexable whose first two items are the edge endpoints.

    Returns:
        A two-item list ``[(a, b), (b, a)]`` built from ``element[0]`` and
        ``element[1]``.
    """
    first, second = element[0], element[1]
    return [(first, second), (second, first)]

# Emit every edge in both directions so that each endpoint appears as a key.
rdd2 = rdd1.flatMap(neighbours)
print(rdd2.collect())

# Group by vertex and materialise each group's values as a plain list.
rdd3 = rdd2.groupByKey().mapValues(list)
rdd3.collect()

[(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1), (1, 3), (3, 1), (2, 3), (3, 2)]


[(0, [1, 2]), (2, [0, 1, 3]), (1, [0, 2, 3]), (3, [1, 2])]

In [None]:
def toCheck(element):
    """List every unordered pair of values attached to one key.

    Args:
        element: A ``(key, values)`` pair where ``values`` is a sequence.
            Only ``values`` is used.

    Returns:
        A list of 2-tuples, one per combination of two entries of ``values``
        (in the order ``itertools.combinations`` yields them). The list is
        empty when ``values`` has fewer than two entries, so the result never
        contains anything that is not a pair.
    """
    values = element[1]
    # combinations() already yields nothing for fewer than two items, so no
    # special case is needed (and no malformed 1-tuple is emitted).
    return list(combinations(values, 2))

# Flatten the per-vertex pair lists into a single RDD of candidate pairs.
rdd4 = rdd3.flatMap(toCheck)
print(rdd4.collect())

[(1, 2), (0, 1), (0, 3), (1, 3), (0, 2), (0, 3), (2, 3), (1, 2)]


In [None]:
def mapping(element):
    """Pair ``element`` with the marker string ``"toCheck"``.

    Returns:
        The 2-tuple ``(element, "toCheck")``.
    """
    marker = "toCheck"
    return element, marker

# Key each candidate pair with the "toCheck" marker so it can be joined later.
rdd5 = rdd4.map(mapping)
print(rdd5.collect())

[((1, 2), 'toCheck'), ((0, 1), 'toCheck'), ((0, 3), 'toCheck'), ((1, 3), 'toCheck'), ((0, 2), 'toCheck'), ((0, 3), 'toCheck'), ((2, 3), 'toCheck'), ((1, 2), 'toCheck')]


In [None]:
def present(element):
    """Pair ``element`` with the marker string ``"presentEdge"``.

    Returns:
        The 2-tuple ``(element, "presentEdge")``.
    """
    marker = "presentEdge"
    return (element, marker)

# Tag every directed edge of rdd2 with the "presentEdge" marker.
rdd6 = rdd2.map(present)
print(rdd6.collect())

# Join on the pair: only candidate pairs that are also edges in rdd6 survive.
rdd7 = rdd5.join(rdd6)
print(rdd7.collect())

[((0, 1), 'presentEdge'), ((1, 0), 'presentEdge'), ((0, 2), 'presentEdge'), ((2, 0), 'presentEdge'), ((1, 2), 'presentEdge'), ((2, 1), 'presentEdge'), ((1, 3), 'presentEdge'), ((3, 1), 'presentEdge'), ((2, 3), 'presentEdge'), ((3, 2), 'presentEdge')]
[((0, 2), ('toCheck', 'presentEdge')), ((1, 2), ('toCheck', 'presentEdge')), ((1, 2), ('toCheck', 'presentEdge')), ((0, 1), ('toCheck', 'presentEdge')), ((2, 3), ('toCheck', 'presentEdge')), ((1, 3), ('toCheck', 'presentEdge'))]


In [None]:
def findTriangles(element1, element2):
    """Combine two values with ``+``.

    Used as the reducer for ``reduceByKey``; for tuple values this
    concatenates them.

    Returns:
        ``element1 + element2``.
    """
    combined = element1 + element2
    return combined

# Merge all marker tuples that share the same pair key into one tuple.
rdd8 = rdd7.reduceByKey(findTriangles)
print(rdd8.collect())

[((0, 2), ('toCheck', 'presentEdge')), ((1, 2), ('toCheck', 'presentEdge', 'toCheck', 'presentEdge')), ((0, 1), ('toCheck', 'presentEdge')), ((2, 3), ('toCheck', 'presentEdge')), ((1, 3), ('toCheck', 'presentEdge'))]


In [None]:
def countTriangles(element):
    """Count the ``'toCheck'`` markers in the value of a ``(key, value)`` pair.

    Args:
        element: A ``(key, value)`` pair; ``value`` must support ``.count``
            (for example a tuple of marker strings). The key is ignored.

    Returns:
        The number of items in ``value`` equal to ``'toCheck'``.
    """
    _, markers = element[0], element[1]
    return markers.count('toCheck')

# Number of confirmed candidate pairs ("closed wedges") recorded for each pair key.
rdd9 = rdd8.map(countTriangles)

# Every triangle is discovered once from each of its three vertices (each vertex
# proposes the pair formed by its other two corners), so the total number of
# confirmed pairs is three times the number of triangles. Summing and dividing
# by 3 is therefore correct for any graph, whereas keeping only keys whose count
# equals 2 gives the right answer only by coincidence on the sample input.
# RDD.sum() returns 0 for an empty RDD, so a triangle-free graph prints 0
# instead of raising.
# NOTE(review): assumes each undirected edge is listed once and there are no
# self-loops in the input; confirm against the data source.
rdd10 = rdd9.sum() // 3

print("Number of Triangles: ",rdd10)

Number of Triangles:  2
