In [6]:
# Import necessary libraries
from pyspark.sql import SparkSession
import pandas as pd
import random

# Step 1: Create the CSV file with random edges
edges = []
for _ in range(5000):
    u = random.randint(1, 100)
    v = random.randint(1, 100)
    if u < v:  # Ensure u < v to prevent duplicate edges
        edges.append((u, v))

# Create a DataFrame and save to CSV
edges_df = pd.DataFrame(edges, columns=['u', 'v'])
edges_df.to_csv('random_edges.csv', index=False, header=False)

# Step 2: Initialize Spark Session
spark = SparkSession.builder.appName("ETTP Triangle Finder").getOrCreate()

# Step 3: Load the CSV file into an RDD
edges_rdd = spark.sparkContext.textFile("random_edges.csv").map(lambda line: tuple(map(int, line.split(','))))

# Step 4: Define the partitioning function
def partition_function(edge):
    u, v = edge
    # Partitioning logic based on the sum of vertices (or any other logic)
    partition_id = (u + v) % 4  # Example partitioning logic
    return partition_id, edge

# Step 5: Map phase to assign edges to partitions
partitioned_edges = edges_rdd.map(partition_function)

# Step 6: Group edges by partition
grouped_edges = partitioned_edges.groupByKey().mapValues(list)

# Step 7: Define the triangle counting function for each partition
def find_triangles_in_partition(edges):
    triangles = []  # List to collect triangle sets
    edges_list = list(edges)
    for (u, v) in edges_list:
        # Find neighbors of u and v
        neighbors_u = set(x for x, y in edges_list if x != u and (u, x) in edges_list)
        neighbors_v = set(x for x, y in edges_list if x != v and (v, x) in edges_list)
        common_neighbors = neighbors_u.intersection(neighbors_v)

        # For each common neighbor, create a triangle
        for w in common_neighbors:
            triangles.append((u, v, w))  # Add the triangle (u, v, w)
    return triangles

# Step 8: Find triangles in each partition
triangle_lists = grouped_edges.mapValues(find_triangles_in_partition)

# Step 9: Collect results and flatten the triangle lists
all_triangles = triangle_lists.flatMap(lambda x: x[1]).collect()

# Step 10: Print the triangles found
print("Triangles found:")
for triangle in all_triangles:
    print(triangle)

# Step 11: Stop the Spark session
spark.stop()


Triangles found:
(2, 30, 74)
(2, 30, 86)
(36, 48, 72)
(36, 48, 88)
(36, 48, 80)
(36, 48, 60)
(24, 60, 72)
(44, 76, 80)
(72, 80, 88)
(48, 52, 60)
(56, 64, 72)
(18, 34, 90)
(18, 34, 58)
(18, 34, 38)
(14, 58, 94)
(14, 58, 90)
(14, 58, 70)
(38, 62, 66)
(18, 26, 34)
(18, 26, 90)
(18, 26, 50)
(18, 26, 82)
(44, 48, 72)
(44, 48, 80)
(44, 48, 68)
(14, 86, 90)
(14, 86, 94)
(36, 48, 72)
(36, 48, 88)
(36, 48, 80)
(36, 48, 60)
(4, 24, 32)
(4, 24, 72)
(4, 24, 52)
(4, 24, 60)
(50, 66, 90)
(12, 24, 32)
(12, 24, 88)
(12, 24, 72)
(12, 16, 72)
(12, 16, 88)
(12, 16, 36)
(4, 40, 64)
(4, 40, 44)
(46, 54, 66)
(46, 66, 82)
(46, 66, 90)
(48, 68, 72)
(12, 24, 32)
(12, 24, 88)
(12, 24, 72)
(4, 36, 64)
(4, 36, 40)
(4, 36, 72)
(4, 36, 60)
(38, 82, 90)
(38, 82, 86)
(20, 76, 80)
(26, 50, 66)
(26, 50, 90)
(24, 60, 72)
(18, 34, 90)
(18, 34, 58)
(18, 34, 38)
(34, 38, 90)
(22, 58, 82)
(22, 58, 90)
(20, 48, 72)
(20, 48, 80)
(20, 48, 60)
(24, 32, 72)
(40, 76, 80)
(24, 72, 88)
(12, 72, 88)
(32, 72, 92)
(4, 32, 64)
(4, 32, 