In [1]:
!pip install pyspark



In [4]:
# Import necessary libraries
from pyspark.sql import SparkSession
from pyspark import SparkContext
from itertools import combinations
import pandas as pd
import random

# Create the CSV file with random edges
edges = []
for _ in range(5000):
    u = random.randint(1, 100)
    v = random.randint(1, 100)
    if u < v:
        edges.append((u, v))

# Create a DataFrame and save to CSV
edges_df = pd.DataFrame(edges, columns=['u', 'v'])
edges_df.to_csv('random_edges.csv', index=False, header=False)

# Initialize SparkSession
spark = SparkSession.builder.appName("Triangle Finder").getOrCreate()

# Load the CSV file into an RDD
edges = spark.sparkContext.textFile("random_edges.csv").map(lambda line: tuple(map(int, line.split(','))))

# Convert edges to a set for quick lookup
edge_set = set(edges.collect())  # Collect edges in a set

# Group by node to get all connected nodes
grouped_edges = edges.groupByKey().mapValues(list)

# Reduce phase: Find triangular relationships using edge_set
triangles = grouped_edges.flatMap(lambda node_neighbors: [
    (node_neighbors[0], (a, b)) for a, b in combinations(node_neighbors[1], 2)
    if (a, b) in edge_set or (b, a) in edge_set
])

# Collect results
result = triangles.collect()

# Print results
for triangle in result:
    print(f"Triangle found: {triangle}")

# Stop the Spark session
spark.stop()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Triangle found: (25, (46, 88))
Triangle found: (25, (46, 60))
Triangle found: (25, (46, 89))
Triangle found: (25, (46, 39))
Triangle found: (25, (46, 64))
Triangle found: (25, (88, 34))
Triangle found: (25, (88, 98))
Triangle found: (25, (88, 42))
Triangle found: (25, (88, 46))
Triangle found: (25, (88, 89))
Triangle found: (25, (88, 46))
Triangle found: (25, (88, 34))
Triangle found: (25, (88, 98))
Triangle found: (25, (88, 87))
Triangle found: (25, (88, 28))
Triangle found: (25, (34, 29))
Triangle found: (25, (34, 88))
Triangle found: (25, (34, 60))
Triangle found: (25, (34, 74))
Triangle found: (25, (34, 39))
Triangle found: (25, (34, 40))
Triangle found: (25, (34, 50))
Triangle found: (25, (34, 50))
Triangle found: (25, (34, 83))
Triangle found: (25, (34, 28))
Triangle found: (25, (98, 63))
Triangle found: (25, (98, 42))
Triangle found: (25, (98, 88))
Triangle found: (25, (98, 60))
Triangle found: (25, (98, 74))
Trian