### Q2
Setup

In [None]:
!pip install pyspark
!pip install -U -q PyDrive
!apt install openjdk-8-jdk-headless -qq
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"

Collecting pyspark
  Downloading pyspark-3.5.0.tar.gz (316.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.9/316.9 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.0-py2.py3-none-any.whl size=317425344 sha256=63b6d058f3855005cc458d8b312f4c57320ffae4f6ee492a8b61e503264b5202
  Stored in directory: /root/.cache/pip/wheels/41/4e/10/c2cf2467f71c678cfc8a6b9ac9241e5e44a01940da8fbb17fc
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.0
The following additional packages will be installed:
  libxtst6 openjdk-8-jre-headless
Suggested packages:
  openjdk-8-demo openjdk-8-source libnss-mdns fonts-dejavu-extra fonts-nanum
  fonts-ipafont-gothic fonts-ipafont-mincho fonts-wqy-microhei
  fonts-wqy-zenhei fonts-indi

In [None]:
import pyspark
from pyspark import SparkContext, SparkConf
from pyspark.sql import *
from pyspark.sql.functions import *
import pandas as pd
import numpy as np

In [None]:
# Start (or reuse) the local Spark runtime.
#
# SparkContext.getOrCreate() returns the context that is already running when
# this cell is executed a second time; constructing SparkContext(conf=conf)
# directly would raise "Cannot run multiple SparkContexts at once" on re-run.
conf = SparkConf().setAppName("ChihaoShen").setMaster("local[*]")
sc = SparkContext.getOrCreate(conf=conf)
# The session is built on top of the active SparkContext.
spark = SparkSession.builder.getOrCreate()
# Last expression of the cell: display the session summary.
spark

In [None]:
# Friend recommendation: for every user, list up to 10 users who are not yet
# friends, ranked by number of mutual friends (descending) and then by user id
# (ascending).

# read data: each line holds one edge as two whitespace-separated integer ids
data = sc.textFile("/content/drive/MyDrive/CSC4008/ass/data-Q2/ego-facebook.txt")
# Lines with fewer than two fields (e.g. a trailing blank line) are skipped so
# they cannot raise IndexError when the ids are parsed below.
edges = data.map(lambda line: line.split()).filter(lambda fields: len(fields) >= 2)
# Emit every edge in both directions. The RDD is cached because several of the
# computations below (friend map, self-join, subtract) read it again.
undirected_map = edges.flatMap(
    lambda edge: [(int(edge[0]), int(edge[1])), (int(edge[1]), int(edge[0]))]
).cache()

# find existing friends using dict: user id -> list of friend ids (on the driver)
friends = (
    undirected_map
    .map(lambda edge: (edge[0], [edge[1]]))
    .reduceByKey(lambda a, b: a + b)
    .collectAsMap()
)
# Build each user's friend set once and share it as a broadcast variable,
# instead of rebuilding two sets from lists for every candidate pair.
friend_sets = sc.broadcast({user: set(ids) for user, ids in friends.items()})

# find potential pairs: two distinct users sharing at least one friend,
# stored as a sorted tuple so (a, b) and (b, a) collapse into one entry ...
potential_pairs = (
    undirected_map
    .join(undirected_map)
    .filter(lambda pair: pair[1][0] != pair[1][1])
    .map(lambda x: x[1])
    .map(lambda pair: tuple(sorted(pair)))
    .distinct()
)
# ... minus pairs that are already friends (undirected_map holds both orientations)
potential_pairs = potential_pairs.subtract(undirected_map)

# find mutual friends: intersection of the two users' friend sets
mutual_friends = potential_pairs.map(
    lambda pair: (
        pair,
        friend_sets.value[pair[0]].intersection(friend_sets.value[pair[1]]),
    )
)

# count friends number: give each side of the pair an entry (other user, count)
mutual_friends_count = mutual_friends.flatMap(
    lambda x: [
        (x[0][0], (x[0][1], len(x[1]))),
        (x[0][1], (x[0][0], len(x[1]))),
    ]
)
# gather all (candidate, count) entries of one user into a single list
mutual_friends_count = (
    mutual_friends_count
    .map(lambda edge: (edge[0], [edge[1]]))
    .reduceByKey(lambda a, b: a + b)
)

# sort by number of friends: most mutual friends first, smaller id on ties
sorted_count = mutual_friends_count.map(
    lambda x: (x[0], sorted(x[1], key=lambda y: (-y[1], y[0])))
)

# output: keep the top 10 candidates per user, ordered by user id
recommendations = sorted_count.map(lambda x: (x[0], x[1][:10])).sortByKey().collect()
for user, ranked in recommendations:
    # prints "<user>\t[<candidate ids>]"
    print(user, [candidate for candidate, _ in ranked], sep="\t")

1	[710, 603, 714, 1525, 289, 290, 291, 292, 293, 294]
2	[3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
3	[2, 4, 5, 6, 7, 8, 9, 10, 11, 12]
4	[2, 3, 5, 6, 7, 8, 9, 10, 11, 12]
5	[2, 3, 4, 6, 7, 8, 9, 10, 11, 12]
6	[2, 3, 4, 5, 7, 8, 9, 10, 11, 12]
7	[2, 3, 4, 5, 6, 8, 9, 10, 11, 12]
8	[2, 3, 4, 5, 6, 7, 9, 10, 11, 12]
9	[2, 3, 4, 5, 6, 7, 8, 10, 11, 12]
10	[2, 3, 4, 5, 6, 7, 8, 9, 11, 12]
11	[2, 3, 4, 5, 6, 7, 8, 9, 10, 12]
12	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
13	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
14	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
15	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
16	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
17	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
18	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
19	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
20	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
21	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
22	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
23	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
24	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
25	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
26	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
27	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
28	[2, 3, 

In [None]:
# Show the recommendation lists of a hand-picked set of user ids only.
output = [10, 152, 288, 603, 714, 1525, 2434, 2681]
for user, ranked in recommendations:
    if user not in output:
        continue
    # Each entry's first element is the recommended user id.
    candidate_ids = [entry[0] for entry in ranked]
    print(user, candidate_ids, sep="\t")

10	[2, 3, 4, 5, 6, 7, 8, 9, 11, 12]
152	[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
288	[71, 1525, 69, 90, 217, 2348, 2351, 2352, 2354, 2356]
603	[1, 289, 290, 291, 292, 293, 294, 295, 296, 297]
714	[1, 712, 713, 715, 717, 718, 1525, 90, 217, 247]
1525	[288, 1, 710, 714, 603]
2434	[71, 288, 711, 716, 719, 720, 2348, 2351, 2352, 2354]
2681	[71, 288, 710, 711, 716, 719, 720, 721, 722, 2348]
