In [1]:
!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.4.0.tar.gz (310.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 310.8/310.8 MB 4.7 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... done
  Created wheel for pyspark: filename=pyspark-3.4.0-py2.py3-none-any.whl size=311317130 sha256=e93d96b1edad3f8ae1b0ac549f0bdb2b002266c9a358d42e1a6fd28d7dee9995
  Stored in directory: /root/.cache/pip/wheels/7b/1b/4b/3363a1d04368e7ff0d408e57ff57966fcdf00583774e761327
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.4.0


In [2]:
import pyspark
from pyspark import SparkContext
from pyspark.sql import SparkSession

In [3]:
# Start (or reuse) the Spark session for this notebook.
# Note: despite its name, `sc` is a SparkSession; the underlying
# SparkContext is reached through `sc.sparkContext`.
sc = (
    SparkSession.builder
    .appName("MovieReco")
    .getOrCreate()
)

# Each line of the input file becomes one string element of the RDD.
raw_data = sc.sparkContext.textFile("/content/InputData_1.txt")

Testing Input Data

In [4]:
# Bring every input line back to the driver to eyeball the data.
# collect() materialises the whole RDD locally.
raw_data.collect()

['1,10001,5.0',
 '1,10002,3.0',
 '1,10003,2.5',
 '2,10001,2.0',
 '2,10002,2.5',
 '2,10003,5.0',
 '2,10004,2.0',
 '3,10001,2.0',
 '3,10004,4.0',
 '3,10005,4.5',
 '3,10007,5.0',
 '4,10001,5.0',
 '4,10003,3.0',
 '4,10004,4.5',
 '4,10006,4.0',
 '5,10001,4.0',
 '5,10002,3.0',
 '5,10003,2.0',
 '5,10004,4.0',
 '5,10005,3.5',
 '5,10006,4.0']

Task 1 Map

In [7]:
def _user_to_movie_rating(line):
    """Convert one comma-separated input line into (field0, 'field1:field2')."""
    fields = line.split(",")
    return (fields[0], fields[1] + ":" + fields[2])


# Task 1 map: key each record by its first field.
mapped_rdd = raw_data.map(_user_to_movie_rating)

# Show every element of the mapped RDD for verification
mapped_rdd.collect()

[('1', '10001:5.0'),
 ('1', '10002:3.0'),
 ('1', '10003:2.5'),
 ('2', '10001:2.0'),
 ('2', '10002:2.5'),
 ('2', '10003:5.0'),
 ('2', '10004:2.0'),
 ('3', '10001:2.0'),
 ('3', '10004:4.0'),
 ('3', '10005:4.5'),
 ('3', '10007:5.0'),
 ('4', '10001:5.0'),
 ('4', '10003:3.0'),
 ('4', '10004:4.5'),
 ('4', '10006:4.0'),
 ('5', '10001:4.0'),
 ('5', '10002:3.0'),
 ('5', '10003:2.0'),
 ('5', '10004:4.0'),
 ('5', '10005:3.5'),
 ('5', '10006:4.0')]

Task 1 Reduce

In [8]:
# Task 1 reduce: join all values that share a key into a single
# comma-separated string.
reduced_rdd = mapped_rdd.reduceByKey(
    lambda left, right: ",".join((left, right))
)

# Print one (key, joined values) tuple per line for verification
for user_ratings in reduced_rdd.collect():
    print(user_ratings)

('1', '10001:5.0,10002:3.0,10003:2.5')
('4', '10001:5.0,10003:3.0,10004:4.5,10006:4.0')
('2', '10001:2.0,10002:2.5,10003:5.0,10004:2.0')
('3', '10001:2.0,10004:4.0,10005:4.5,10007:5.0')
('5', '10001:4.0,10002:3.0,10003:2.0,10004:4.0,10005:3.5,10006:4.0')


Task 2 Map

In [9]:
def map_pairs(user_ratings):
    """Emit co-occurrence pairs for one comma-separated 'id:rating' string.

    For every two entries at positions i < j, two tuples are produced in this
    order: ('id_i:id_j', 1) and ('id_j:id_i', 1). The part of each entry after
    the first ':' is ignored.

    Returns a list of (pair_key, 1) tuples; it is empty when fewer than two
    entries are present.
    """
    movie_ids = [entry.split(':')[0] for entry in user_ratings.split(',')]
    pairs = []
    for position, first in enumerate(movie_ids):
        for second in movie_ids[position + 1:]:
            # Emit both orientations so each id can act as the left-hand key.
            pairs.extend([
                (first + ':' + second, 1),
                (second + ':' + first, 1),
            ])
    return pairs


# Task 2 map: feed only the value part of each (key, value) record to
# map_pairs and flatten the resulting lists into one RDD of (pair, 1) tuples.
pair_rdd = reduced_rdd.values().flatMap(map_pairs)

for pair in pair_rdd.collect():
    print(pair)

('10001:10002', 1)
('10002:10001', 1)
('10001:10003', 1)
('10003:10001', 1)
('10002:10003', 1)
('10003:10002', 1)
('10001:10003', 1)
('10003:10001', 1)
('10001:10004', 1)
('10004:10001', 1)
('10001:10006', 1)
('10006:10001', 1)
('10003:10004', 1)
('10004:10003', 1)
('10003:10006', 1)
('10006:10003', 1)
('10004:10006', 1)
('10006:10004', 1)
('10001:10002', 1)
('10002:10001', 1)
('10001:10003', 1)
('10003:10001', 1)
('10001:10004', 1)
('10004:10001', 1)
('10002:10003', 1)
('10003:10002', 1)
('10002:10004', 1)
('10004:10002', 1)
('10003:10004', 1)
('10004:10003', 1)
('10001:10004', 1)
('10004:10001', 1)
('10001:10005', 1)
('10005:10001', 1)
('10001:10007', 1)
('10007:10001', 1)
('10004:10005', 1)
('10005:10004', 1)
('10004:10007', 1)
('10007:10004', 1)
('10005:10007', 1)
('10007:10005', 1)
('10001:10002', 1)
('10002:10001', 1)
('10001:10003', 1)
('10003:10001', 1)
('10001:10004', 1)
('10004:10001', 1)
('10001:10005', 1)
('10005:10001', 1)
('10001:10006', 1)
('10006:10001', 1)
('10002:1000

Task 2 Reduce

In [10]:
def reducer(a, b):
    """Combine two partial values with `+` and return the result."""
    combined = a + b
    return combined

# Task 2 reduce: add up the values emitted for each 'idA:idB' key.
co_occurrence_rdd = pair_rdd.reduceByKey(reducer)
# Last expression of the cell, so the collected list is displayed.
co_occurrence_rdd.collect()

[('10001:10003', 4),
 ('10002:10003', 3),
 ('10001:10006', 2),
 ('10003:10004', 3),
 ('10006:10003', 2),
 ('10004:10006', 2),
 ('10006:10004', 2),
 ('10001:10005', 2),
 ('10005:10001', 2),
 ('10001:10007', 1),
 ('10004:10005', 2),
 ('10005:10004', 2),
 ('10007:10004', 1),
 ('10005:10007', 1),
 ('10007:10005', 1),
 ('10006:10002', 1),
 ('10006:10005', 1),
 ('10001:10002', 3),
 ('10002:10001', 3),
 ('10003:10001', 4),
 ('10003:10002', 3),
 ('10001:10004', 4),
 ('10004:10001', 4),
 ('10006:10001', 2),
 ('10004:10003', 3),
 ('10003:10006', 2),
 ('10002:10004', 2),
 ('10004:10002', 2),
 ('10007:10001', 1),
 ('10004:10007', 1),
 ('10002:10005', 1),
 ('10005:10002', 1),
 ('10002:10006', 1),
 ('10003:10005', 1),
 ('10005:10003', 1),
 ('10005:10006', 1)]

Task 3 Map

In [11]:
def mapper(record):
    """Re-key a ('idI:idJ', count) record as (idI, 'idJ:count').

    `record` must be a 2-element sequence whose first item contains exactly
    one ':' separator.
    """
    pair_key, occurrences = record
    source_id, target_id = pair_key.split(':')
    return (source_id, "{}:{}".format(target_id, occurrences))

# Task 3 map: re-key every co-occurrence record with `mapper`.
# NOTE(review): this rebinds `mapped_rdd`, which an earlier cell also assigns
# with a different record shape — re-running earlier cells after this one
# will pick up this RDD instead.
mapped_rdd = co_occurrence_rdd.map(mapper)

# Last expression of the cell, so the collected list is displayed.
mapped_rdd.collect()

[('10001', '10003:4'),
 ('10002', '10003:3'),
 ('10001', '10006:2'),
 ('10003', '10004:3'),
 ('10006', '10003:2'),
 ('10004', '10006:2'),
 ('10006', '10004:2'),
 ('10001', '10005:2'),
 ('10005', '10001:2'),
 ('10001', '10007:1'),
 ('10004', '10005:2'),
 ('10005', '10004:2'),
 ('10007', '10004:1'),
 ('10005', '10007:1'),
 ('10007', '10005:1'),
 ('10006', '10002:1'),
 ('10006', '10005:1'),
 ('10001', '10002:3'),
 ('10002', '10001:3'),
 ('10003', '10001:4'),
 ('10003', '10002:3'),
 ('10001', '10004:4'),
 ('10004', '10001:4'),
 ('10006', '10001:2'),
 ('10004', '10003:3'),
 ('10003', '10006:2'),
 ('10002', '10004:2'),
 ('10004', '10002:2'),
 ('10007', '10001:1'),
 ('10004', '10007:1'),
 ('10002', '10005:1'),
 ('10005', '10002:1'),
 ('10002', '10006:1'),
 ('10003', '10005:1'),
 ('10005', '10003:1'),
 ('10005', '10006:1')]

Task 3: reshaping the map output into the input format for the reduce step

In [12]:
def _parse_neighbor(entry):
    """Turn a 'movieIdj:count' string into the one-element list [(movieIdj, count)].

    Returning a list lets reduceByKey assemble each movie's neighbour list by
    plain list concatenation.
    """
    movie_idj, count = entry.split(':')
    return [(movie_idj, int(count))]


# Step 1: Parse each 'movieIdj:count' value, keeping movieIdi as the key.
# A dedicated name is used so that the Task 2 `pair_rdd` is not overwritten
# with an RDD of a different shape (which would corrupt a re-run of the
# Task 2 reduce cell).
neighbor_rdd = mapped_rdd.mapValues(_parse_neighbor)

# Step 2: Combine the (movieIdj, count) lists for each movieIdi
combined_rdd = neighbor_rdd.reduceByKey(lambda x, y: x + y)

# Step 3: Format the output (list -> tuple for each movieIdi)
new_rdd = combined_rdd.mapValues(tuple)

# Show the new RDD
new_rdd.collect()

[('10001',
  (('10003', 4),
   ('10006', 2),
   ('10005', 2),
   ('10007', 1),
   ('10002', 3),
   ('10004', 4))),
 ('10004',
  (('10006', 2),
   ('10005', 2),
   ('10001', 4),
   ('10003', 3),
   ('10002', 2),
   ('10007', 1))),
 ('10007', (('10004', 1), ('10005', 1), ('10001', 1))),
 ('10002',
  (('10003', 3), ('10001', 3), ('10004', 2), ('10005', 1), ('10006', 1))),
 ('10003',
  (('10004', 3), ('10001', 4), ('10002', 3), ('10006', 2), ('10005', 1))),
 ('10006',
  (('10003', 2), ('10004', 2), ('10002', 1), ('10005', 1), ('10001', 2))),
 ('10005',
  (('10001', 2),
   ('10004', 2),
   ('10007', 1),
   ('10002', 1),
   ('10003', 1),
   ('10006', 1)))]

Task 3 Reduce

In [13]:
def _to_weight(joined):
    """Map (movieIdj, ((movieIdi, count), total)) to (movieIdi, 'movieIdj=count/total')."""
    movie_j, ((movie_i, count), total) = joined
    return (movie_i, movie_j + "=" + str(count / total))


# Step 1: Sum, for each movieIdj, the counts it has across all movieIdi rows
total_counts_rdd = (
    new_rdd
    .flatMap(lambda row: [(movie_j, count) for movie_j, count in row[1]])
    .reduceByKey(lambda left, right: left + right)
)

# Step 2: Re-key every (movieIdi, movieIdj, count) triple by movieIdj, attach
# that movie's total through a join, and emit movieIdi -> "movieIdj=count/total"
reduc_rdd3 = (
    new_rdd
    .flatMap(lambda row: [(movie_j, (row[0], count)) for movie_j, count in row[1]])
    .join(total_counts_rdd)
    .map(_to_weight)
)

# Show reduc_rdd3
reduc_rdd3.collect()

[('10001', '10007=0.3333333333333333'),
 ('10004', '10007=0.3333333333333333'),
 ('10005', '10007=0.3333333333333333'),
 ('10001', '10002=0.3'),
 ('10004', '10002=0.2'),
 ('10003', '10002=0.3'),
 ('10006', '10002=0.1'),
 ('10005', '10002=0.1'),
 ('10001', '10004=0.2857142857142857'),
 ('10007', '10004=0.07142857142857142'),
 ('10002', '10004=0.14285714285714285'),
 ('10003', '10004=0.21428571428571427'),
 ('10006', '10004=0.14285714285714285'),
 ('10005', '10004=0.14285714285714285'),
 ('10004', '10001=0.25'),
 ('10007', '10001=0.0625'),
 ('10002', '10001=0.1875'),
 ('10003', '10001=0.25'),
 ('10006', '10001=0.125'),
 ('10005', '10001=0.125'),
 ('10001', '10003=0.3076923076923077'),
 ('10004', '10003=0.23076923076923078'),
 ('10002', '10003=0.23076923076923078'),
 ('10006', '10003=0.15384615384615385'),
 ('10005', '10003=0.07692307692307693'),
 ('10001', '10006=0.25'),
 ('10004', '10006=0.25'),
 ('10002', '10006=0.125'),
 ('10003', '10006=0.25'),
 ('10005', '10006=0.125'),
 ('10001', '

Task 4 Map

In [14]:
# Task 4 map: keep the first field as the key and the third field, parsed as
# a float, as the value.
map_rdd4 = (
    raw_data
    .map(lambda line: line.split(','))
    .map(lambda fields: (fields[0], float(fields[2])))
)

# Print map_rdd4, one record per line
for record in map_rdd4.collect():
    print(record)

('1', 5.0)
('1', 3.0)
('1', 2.5)
('2', 2.0)
('2', 2.5)
('2', 5.0)
('2', 2.0)
('3', 2.0)
('3', 4.0)
('3', 4.5)
('3', 5.0)
('4', 5.0)
('4', 3.0)
('4', 4.5)
('4', 4.0)
('5', 4.0)
('5', 3.0)
('5', 2.0)
('5', 4.0)
('5', 3.5)
('5', 4.0)


Building the RDD that serves as input to the Task 4 reduce

In [15]:
# Input for Reduce 4

# Group all values of each key together and turn the grouped iterable into a
# plain list, giving records of the form (key, [value1, value2, ...]).
intermediate_4 = map_rdd4.groupByKey().mapValues(list)

# Print intermediate_4, one record per line
for record in intermediate_4.collect():
    print(record)

('1', [5.0, 3.0, 2.5])
('4', [5.0, 3.0, 4.5, 4.0])
('2', [2.0, 2.5, 5.0, 2.0])
('3', [2.0, 4.0, 4.5, 5.0])
('5', [4.0, 3.0, 2.0, 4.0, 3.5, 4.0])


Reduce for Task 4

In [16]:
# Task 4 reduce: arithmetic mean of the value list attached to each key.
reduce_rdd4 = intermediate_4.mapValues(
    lambda ratings: sum(ratings) / len(ratings)
)

# Print reduce_rdd4, one (key, mean) record per line
for record in reduce_rdd4.collect():
    print(record)

('1', 3.5)
('4', 4.125)
('2', 2.875)
('3', 3.875)
('5', 3.4166666666666665)


Task 5 Map 1: its output is an RDD that was already built in an earlier step, so it is reused directly.

Task 5 Map2

In [17]:
# `raw_data` holds one comma-separated string per record.

# Task 5 map 2: key each record by its second field and pair the first and
# third fields as 'first:third'.
map_rdd5 = (
    raw_data
    .map(lambda line: line.split(','))
    .map(lambda fields: (fields[1], fields[0] + ':' + fields[2]))
)

# Show map_rdd5
map_rdd5.collect()

[('10001', '1:5.0'),
 ('10002', '1:3.0'),
 ('10003', '1:2.5'),
 ('10001', '2:2.0'),
 ('10002', '2:2.5'),
 ('10003', '2:5.0'),
 ('10004', '2:2.0'),
 ('10001', '3:2.0'),
 ('10004', '3:4.0'),
 ('10005', '3:4.5'),
 ('10007', '3:5.0'),
 ('10001', '4:5.0'),
 ('10003', '4:3.0'),
 ('10004', '4:4.5'),
 ('10006', '4:4.0'),
 ('10001', '5:4.0'),
 ('10002', '5:3.0'),
 ('10003', '5:2.0'),
 ('10004', '5:4.0'),
 ('10005', '5:3.5'),
 ('10006', '5:4.0')]

Intermediate step for Task 5: building the input for `reduced_rdd5`

In [18]:
# `reduc_rdd3` holds (movieId, "otherMovieId=weight") pairs and
# `map_rdd5` holds (movieId, "userId:rating") pairs.

# Step 1: reduc_rdd3 is already a key-value RDD keyed by movieId, so it is
# used as-is; re-mapping each pair onto itself would only add an extra pass.
reduc_rdd3_kv = reduc_rdd3

# Step 2: Join on movieId, producing
# (movieId, ("otherMovieId=weight", "userId:rating")) for every combination
intermediate_5 = reduc_rdd3_kv.join(map_rdd5)

# Print the intermediate_5
for item in intermediate_5.collect():
    print(item)


('10001', ('10007=0.3333333333333333', '1:5.0'))
('10001', ('10007=0.3333333333333333', '2:2.0'))
('10001', ('10007=0.3333333333333333', '3:2.0'))
('10001', ('10007=0.3333333333333333', '4:5.0'))
('10001', ('10007=0.3333333333333333', '5:4.0'))
('10001', ('10002=0.3', '1:5.0'))
('10001', ('10002=0.3', '2:2.0'))
('10001', ('10002=0.3', '3:2.0'))
('10001', ('10002=0.3', '4:5.0'))
('10001', ('10002=0.3', '5:4.0'))
('10001', ('10004=0.2857142857142857', '1:5.0'))
('10001', ('10004=0.2857142857142857', '2:2.0'))
('10001', ('10004=0.2857142857142857', '3:2.0'))
('10001', ('10004=0.2857142857142857', '4:5.0'))
('10001', ('10004=0.2857142857142857', '5:4.0'))
('10001', ('10003=0.3076923076923077', '1:5.0'))
('10001', ('10003=0.3076923076923077', '2:2.0'))
('10001', ('10003=0.3076923076923077', '3:2.0'))
('10001', ('10003=0.3076923076923077', '4:5.0'))
('10001', ('10003=0.3076923076923077', '5:4.0'))
('10001', ('10006=0.25', '1:5.0'))
('10001', ('10006=0.25', '2:2.0'))
('10001', ('10006=0.25', 

In [19]:
# Merge all (weight, rating) tuples of a key into one flat tuple.
# Note: a guard such as `x if y in x else x + y` cannot deduplicate here —
# `y` is a tuple while the elements of `x` are strings, so `y in x` is always
# False. Plain concatenation is therefore used, and repeated entries remain
# in the result.
combined_rows = intermediate_5.reduceByKey(lambda x, y: x + y)

# Print the combined rows
for row in combined_rows.collect():
    print(row)

('10001', ('10007=0.3333333333333333', '1:5.0', '10007=0.3333333333333333', '2:2.0', '10007=0.3333333333333333', '3:2.0', '10007=0.3333333333333333', '4:5.0', '10007=0.3333333333333333', '5:4.0', '10002=0.3', '1:5.0', '10002=0.3', '2:2.0', '10002=0.3', '3:2.0', '10002=0.3', '4:5.0', '10002=0.3', '5:4.0', '10004=0.2857142857142857', '1:5.0', '10004=0.2857142857142857', '2:2.0', '10004=0.2857142857142857', '3:2.0', '10004=0.2857142857142857', '4:5.0', '10004=0.2857142857142857', '5:4.0', '10003=0.3076923076923077', '1:5.0', '10003=0.3076923076923077', '2:2.0', '10003=0.3076923076923077', '3:2.0', '10003=0.3076923076923077', '4:5.0', '10003=0.3076923076923077', '5:4.0', '10006=0.25', '1:5.0', '10006=0.25', '2:2.0', '10006=0.25', '3:2.0', '10006=0.25', '4:5.0', '10006=0.25', '5:4.0', '10005=0.25', '1:5.0', '10005=0.25', '2:2.0', '10005=0.25', '3:2.0', '10005=0.25', '4:5.0', '10005=0.25', '5:4.0'))
('10007', ('10004=0.07142857142857142', '3:5.0', '10001=0.0625', '3:5.0', '10005=0.125', '3:5

In [20]:
# Drop repeated entries from each key's tuple. Going through a set means the
# order of the surviving entries is not defined.
red5_input = combined_rows.mapValues(
    lambda entries: tuple(set(entries))
)

for record in red5_input.collect():
    print(record)

('10001', ('5:4.0', '10004=0.2857142857142857', '10002=0.3', '10003=0.3076923076923077', '10006=0.25', '4:5.0', '2:2.0', '10005=0.25', '3:2.0', '10007=0.3333333333333333', '1:5.0'))
('10007', ('10004=0.07142857142857142', '3:5.0', '10005=0.125', '10001=0.0625'))
('10005', ('10003=0.07692307692307693', '3:4.5', '10001=0.125', '10004=0.14285714285714285', '10006=0.125', '5:3.5', '10002=0.1', '10007=0.3333333333333333'))
('10006', ('5:4.0', '10003=0.15384615384615385', '10004=0.14285714285714285', '10005=0.125', '4:4.0', '10002=0.1', '10001=0.125'))
('10004', ('5:4.0', '10002=0.2', '10001=0.25', '10006=0.25', '4:4.5', '2:2.0', '10003=0.23076923076923078', '3:4.0', '10005=0.25', '10007=0.3333333333333333'))
('10003', ('1:2.5', '10001=0.25', '10002=0.3', '2:5.0', '10004=0.21428571428571427', '10005=0.125', '5:2.0', '10006=0.25', '4:3.0'))
('10002', ('10001=0.1875', '5:3.0', '1:3.0', '2:2.5', '10004=0.14285714285714285', '10003=0.23076923076923078', '10005=0.125', '10006=0.125'))


In [21]:
def _weight_first_key(entry):
    """Sort key: entries containing '=' sort by their own text, all others by ''."""
    if '=' in entry:
        return entry
    return ''


# Order each key's entries so that the '=' entries come first (descending
# string order) and the remaining entries follow in their incoming order.
red5_input2 = red5_input.mapValues(
    lambda entries: tuple(sorted(entries, key=_weight_first_key, reverse=True))
)

for record in red5_input2.collect():
    print(record)

('10001', ('10007=0.3333333333333333', '10006=0.25', '10005=0.25', '10004=0.2857142857142857', '10003=0.3076923076923077', '10002=0.3', '5:4.0', '4:5.0', '2:2.0', '3:2.0', '1:5.0'))
('10007', ('10005=0.125', '10004=0.07142857142857142', '10001=0.0625', '3:5.0'))
('10005', ('10007=0.3333333333333333', '10006=0.125', '10004=0.14285714285714285', '10003=0.07692307692307693', '10002=0.1', '10001=0.125', '3:4.5', '5:3.5'))
('10006', ('10005=0.125', '10004=0.14285714285714285', '10003=0.15384615384615385', '10002=0.1', '10001=0.125', '5:4.0', '4:4.0'))
('10004', ('10007=0.3333333333333333', '10006=0.25', '10005=0.25', '10003=0.23076923076923078', '10002=0.2', '10001=0.25', '5:4.0', '4:4.5', '2:2.0', '3:4.0'))
('10003', ('10006=0.25', '10005=0.125', '10004=0.21428571428571427', '10002=0.3', '10001=0.25', '1:2.5', '2:5.0', '5:2.0', '4:3.0'))
('10002', ('10006=0.125', '10005=0.125', '10004=0.14285714285714285', '10003=0.23076923076923078', '10001=0.1875', '5:3.0', '1:3.0', '2:2.5'))


Reduce for Task 5

In [22]:
def _weighted_rating(record):
    """Map (key, ('movieId=weight', 'userId:rating')) to ('userId:movieId', weight * rating)."""
    _, (weight_entry, rating_entry) = record
    target_movie, weight = weight_entry.split('=')
    user_id, rating = rating_entry.split(':')
    return (f"{user_id}:{target_movie}", float(weight) * float(rating))


# Every joined record carries exactly one weight entry and one rating entry,
# so each record yields exactly one weighted-rating contribution.
reduced_rdd5 = intermediate_5.map(_weighted_rating)

for contribution in reduced_rdd5.collect():
    print(contribution)

('1:10007', 1.6666666666666665)
('2:10007', 0.6666666666666666)
('3:10007', 0.6666666666666666)
('4:10007', 1.6666666666666665)
('5:10007', 1.3333333333333333)
('1:10002', 1.5)
('2:10002', 0.6)
('3:10002', 0.6)
('4:10002', 1.5)
('5:10002', 1.2)
('1:10004', 1.4285714285714284)
('2:10004', 0.5714285714285714)
('3:10004', 0.5714285714285714)
('4:10004', 1.4285714285714284)
('5:10004', 1.1428571428571428)
('1:10003', 1.5384615384615385)
('2:10003', 0.6153846153846154)
('3:10003', 0.6153846153846154)
('4:10003', 1.5384615384615385)
('5:10003', 1.2307692307692308)
('1:10006', 1.25)
('2:10006', 0.5)
('3:10006', 0.5)
('4:10006', 1.25)
('5:10006', 1.0)
('1:10005', 1.25)
('2:10005', 0.5)
('3:10005', 0.5)
('4:10005', 1.25)
('5:10005', 1.0)
('3:10004', 0.3571428571428571)
('3:10001', 0.3125)
('3:10005', 0.625)
('3:10007', 1.5)
('5:10007', 1.1666666666666665)
('3:10002', 0.45)
('5:10002', 0.35000000000000003)
('3:10004', 0.6428571428571428)
('5:10004', 0.5)
('3:10001', 0.5625)
('5:10001', 0.4375)
(

The map output for Task 6 is an RDD we already have (`reduced_rdd5`), so no new map step is needed.

Following is the reduce for Task 6

In [23]:
# Task 6 reduce: add up all contributions that share the same key.
result_rdd = reduced_rdd5.reduceByKey(
    lambda running_total, contribution: running_total + contribution
)

result_rdd.collect()

[('4:10007', 3.1666666666666665),
 ('1:10002', 2.25),
 ('4:10002', 3.6999999999999997),
 ('4:10003', 3.1923076923076925),
 ('4:10001', 2.375),
 ('4:10004', 2.6428571428571423),
 ('2:10003', 1.6538461538461542),
 ('2:10006', 2.5625),
 ('3:10006', 2.0625),
 ('3:10001', 1.875),
 ('2:10001', 2.21875),
 ('5:10002', 3.35),
 ('3:10004', 1.5714285714285712),
 ('1:10006', 2.25),
 ('5:10001', 3.0),
 ('1:10007', 1.6666666666666665),
 ('2:10002', 2.5),
 ('1:10004', 2.3928571428571423),
 ('2:10004', 1.9999999999999998),
 ('2:10005', 1.9375),
 ('2:10007', 1.3333333333333333),
 ('3:10007', 3.5),
 ('1:10003', 2.230769230769231),
 ('1:10005', 1.9375),
 ('5:10005', 3.125),
 ('1:10001', 1.1875),
 ('5:10007', 3.833333333333333),
 ('3:10002', 1.85),
 ('5:10004', 3.0714285714285716),
 ('3:10003', 1.8846153846153846),
 ('5:10003', 3.730769230769231),
 ('4:10006', 3.125),
 ('5:10006', 3.3125),
 ('3:10005', 2.125),
 ('4:10005', 3.25)]

In [24]:
# Completion marker: printed once every cell above has run.
print("YAYAYAYAYAYYAAY DONEEEEEEE!!!!!!!!!!!!!!!!!!!!!!!!!!!")

YAYAYAYAYAYYAAY DONEEEEEEE!!!!!!!!!!!!!!!!!!!!!!!!!!!
