In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read Echonest Taste profile dataset
#taste = pd.read_csv('./data/train_triplets.txt', sep='\t', header=None, names=['user_id','song_id','play_count'])

In [3]:
#taste.head()

In [4]:
#taste.shape

In [5]:
import os
import pyspark

In [6]:
from pyspark import SparkContext
sc = SparkContext()

In [7]:
taste_file = os.path.join('.','data','subset_train_taste_profile.csv')
taste_raw_data = sc.textFile(taste_file)

In [8]:
taste_raw_data

./data/subset_train_taste_profile.csv MapPartitionsRDD[1] at textFile at NativeMethodAccessorImpl.java:0

In [9]:
taste_raw_data_header = taste_raw_data.take(1)[0]

In [10]:
taste_raw_data_header

u'user_index,song_index,play_count'

In [11]:
# Remove first row as header, split each row into token
taste_data = taste_raw_data.filter(lambda line: line!=taste_raw_data_header)\
            .map(lambda line: line.split(",")).map(lambda tokens: (int(tokens[0]),int(tokens[1]),int(tokens[2]))).cache()

In [12]:
taste_data.take(5)

[(12, 251, 1), (12, 547, 3), (12, 549, 1), (12, 551, 1), (12, 552, 2)]

In [13]:
# Load song file
song_file = os.path.join('.','data','song_encode_meta.csv')
song_raw_data = sc.textFile(song_file)

In [14]:
song_raw_data.count()

385257

In [15]:
song_raw_data_header = song_raw_data.take(1)[0]

In [16]:
song_raw_data_header

u'song_index,song_title,artist_name'

In [17]:
# Remove first row as header, split each row into token
song_data = song_raw_data.filter(lambda line: line!=song_raw_data_header)\
           .map(lambda line: line.split(",")).map(lambda tokens: (int(tokens[0]),tokens[1],tokens[2])).cache()

In [18]:
song_data.take(5)

[(0, u'The Cove', u'Jack Johnson'),
 (1, u'Nothing from Nothing', u'Billy Preston'),
 (2, u'Entre Dos Aguas', u'Paco De Lucia'),
 (3, u'Under Cold Blue Stars', u'Josh Rouse'),
 (4, u'Riot Radio (Soundtrack Version)', u'The Dead 60s')]

In [19]:
#from pyspark.sql import SQLContext, Row
#sqlContext = SQLContext(sc)
#taste_rows = taste_data.map(lambda x: Row(user_id=x[0], song_id=x[1], play_count=int(x[2])))
#taste_dataframe = sqlContext.createDataFrame(taste_data, ["user_id", "song_id", "play_count"])

In [20]:
#taste_dataframe.take(5)

In [21]:
# StringIndexer 
#from pyspark.ml.feature import StringIndexer

#userIndex = StringIndexer(inputCol="user_id", outputCol="user_index")
#user_ID = userIndex.fit(taste_dataframe).transform(taste_dataframe)

In [22]:
#user_ID.show(10)

In [23]:
# Split data into train, validation and test set
#training_RDD, validation_RDD, test_RDD = taste_data.randomSplit([6, 2, 2], seed=0L)
#validation_for_predict_RDD = validation_RDD.map(lambda x: (x[0], x[1]))
#test_for_predict_RDD = test_RDD.map(lambda x: (x[0], x[1]))

In [24]:
#training_RDD.take(5)

In [25]:
#training_RDD.count(), validation_RDD.count(), test_RDD.count()

### Train model

In [26]:
# Model selection (ALS parameter tuning)
from pyspark.mllib.recommendation import ALS
import math

seed = 5L
iterations = 10
regularization_parameter = 0.1
rank = 4
errors = 0
err = 0
tolerance = 0.02

In [27]:
#model = ALS.trainImplicit(training_RDD, rank, seed=seed, iterations=iterations, lambda_=regularization_parameter)
model = ALS.trainImplicit(taste_data, rank, seed=seed, iterations=iterations, lambda_=regularization_parameter)

In [28]:
for_predict_taste_data_RDD = taste_data.map(lambda x: (x[0], x[1]))

In [29]:
predicted_RDD = model.predictAll(for_predict_taste_data_RDD)

In [30]:
predicted_RDD.take(5)

[Rating(user=33068, product=19984, rating=0.009895828146082433),
 Rating(user=686124, product=19984, rating=0.04123225770756973),
 Rating(user=7256, product=19984, rating=0.03591582759475694),
 Rating(user=185632, product=19984, rating=0.016411966334172468),
 Rating(user=155372, product=19984, rating=0.018846302190289342)]

In [31]:
predicted_RDD.count(), taste_data.count()

(121134, 121134)

In [32]:
predicted_RDD

MapPartitionsRDD[253] at mapPartitions at PythonMLLibAPI.scala:1335

In [33]:
predicted_rating_RDD = predicted_RDD.map(lambda x: (x.product, x.rating))

In [34]:
predicted_rating_RDD.take(5)

[(19984, 0.009895828146082433),
 (19984, 0.04123225770756973),
 (19984, 0.03591582759475694),
 (19984, 0.016411966334172468),
 (19984, 0.018846302190289342)]

In [35]:
model.userFeatures().take(3)

[(12,
  array('d', [0.44985446333885193, 0.24813218414783478, 0.5085510611534119, 0.7732874751091003])),
 (854,
  array('d', [0.036118317395448685, 0.1357400119304657, -0.22276577353477478, 0.0885675698518753])),
 (1158,
  array('d', [0.008461867459118366, 0.02797505259513855, -0.023137087002396584, 0.015216836705803871]))]

In [36]:
model.productFeatures().take(3)

[(0,
  array('d', [0.0024921416770666838, 0.08650188893079758, -0.07545054703950882, 0.03180377930402756])),
 (2,
  array('d', [0.006477401126176119, 0.03109273500740528, -0.013818426989018917, 0.013780723325908184])),
 (10,
  array('d', [0.016179725527763367, 0.1947890669107437, -0.21717561781406403, 0.06901038438081741]))]

In [37]:
model.recommendProducts(12, 10)

[Rating(user=12, product=708, rating=1.1082731204655647),
 Rating(user=12, product=242, rating=1.0422475385823557),
 Rating(user=12, product=655, rating=1.0236320500007836),
 Rating(user=12, product=669, rating=1.0155398259067892),
 Rating(user=12, product=651, rating=0.9997518936265681),
 Rating(user=12, product=604, rating=0.9922758760114345),
 Rating(user=12, product=596, rating=0.985448772542493),
 Rating(user=12, product=689, rating=0.9696696630356376),
 Rating(user=12, product=553, rating=0.9584288960711813),
 Rating(user=12, product=664, rating=0.9560925201436099)]

In [38]:
model.recommendProducts(12, 10)[0].product

708

In [39]:
user_pred = [x.product for x in model.recommendProducts(12, 500)]

In [40]:
len(user_pred)

500

In [41]:
taste_data.take(3)

[(12, 251, 1), (12, 547, 3), (12, 549, 1)]

In [42]:
by_user_songs = taste_data.filter(lambda x: x[0] == 12)

In [43]:
by_user_songs.count()

151

In [44]:
by_user_songs.take(3)

[(12, 251, 1), (12, 547, 3), (12, 549, 1)]

In [45]:
by_user_songs.count()

151

In [46]:
user_labels = by_user_songs.map(lambda x: x[1]).collect()

In [47]:
test_predictionAndLabels = sc.parallelize([(user_pred, user_labels)])

In [48]:
test_predictionAndLabels

ParallelCollectionRDD[277] at parallelize at PythonRDD.scala:475

In [49]:
#by_user_songs_list = by_user_songs.map(lambda x: (x[0], x[1])).groupByKey()

In [50]:
from pyspark.mllib.evaluation import RankingMetrics
metrics = RankingMetrics(test_predictionAndLabels)

In [51]:
metrics

<pyspark.mllib.evaluation.RankingMetrics at 0x105fc3510>

In [52]:
metrics.meanAveragePrecision

0.7724980286276173

In [147]:
userRecommended = model.recommendProductsForUsers(5)

In [148]:
userRecommended.take(3)

[(468272,
  (Rating(user=468272, product=366, rating=0.05903648064763162),
   Rating(user=468272, product=200, rating=0.05607386465545883),
   Rating(user=468272, product=1276, rating=0.05484618775025832),
   Rating(user=468272, product=2487, rating=0.04953560733215151),
   Rating(user=468272, product=1238, rating=0.04735186942215971))),
 (947528,
  (Rating(user=947528, product=366, rating=0.2352631934198146),
   Rating(user=947528, product=1276, rating=0.2225964511845655),
   Rating(user=947528, product=2487, rating=0.1850394599176255),
   Rating(user=947528, product=200, rating=0.1848528402887608),
   Rating(user=947528, product=378, rating=0.18104094801378778))),
 (478404,
  (Rating(user=478404, product=200, rating=0.2307807919498683),
   Rating(user=478404, product=22, rating=0.22528303521150342),
   Rating(user=478404, product=205, rating=0.20673149269454158),
   Rating(user=478404, product=213, rating=0.20210625558391548),
   Rating(user=478404, product=231, rating=0.201125345445

In [149]:
user_reco = userRecommended.map(lambda x: (x[0], [r.product for r in x[1]]))

In [150]:
user_reco.take(3)

[(468272, [366, 200, 1276, 2487, 1238]),
 (947528, [366, 1276, 2487, 200, 378]),
 (478404, [200, 22, 205, 213, 231])]

In [151]:
taste_data

PythonRDD[3] at RDD at PythonRDD.scala:48

In [152]:
taste_data.take(3)

[(12, 251, 1), (12, 547, 3), (12, 549, 1)]

In [153]:
taste_data.map(lambda x: (x[0], x[1])).take(3)

[(12, 251), (12, 547), (12, 549)]

In [154]:
# *****
user_songs = taste_data.map(lambda x: (x[0], x[1])).groupByKey().mapValues(list)

In [155]:
predictionAndLabels = user_reco.join(user_songs)

In [156]:
predictionAndLabels.count()

1908

In [157]:
test_predictionAndLabels = predictionAndLabels.map(lambda x: x[1])

In [158]:
metrics = RankingMetrics(test_predictionAndLabels)

In [159]:
metrics.meanAveragePrecision

0.018383533315874452

In [146]:
#from pyspark.mllib.evaluation import RankingMetrics
# For each user
#user_pred = [x.product for x in model.recommendProducts(12, 500)]
#by_user_songs = taste_data.filter(lambda x: x[0] == 12)
#user_labels = by_user_songs.map(lambda x: x[1]).collect()
#test_predictionAndLabels = sc.parallelize([(user_pred, user_labels)])

#metrics = RankingMetrics(test_predictionAndLabels)
#metrics.meanAveragePrecision

In [34]:
predicted_rating_RDD.count()

199685

In [34]:
predictions = model.predictAll(testData).map(lambda r: ((r.user, r.product), r.rating))

In [35]:
predictions.take(5)

[((19222, 143464), 2.8624600155799347e-06),
 ((18506, 141268), 1.5453910456600827e-05),
 ((6676, 74884), 0.005455623977750502),
 ((9672, 74884), 0.022258532953375057),
 ((9844, 74884), 0.0048193730857909595)]

In [36]:
ratingsTuple = taste_data.map(lambda r: ((r[0], r[1]), r[2]))

In [37]:
ratingsTuple.count()

1000001

In [38]:
ratingsTuple.take(5)

[((0, 0), 1), ((0, 1), 1), ((0, 2), 2), ((0, 3), 1), ((0, 4), 1)]

In [39]:
scoreAndLabels = predictions.join(ratingsTuple).map(lambda tup: tup[1])

In [40]:
scoreAndLabels.count()

1000001

In [41]:
scoreAndLabels.take(5)

[(0.004218762839424005, 1),
 (0.00012628792733438484, 1),
 (0.0001789743493422559, 1),
 (6.622136780204846e-05, 1),
 (0.008803378688559314, 1)]

In [42]:
from pyspark.mllib.evaluation import RankingMetrics

In [43]:
test_predictionAndLabels = sc.parallelize([([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]), \
                                      ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]), \
                                      ([1, 2, 3, 4, 5], [])])

In [44]:
test_predictionAndLabels.take(5)

[([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]),
 ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]),
 ([1, 2, 3, 4, 5], [])]

In [45]:
metrics = RankingMetrics(test_predictionAndLabels)

In [46]:
metrics.meanAveragePrecision

0.35502645502645497

In [47]:
metrics.precisionAt(1)

0.3333333333333333

In [48]:
mtest = RankingMetrics(sc.parallelize([([1,2,3,4,5],[6,4,7,1,2])]))

In [49]:
mtest.meanAveragePrecision

0.55

In [50]:
mtest.precisionAt(2)

1.0

In [51]:
mtest2 = RankingMetrics(sc.parallelize([([6,4],[1,2,3,4,5])]))

In [52]:
mtest2.meanAveragePrecision

0.1

In [53]:
mtest2.precisionAt(2)

0.5

### Get song data

In [54]:
# Updates the play counts from the current data self.ratings_RDD
songID_with_playcount_RDD = taste_data.map(lambda x: (x[1], x[2])).groupByKey()

In [55]:
songID_with_playcount_RDD.count()

148039

In [56]:
songID_with_playcount_RDD.take(5)

[(0, <pyspark.resultiterable.ResultIterable at 0x113918a10>),
 (131072, <pyspark.resultiterable.ResultIterable at 0x113918f90>),
 (2, <pyspark.resultiterable.ResultIterable at 0x113918990>),
 (4, <pyspark.resultiterable.ResultIterable at 0x113933c50>),
 (6, <pyspark.resultiterable.ResultIterable at 0x113933d50>)]

In [57]:
def get_counts_and_averages(ID_and_playcount_tuple):
    """Given a tuple (songID, playcount_iterable) 
    returns (songID, (playcount, playcount_avg))
    """
    nplays = len(ID_and_playcount_tuple[1])
    return ID_and_playcount_tuple[0], (nplays, float(sum(x for x in ID_and_playcount_tuple[1]))/nplays)

In [58]:
# Updates the play counts from the current data self.ratings_RDD
songID_with_avg_playcount_RDD = songID_with_playcount_RDD.map(get_counts_and_averages)
song_playcounts_RDD = songID_with_avg_playcount_RDD.map(lambda x: (x[0], x[1][0]))

In [59]:
songID_with_avg_playcount_RDD.take(5)

[(0, (56, 2.4107142857142856)),
 (131072, (1, 1.0)),
 (2, (37, 3.72972972972973)),
 (4, (10, 2.7)),
 (6, (18, 4.222222222222222))]

In [60]:
song_playcounts_RDD.take(5)

[(0, 56), (131072, 1), (2, 37), (4, 10), (6, 18)]

### User history data for predict unplayed songs

In [61]:
# Get unrated (unplayed) songs
user_id = 1356
num_reco = 20
# Get pairs of (userID, movieID) for user_id unrated movies
user_unrated_movies_RDD = taste_data.filter(lambda rating: not rating[0] == user_id)\
                                             .map(lambda x: (user_id, x[1])).distinct()

In [62]:
taste_data.count()

1000001

In [63]:
user_unrated_movies_RDD.take(5)

[(1356, 83874), (1356, 98868), (1356, 122510), (1356, 7648), (1356, 22650)]

In [64]:
user_unrated_movies_RDD.count()

148027

In [65]:
# Predict rating (confidence of playcount)
user_predicted_RDD = model.predictAll(user_unrated_movies_RDD)

In [66]:
user_predicted_RDD.count()

148027

In [67]:
user_predicted_RDD.take(3)

[Rating(user=1356, product=141268, rating=3.043805167954241e-05),
 Rating(user=1356, product=143464, rating=1.2913471752926404e-05),
 Rating(user=1356, product=74884, rating=0.0020469142781164296)]

### it seems that if the song was not in training set, the model is unable to predict confidence level (rating)

In [68]:
reco_count = 25

In [69]:
predict_for_user = user_predicted_RDD.map(lambda x: (x.product, x.rating))

In [70]:
# Get predicted ratings
user_predicted_title_and_count_RDD = \
            predict_for_user.join(song_data).join(song_playcounts_RDD)
user_predicted_title_and_count_RDD = \
            user_predicted_title_and_count_RDD.map(lambda r: (r[1][0][1], r[1][0][0], r[1][1]))
top_reco_user = user_predicted_title_and_count_RDD.filter(lambda r: r[2]>=25).takeOrdered(reco_count, key=lambda x: -x[1])

In [71]:
reco_count, len(top_reco_user)

(25, 25)

In [72]:
user_predicted_title_and_count_RDD.take(5)

[(u'The Cove', 0.004673871028042951, 56),
 (u'The Reason', 6.758580126973646e-06, 1),
 (u"It's Only Love", 7.23364033862054e-05, 2),
 (u'Baby-Cry-Cry-Baby', 0.00015027069819297358, 2),
 (u'Cza Manca', 0.0001434847297677646, 2)]

In [73]:
user_predicted_title_and_count_RDD.count()

148646

In [74]:
top_reco_user

[(u'Sehr kosmisch', 0.10563094569031688, 2225),
 (u'Secrets', 0.10229641580604078, 1552),
 (u'Dog Days Are Over (Radio Edit)', 0.10176708988877221, 1908),
 (u'Fireflies', 0.0926475877608326, 1272),
 (u'Hey_ Soul Sister', 0.09130208940833587, 1247),
 (u'Use Somebody', 0.09055724158514109, 1069),
 (u'Use Somebody', 0.09055724158514109, 1069),
 (u'OMG', 0.08764602389656607, 1063),
 (u'The Scientist', 0.08523265412722475, 910),
 (u'Marry Me', 0.08282417835009775, 1000),
 (u'Clocks', 0.07859563833136879, 839),
 (u'Bulletproof', 0.07782294439709286, 850),
 (u'Pursuit Of Happiness (nightmare)', 0.0773691542782208, 886),
 (u'Lucky (Album Version)', 0.07733260400090944, 790),
 (u'Drop The World', 0.07727131049463085, 1033),
 (u'Billionaire [feat. Bruno Mars]  (Explicit Album Version)',
  0.0761039824804971,
  817),
 (u'The Only Exception (Album Version)', 0.07582814066480671, 738),
 (u'Revelry', 0.07427027892155405, 1594),
 (u'Undo', 0.07411668125827636, 1888),
 (u'Alejandro', 0.072316784526366

### Evaluation

In [75]:
# Precision
# Recall
# Expected percentile rank

In [76]:
#### Split data into training, validation and test set
training_RDD, validation_RDD, test_RDD = taste_data.randomSplit([6, 2, 2], seed=0L)
validation_for_predict_RDD = validation_RDD.map(lambda x: (x[0], x[1]))
test_for_predict_RDD = test_RDD.map(lambda x: (x[0], x[1]))

In [77]:
validation_for_predict_RDD.take(5)

[(0, 3), (0, 4), (0, 11), (0, 13), (0, 17)]

In [78]:
validation_RDD.take(5)

[(0, 3, 1), (0, 4, 1), (0, 11, 1), (0, 13, 5), (0, 17, 1)]

In [109]:
rank

4

In [79]:
model = ALS.trainImplicit(training_RDD, rank, seed=seed, iterations=iterations, lambda_=regularization_parameter)

In [80]:
predictions = model.predictAll(validation_for_predict_RDD).map(lambda r: (r[0], (r[1], r[2])))

In [110]:
predictions.count()

181399

In [81]:
predictions.take(5)

[(5588, (23776, 0.00020067667944805002)),
 (837, (23776, 0.0012788666472844778)),
 (9844, (74884, 0.00414099730202579)),
 (10350, (74884, 0.0009959825967532235)),
 (11090, (74884, 0.018708086873246096))]

In [82]:
predictions_by_user = predictions.groupByKey()

In [83]:
predictions_by_user.take(5)

[(0, <pyspark.resultiterable.ResultIterable at 0x11394b450>),
 (4096, <pyspark.resultiterable.ResultIterable at 0x113918410>),
 (11184, <pyspark.resultiterable.ResultIterable at 0x113918a50>),
 (4, <pyspark.resultiterable.ResultIterable at 0x113918910>),
 (15712, <pyspark.resultiterable.ResultIterable at 0x113918190>)]

In [84]:
predictions_by_user.count()

20226

In [97]:
def get_top_n_reco(ID_and_playcount_tuple,):
    """Given a tuple (userID, song_confidence_rating_iterable) 
    returns (userID, list of n recommended songs sorted by confidence rating descending)
    """
    n = 25
    sort_tuple = sorted([(x[0], x[1]) for x in ID_and_playcount_tuple[1]], key=lambda x: x[1], reverse=True)
    return ID_and_playcount_tuple[0], [x[0] for x in sort_tuple][:n]

In [98]:
# Get Top 5 recommendations
User_with_predicted_songs = predictions_by_user.map(get_top_n_reco)

In [99]:
User_with_predicted_songs.take(5)

[(0,
  [83, 93, 96, 88, 86, 63, 13, 74, 11, 66, 90, 48, 3, 17, 4, 62, 29, 53, 26]),
 (4096, [3555, 11065, 11374, 66815, 66819]),
 (11184, [218, 205, 2661, 8398]),
 (4, [172, 170, 180, 175, 171]),
 (15712, [27781, 27778, 27782])]

In [100]:
# Prepare user song data (user_id, [list of songs listened])
User_with_songs = validation_RDD.map(lambda x: (x[0], x[1])).groupByKey().map(lambda x: (x[0], list(x[1])))

In [101]:
User_with_songs.take(5)

[(0,
  [3,
   4,
   11,
   13,
   17,
   26,
   29,
   48,
   53,
   62,
   63,
   66,
   74,
   83,
   86,
   88,
   90,
   93,
   96,
   101]),
 (2, [127]),
 (4, [160, 170, 171, 172, 175, 180]),
 (6, [206, 22, 210, 213, 219]),
 (8, [239, 245, 246, 247, 248, 250])]

In [102]:
User_with_predicted_songs.count(), User_with_songs.count()

(20226, 20386)

In [103]:
Joined_Users = User_with_predicted_songs.join(User_with_songs)
#.map(lambda tup: tup[1])

In [104]:
predictionAndLabels = Joined_Users.map(lambda tup: tup[1])

In [105]:
predictionAndLabels.count()

20226

In [106]:
predictionAndLabels.take(5)

[([83, 93, 96, 88, 86, 63, 13, 74, 11, 66, 90, 48, 3, 17, 4, 62, 29, 53, 26],
  [3,
   4,
   11,
   13,
   17,
   26,
   29,
   48,
   53,
   62,
   63,
   66,
   74,
   83,
   86,
   88,
   90,
   93,
   96,
   101]),
 ([56639, 107636, 9669, 1522, 49448], [49448, 1522, 107636, 56639, 9669]),
 ([85674, 51057], [85674, 51057]),
 ([22, 206, 213, 219, 210], [206, 22, 210, 213, 219]),
 ([4652, 10800, 9131, 8502, 23269, 35667],
  [8502, 23269, 10800, 4652, 9131, 35667])]

In [107]:
metrics = RankingMetrics(predictionAndLabels)

In [108]:
metrics.meanAveragePrecision

0.9053105917002677

## Model Evaluation

### Split training/test set

### Model training using masked training dataset

### Get list of songs user has not listened

### Model selection

In [17]:
# Model selection (ALS parameter tuning)
from pyspark.mllib.recommendation import ALS
import math

seed = 5L
iterations = 10
regularization_parameter = 0.1
ranks = [4, 8, 12]
errors = [0, 0, 0]
err = 0
tolerance = 0.02

min_error = float('inf')
best_rank = -1
best_iteration = -1
for rank in ranks:
    model = ALS.trainImplicit(training_RDD, rank, seed=seed, iterations=iterations, lambda_=regularization_parameter)
    predictions = model.predictAll(validation_for_predict_RDD).map(lambda r: ((r[0], r[1]), r[2]))
    rates_and_preds = validation_RDD.map(lambda r: ((int(r[0]), int(r[1])), float(r[2]))).join(predictions)
    error = math.sqrt(rates_and_preds.map(lambda r: (r[1][0] - r[1][1])**2).mean())
    errors[err] = error
    err += 1
    print 'For rank %s the RMSE is %s' % (rank, error)
    if error < min_error:
        min_error = error
        best_rank = rank

print 'The best model was trained with rank %s' % best_rank

For rank 4 the RMSE is 7.22033451369
For rank 8 the RMSE is 7.21919181814
For rank 12 the RMSE is 7.21842479653
The best model was trained with rank 12


In [19]:
predictions.take(3)

[((1356, 23776), 0.0011338368332428949),
 ((893, 23776), 0.012175205875991333),
 ((6598, 23776), 0.0047738702913099436)]