In [1]:
import numpy as np
import pandas as pd
import os
import json
import csv

from pyspark import SparkContext
from pyspark.sql import SparkSession
import pyspark.sql.functions as F

from pyspark.sql.functions import lit,concat,col
from pyspark.ml.feature import StringIndexer
from pyspark.sql.types import IntegerType
from time import time

from pyspark.mllib.recommendation import ALS, MatrixFactorizationModel, Rating

n = 10000
csv_name = 'spotify_dataset_top10000.csv'
os.environ['JAVA_HOME'] = r"C:\Program Files\Java\jdk-15.0.1" 
spark = SparkSession.builder.master('local[*]').appName("Recommender").getOrCreate()


In [2]:
df = spark.read.option("header", "true").schema('playlist_id integer, track_id integer').csv(csv_name)\
    .na.drop().withColumn('count',lit(1))


In [3]:
with open('meta.json', 'rb') as f:
    meta = json.load(f)
    num_playlists = meta['num_playlists']
    num_tracks = meta['num_tracks']

df_extra = spark.createDataFrame([((num_playlists+i),i,1) for i in range(num_tracks)],['playlist_id','track_id','count'])

# Load and parse the data
data = df.union(df_extra).rdd


In [22]:
# FIND BEST PARAMETERS
rank = 10
numIterations = 10
l = 0.005
start = time()
model = ALS.implicttrain(data, rank, iterations = numIterations, lambda_ = l, nonnegative =True)

# # Evaluate the model on training data
# testdata = data.map(lambda p: (p[0], p[1]))
# predictions = model.predictAll(testdata).map(lambda r: ((r[0], r[1]), r[2]))
# ratesAndPreds = data.map(lambda r: ((r[0], r[1]), r[2])).join(predictions)
# MSE = ratesAndPreds.map(lambda r: (r[1][0] - r[1][1])**2).mean()
# print("Mean Squared Error = " + str(MSE) + ' rank: ' + str(rank) + ' iter: ' + str(numIterations) + ' lambda: ' + str(l))


print(time() - start)


AttributeError: type object 'ALS' has no attribute 'implicttrain'

In [23]:
model.productFeatures().count()

10000

In [24]:
model.userFeatures().count()

94420

In [25]:
start = time()
rec = model.recommendProductsForUsers(5).collect()
print(time() - start)

27.513184309005737


In [26]:
rec = [[r[0],*[r[1][i][1] for i in range(5)]] for r in rec]
pd_df = pd.DataFrame(rec)
pd_df.index = pd_df.loc[:,0]
pd_df = pd_df.sort_index()[-10000:] #num songs
pd_df[0] = pd_df[0]-84420 #num playlists
pd_df.index = pd_df.loc[:,0]

with open('name_to_id.json', 'rb') as d:
    name_to_id = json.load(d)
    id_to_name = {v:k for k,v in name_to_id.items()}
    
pd_df = pd_df.applymap(lambda x: id_to_name[x])
pd_df.columns = ['track','rec_1','rec_2','rec_3','rec_4','rec_5']
pd_df.index.name = 'track_id'

In [27]:
pd_df.to_csv('reccomendations.csv')

In [28]:
import pandas as pd

pd_df = pd.read_csv('reccomendations.csv')

In [29]:
import sqlite3
conn = sqlite3.connect('reccomendations.db')

cursor = conn.cursor()
cursor.execute("DROP TABLE top_five")

pd_df.to_sql('top_five',conn)

# Save (commit) the changes
conn.commit()

# We can also close the connection if we are done with it.
# Just be sure any changes have been committed or they will be lost.
conn.close()


In [30]:
pd_df

Unnamed: 0,track_id,track,rec_1,rec_2,rec_3,rec_4,rec_5
0,0,Elvis Costello-Alison,Linkin Park-My December,The Head And The Heart-Winter Song,Mumford & Sons-Winter Winds,Ben Howard-End Of The Affair,Boards of Canada-Reach For The Dead
1,1,Crowded House-Don't Dream It's Over,Linkin Park-My December,Mumford & Sons-Winter Winds,The Head And The Heart-Winter Song,Ben Howard-End Of The Affair,"Explosions In The Sky-Be Comfortable, Creature"
2,2,Crowded House-Fall At Your Feet,Muse-Psycho,Muse-Madness,Lorde-Team,Lorde-Tennis Court,Lorde-Royals
3,3,Joshua Radin-I'd Rather Be With You [Radio Edit],Mumford & Sons-Winter Winds,Ben Howard-End Of The Affair,"Explosions In The Sky-Be Comfortable, Creature",London Grammar-Flickers,London Grammar-Stay Awake
4,4,Paul McCartney-Live And Let Die,Linkin Park-My December,Mumford & Sons-Winter Winds,The Head And The Heart-Winter Song,Ben Howard-End Of The Affair,"Explosions In The Sky-Be Comfortable, Creature"
...,...,...,...,...,...,...,...
9995,9995,Joshua Radin-Closer,Daft Punk-Get Lucky,Daft Punk-Instant Crush,Daft Punk-Lose Yourself to Dance,Daft Punk-Doin' it Right,Daft Punk-Get Lucky - Daft Punk Remix
9996,9996,Van Morrison-Madame George,"Explosions In The Sky-Be Comfortable, Creature",Mumford & Sons-Winter Winds,Ben Howard-End Of The Affair,Rodriguez-Can't Get Away,Supersubmarina-LN Granada
9997,9997,Slipknot-Left Behind,Florence + The Machine-Shake It Out - Acoustic,Florence + The Machine-You've Got The Dirtee L...,"Explosions In The Sky-Be Comfortable, Creature",Florence + The Machine-All This And Heaven Too,Florence + The Machine-Breath Of Life
9998,9998,Zedd-The Legend Of Zelda - Original Mix,Mumford & Sons-Winter Winds,Ben Howard-End Of The Affair,Linkin Park-My December,"Explosions In The Sky-Be Comfortable, Creature",The Head And The Heart-Winter Song
