In [39]:
!pip3 install neo4j

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [40]:
pip install neo4j-driver

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [41]:
import pandas as pd
from neo4j import GraphDatabase

In [42]:
# connect to neo4j instance

class Neo4jConnection:


    def __init__(self, uri, user, pwd):
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print('Failed to create the driver:', e)


    def close(self):
        if self.__driver is not None:
            self.__driver.close()


    def query(self, query, parameters=None, db=None):
        assert self.__driver is not None, 'Driver not initialised'
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            response = list(session.run(query, parameters))
        except Exception as e:
            print('Query failed:', e)
        finally:
            if session is not None:
                session.close()
        return response


conn = Neo4jConnection(uri='', 
                       user='neo4j', 
                       pwd='')


In [43]:
query = '''
CALL apoc.load.json('http://ws.audioscrobbler.com/2.0/?method=user.getFriends
&user=dorky_&api_key=<api_key>&format=json&format=json') 
YIELD value
'''

result = conn.query(query)
print(result)

Query failed: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `apoc.load.json`: Caused by: java.net.MalformedURLException: Illegal character in URL}
None


In [44]:
query = '''
CALL apoc.load.json('http://ws.audioscrobbler.com/2.0/?method=user.getFriends
&user=dorky_&api_key=<api_key>&format=json&format=json') 
YIELD value
'''

result = conn.query(query)
print(result)

Query failed: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `apoc.load.json`: Caused by: java.net.MalformedURLException: Illegal character in URL}
None


In [45]:
query = '''
CALL apoc.load.json('http://ws.audioscrobbler.com/2.0/?method=chart.gettopartists&api_key=<api_key>&format=json&format=json') 
YIELD value
with value
match (e:Existing {name:value.artists})
unwind value.artist as l2
unwind apoc.map.values(l2, keys(l2)) as l3
unwind apoc.map.values(l3, keys(l3)) as l4
unwind apoc.map.values(l4, keys(l4)) as l5
unwind apoc.map.values(l5, keys(l5)) as l6
foreach (item in l4 |

merge (n:New {listeners:item.listeners, mbid:item.mbid, name:item.name, playcount:item.playcount, streamable:item.streamable, url:item.url })
merge (n)-[:isin]->(e))
'''

result = conn.query(query)
print(result)

[]


In [46]:
pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [47]:
from pyspark.mllib.recommendation import *
import random
from operator import *


api_key = ''
user_agent = ''

In [48]:
import requests

def lastfm_get(payload):

    url = 'https://ws.audioscrobbler.com/2.0/'

    headers = {
        'user-agent': user_agent
    }

    payload['api_key'] = api_key
    payload['format'] = 'json'

    response = requests.get(url, headers=headers, params=payload)
    return response

In [49]:

user_getfriends_data = lastfm_get({
    'method': 'user.getFriends',
    'user': 'Dorky_'
})

user_gettopalbum_data = lastfm_get({
    'method': 'user.getTopAlbums',
    'user': 'Dorky_'
})


print(user_getfriends_data.status_code)
print(user_gettopalbum_data.status_code)

200
200


In [50]:
import json

def jprint(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=4)
    print(text)

In [51]:
# random user data
kotatsu_data = lastfm_get({
    'method': 'user.getTopAlbums',
    'user': 'Ko_tatsu'
})

In [52]:
jprint(kotatsu_data.json()['topalbums']['album'][0]['name'])

"Drukqs"


In [54]:
# while data.json()['friends']['@attr']['page'] is <= data.json()['friends']['@attr']['totalPages']

In [55]:
jprint(user_getfriends_data.json())

{
    "friends": {
        "@attr": {
            "page": "1",
            "perPage": "50",
            "total": "192",
            "totalPages": "4",
            "user": "Dorky_"
        },
        "user": [
            {
                "bootstrap": "0",
                "country": "None",
                "image": [
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/34s/ad89cb603fd27ca5502d5634dee14284.png",
                        "size": "small"
                    },
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/64s/ad89cb603fd27ca5502d5634dee14284.png",
                        "size": "medium"
                    },
                    {
                        "#text": "https://lastfm.freetls.fastly.net/i/u/174s/ad89cb603fd27ca5502d5634dee14284.png",
                        "size": "large"
                    },
                    {
                        "#text": "https://lastfm.freetls.

In [56]:
jprint(user_getfriends_data.json()['friends']['user'][0]['name'])

"hikamu"


In [57]:
# jprint(user_gettopalbum_data.json()['topalbums']['album']['name'])

TypeError: ignored

In [None]:
jprint(user_gettopalbum_data.json())

In [None]:
# jprint(user_gettopalbum_data.json()['topalbums']['album'][0]['name'])
# jprint(user_gettopalbum_data.json()['topalbums']['album'][0]['playcount'])

In [None]:
# print(user_gettopalbum_data.json()['topalbums']['album'][0]['name'])
# print(user_gettopalbum_data.json()['topalbums']['album'][0]['playcount'])

In [63]:
def get_user_friends():

    friends_names = []

    for friend in user_getfriends_data.json()['friends']['user']:

        name = friend.get('name')

        friends_names.append(name)

    return friends_names

In [64]:
friend_names = get_user_friends()

In [65]:
print(friend_names)
print(len(friend_names))

['hikamu', 'Ko_tatsu', 'oddpulp', 'arthuic', 'darcvv', 'cerejinhalol', 'caudria', 'CitrusCircus', 'kaiquemaximoff', 'Junkento', 'jinsoulist', 'Rockrarted', 'Evan_Mumford', 'Airen3a', 'jomellyfish', 'baratacorn4', 'addison_d', 'mateo1232', 'Labotx95', 'Kinematosis', 'numanuma_', 'ayawaska1', 'guywithaqmark', 'rtasriel', 'anoond', 'maxwellk10', 'weldingbrick', 'Marz_bruh', 'how_aud', 'avvisa', 'DiegoVazquez77', 'CookieHoliday', 'pxrea', 'kt3301', 'Anonymousmov', 'PHChronos', 'Kid_aw', 'smelltoxic', 'iwnaras', 'Hay-zel', 'davxpr', 'AllanZone', 'testtype02', 'semihcc', 'mudkipi', 'sapphicasf', 'emabelickova', 'LuisPaez02', 'AbelgodXOTWOD', 'deicidios']
50


In [59]:
# friend_album = lastfm_get({
#             'method': 'user.getTopAlbums',
#             'user': 'hikamu'
#         })

In [60]:
# print(username)
# print(len(username))
# print(album_name)
# print(len(album_name))

In [66]:
def get_friends_albums_info():

    username = []
    album_title = []
    playcount = []


    for name in get_user_friends():

        friend_album = lastfm_get({
            'method': 'user.getTopAlbums',
            'user': name
        })

        album_n = []
        user_playcount = []

        count = 0
        for album in friend_album.json()['topalbums']['album']:

            count += 1
            title = album.get('name')
            album_n.append(title)

            playcount_num = album.get('playcount')
            user_playcount.append(playcount_num)
        

        username.append([friend_album.json()['topalbums']['@attr']['user']] * count)
        album_title.append(album_n)
        playcount.append(user_playcount)

    return username, album_title, playcount


In [67]:
users, album_names, playcount = get_friends_albums_info()

In [68]:
print(users)
print(album_names)
print(playcount)

[['hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu'], ['Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_

In [69]:
user1d = [i for a in users for i in a]
album_name1d = [i for a in album_names for i in a]
playcount1d = [i for a in playcount for i in a]


print(user1d)
print(album_name1d)
print(playcount1d)

['hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'hikamu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tatsu', 'Ko_tat

In [70]:
playcount = [int(i) for i in playcount1d]
print(playcount)
print(len(playcount))

[8027, 3190, 2356, 2164, 2036, 1735, 1503, 1495, 1428, 1300, 1044, 991, 875, 725, 695, 639, 579, 552, 516, 489, 483, 469, 460, 455, 428, 395, 384, 376, 375, 362, 341, 341, 329, 321, 320, 318, 308, 299, 299, 298, 292, 291, 290, 287, 284, 269, 269, 242, 239, 239, 387, 330, 267, 254, 244, 213, 194, 186, 161, 151, 150, 150, 150, 146, 143, 141, 139, 134, 130, 123, 121, 121, 115, 113, 111, 109, 106, 104, 104, 102, 102, 101, 101, 100, 98, 94, 94, 93, 92, 91, 91, 91, 88, 88, 88, 87, 86, 86, 86, 85, 1919, 1399, 1175, 1149, 1006, 1002, 899, 882, 725, 682, 679, 607, 598, 460, 449, 432, 432, 424, 385, 368, 364, 361, 341, 327, 323, 305, 303, 293, 290, 287, 284, 283, 281, 273, 273, 268, 263, 253, 244, 242, 242, 237, 232, 231, 230, 230, 215, 215, 215, 214, 56, 32, 30, 23, 20, 20, 18, 17, 14, 13, 13, 13, 12, 12, 11, 11, 11, 11, 10, 9, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 8101, 7168, 4723, 4222, 4056, 3464, 3462, 3322, 3012, 2995, 2978, 2916, 2797, 2

In [71]:
import pandas as pd

user, uniques = pd.factorize(user1d)
album, uniques = pd.factorize(album_name1d)

In [72]:
print(user)
print(type(user))
print()
print(album)
print(type(album))

[ 0  0  0 ... 49 49 49]
<class 'numpy.ndarray'>

[   0    1    2 ... 1750 1751  250]
<class 'numpy.ndarray'>


In [73]:
user_id = [int(i) for i in user]
print(user_id)

album_id = [int(i) for i in album]
print(album_id)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 

In [74]:
data = []
data.append(user_id)
data.append(user1d)
data.append(album_id)
data.append(album_name1d)
data.append(playcount)

In [75]:
print(data)

[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,

In [None]:
# def get_friends_top_album_name():

#     album_name = []
    

#     for name in get_user_friends():

#         friend_album = lastfm_get({
#             'method': 'user.getTopAlbums',
#             'user': name
#         })

#         album = friend_album.json()['topalbums']['album']['name']

#         album_name.append(album)


#     return album_name

In [None]:
# album_names = get_friends_top_album_name()

In [None]:
# print(album_names)
# print(len(album_names))

In [None]:
# def get_friends_top_album_playcount():

#     playcount = []

#     for name in get_user_friends():

#         friend_album = lastfm_get({
#             'method': 'user.getTopAlbums',
#             'user': name
#         })

#         playcnt = friend_album.json()['topalbums']['album'][0]['playcount']

#         playcount.append(str(playcnt))

#     return playcount

In [None]:
# album_playcount = get_friends_top_album_playcount()

In [None]:
# print(album_playcount)
# print(len(album_playcount))

In [None]:
# n_playcount = [int(i) for i in album_playcount]
# print(n_playcount)

In [None]:
# import pandas as pd

# albums, uniques = pd.factorize(album_names)


In [None]:
# print(albums)

# print(type(albums))

In [None]:
# n_albums = [int(i) for i in albums]
# print(n_albums)


In [None]:
# data = []

# data.append(list(range(1,50)))
# data.append(friend_names)
# data.append(n_albums)
# data.append(album_names)
# data.append(n_playcount)

In [None]:
print(data)

In [76]:
n_data = [[a[n] for a in data] for n in range(len(data[0]))]

In [77]:
print(n_data)

[[0, 'hikamu', 0, 'Black Moon', 8027], [0, 'hikamu', 1, 'D&G', 3190], [0, 'hikamu', 2, 'FLASH DESIRE', 2356], [0, 'hikamu', 3, 'Azeroy', 2164], [0, 'hikamu', 4, 'Icedancer', 2036], [0, 'hikamu', 5, 'Marjorie -W.C. Sinclair', 1735], [0, 'hikamu', 6, 'AZEROY GARDEN 8888', 1503], [0, 'hikamu', 7, 'SURGEON DOGMA', 1495], [0, 'hikamu', 8, 'Last Bible', 1428], [0, 'hikamu', 9, 'SWORDS', 1300], [0, 'hikamu', 10, 'CRYSTAL HELIX', 1044], [0, 'hikamu', 11, 'Gluee', 991], [0, 'hikamu', 12, 'EVERSINCE', 875], [0, 'hikamu', 13, 'BAROQUE', 725], [0, 'hikamu', 14, 'Contemporary Movement', 695], [0, 'hikamu', 15, 'Working on Dying', 639], [0, 'hikamu', 16, 'E', 579], [0, 'hikamu', 17, 'Red Light', 552], [0, 'hikamu', 18, 'Earlier Works Bootleg', 516], [0, 'hikamu', 19, 'Munnie Fetish Renegade Demon', 489], [0, 'hikamu', 20, 'Legendary Member', 483], [0, 'hikamu', 21, 'Let Me Go', 469], [0, 'hikamu', 22, 'VALERIA PULSAR', 460], [0, 'hikamu', 23, 'The Best Piece of Shit Vol. 3', 455], [0, 'hikamu', 24, 

In [78]:
pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [79]:
# import libraries
from pyspark import SparkContext
from pyspark.ml.recommendation import ALS
from pyspark.sql import SparkSession ,Row

In [80]:
appName="exam neo4j lastfm"

# initialize the spark session
spark = SparkSession.builder.appName(appName).getOrCreate()

# get sparkcontext from the sparksession
sc = spark.sparkContext

In [81]:
from pyspark.sql.types import *
from pyspark.sql.functions import col

user_schema = StructType([
    StructField('user_id', IntegerType()),
    StructField('user_name', StringType()),
    StructField('album_id', IntegerType()),
    StructField('album_name', StringType()),
    StructField('playcount', IntegerType())
])



In [82]:
# user_rdd = sc.parallelize([friend_names])

In [83]:
df = spark.createDataFrame(n_data, schema=user_schema)

In [84]:
df.printSchema()

root
 |-- user_id: integer (nullable = true)
 |-- user_name: string (nullable = true)
 |-- album_id: integer (nullable = true)
 |-- album_name: string (nullable = true)
 |-- playcount: integer (nullable = true)



In [85]:
df.show(truncate=False)

+-------+---------+--------+----------------------------+---------+
|user_id|user_name|album_id|album_name                  |playcount|
+-------+---------+--------+----------------------------+---------+
|0      |hikamu   |0       |Black Moon                  |8027     |
|0      |hikamu   |1       |D&G                         |3190     |
|0      |hikamu   |2       |FLASH DESIRE                |2356     |
|0      |hikamu   |3       |Azeroy                      |2164     |
|0      |hikamu   |4       |Icedancer                   |2036     |
|0      |hikamu   |5       |Marjorie -W.C. Sinclair     |1735     |
|0      |hikamu   |6       |AZEROY GARDEN 8888          |1503     |
|0      |hikamu   |7       |SURGEON DOGMA               |1495     |
|0      |hikamu   |8       |Last Bible                  |1428     |
|0      |hikamu   |9       |SWORDS                      |1300     |
|0      |hikamu   |10      |CRYSTAL HELIX               |1044     |
|0      |hikamu   |11      |Gluee               

In [86]:
# dataset split into training and testing set
(training, test) = df.randomSplit([0.8, 0.2])

In [87]:
#Training the model
als = ALS(maxIter=5, 
          implicitPrefs=True,
          userCol="user_id", 
          itemCol="album_id", 
          ratingCol="playcount",
          coldStartStrategy="drop")

In [88]:
# predict using the testing datatset
model = als.fit(training)


In [89]:
predictions = model.transform(test)
predictions.show()

+-------+------------+--------+--------------------+---------+------------+
|user_id|   user_name|album_id|          album_name|playcount|  prediction|
+-------+------------+--------+--------------------+---------+------------+
|     12|Evan_Mumford|     167|              Blonde|      255|  0.86312187|
|     12|Evan_Mumford|     534|         Madvillainy|      451|   0.5551498|
|      1|    Ko_tatsu|      58|            Geogaddi|      161|   0.2071779|
|      1|    Ko_tatsu|      71|         Tri Repetae|      121| 0.024169847|
|      1|    Ko_tatsu|      82|  A Moon Shaped Pool|      101|  0.27044207|
|     13|     Airen3a|      80|     The Glow, Pt. 2|      202|  -0.2850807|
|     13|     Airen3a|     451|      Hounds of Love|      213|  -0.5000315|
|     13|     Airen3a|     504|Serú Girán (Remas...|       55|  0.13302967|
|     13|     Airen3a|     536|             Veteran|      176|  0.21098709|
|     13|     Airen3a|     607|my love feels all...|       48|    0.152865|
|      6|   

In [None]:
# album_schema = StructType([

#     StructField('album_name', StringType()),
#     StructField('playcount', StringType())

# ])

In [None]:
# album_df = spark.createDataFrame([], schema = album_schema)

In [None]:
# album_df.printSchema()

In [None]:
'''
from pyspark.sql.types import *
from pyspark.sql.functions import col

#Define the schema for the datasets
schema_charttopartist = StructType([

    StructField("artists", StructType([

        StructField("@attr", StructType([

            StructField('total', StringType()),
            StructField('perPage', StringType()), 
            StructField('totalPages', StringType()),
            StructField('page', StringType())
            
        ])),
        
        StructField('artist', ArrayType(StructType([

            StructField('image', ArrayType(StructType([

                StructField('#text', StringType()),
                StructField('size',  StringType())
                
            ]))),


            # StructField('empty', StructType([

            StructField('mbid', StringType()),
            StructField('listeners', StringType()),
            StructField('streamable', StringType()),
            StructField('playcount', StringType()),
            StructField('name', StringType()),
            StructField('url', StringType()) 
            ])))
            
                
            
            
        ]))])
    # ])#)
# ])

In [None]:
df = spark.createDataFrame([], schema = schema_charttopartist)

In [None]:
df.printSchema()

In [None]:
rdd = spark.sparkContext.parallelize([chart_topartist_data.text])

In [None]:
chart_topartist_df = spark.read.json(rdd)

In [None]:
chart_topartist_df.printSchema()

In [None]:
chart_topartist_df.show(truncate=False)

In [None]:
# custom_charttopartist_df = spark.createDataFrame([], schema = custom_charttopartist)

In [None]:
# custom_charttopartist_df.printSchema()

In [None]:
# custom_usertopartist = StructType([

#     StructField('name', StringType()),
#     StructField('playcount', StringType())

# ])


In [None]:
# custom_usertopartist_df = spark.createDataFrame([], schema = custom_usertopartist)

In [None]:
# custom_usertopartist_df.printSchema()

In [None]:
# # match relationships (basic paths)

# query = '''MATCH (p1:Person) - [r]->(p2:Person) RETURN p1, r, p2 limit 10'''

# result = conn.query(query)
# print(result)

In [None]:

# from neo4j import GraphDatabase, basic_auth

# driver = GraphDatabase.driver(
#   "bolt://35.175.122.15:7687",
#   auth=basic_auth("neo4j", "child-teeth-formula"))

# cypher_query = '''
# MATCH (n)
# RETURN COUNT(n) AS count
# LIMIT $limit
# '''

# with driver.session(database="neo4j") as session:
#   results = session.read_transaction(
#     lambda tx: tx.run(cypher_query,
#                       limit=10).data())
#   for record in results:
#     print(record['count'])

# driver.close()
