In [20]:
import pandas as pd
import numpy as np
import datetime as dt
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Conv1D, MaxPooling1D, Flatten
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.utils import np_utils
import matplotlib.pyplot as plt
import seaborn as sns
import keras
# import plotly.express as px
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from gensim.models import Word2Vec
from sklearn.preprocessing import OneHotEncoder

In [2]:
listening_history = pd.read_csv('../data/listening_history.csv', sep='\t')
id_metadata = pd.read_csv('../data/id_metadata.csv', sep='\t')
id_information = pd.read_csv('../data/id_information.csv', sep='\t')

# Listening history 

In [3]:
listening_history

Unnamed: 0,user,song,timestamp
0,user_007XIjOr,DaTQ53TUmfP93FSr,2019-02-20 12:28
1,user_007XIjOr,dGeyvi5WCOjDU7da,2019-02-20 12:35
2,user_007XIjOr,qUm54NYOjeFhmKYx,2019-02-20 12:48
3,user_007XIjOr,FtnuMT1DlevSR2n5,2019-02-20 12:52
4,user_007XIjOr,LHETTZcSZLeaVOGh,2019-02-20 13:09
...,...,...,...
5109587,user_zzWscYTy,BBiswLufo26YQCT7,2019-01-10 15:57
5109588,user_zzWscYTy,5ZHgff3sjETIiedr,2019-01-10 16:21
5109589,user_zzWscYTy,m4O1iLh6fC43xjRy,2019-01-10 16:48
5109590,user_zzWscYTy,mvUaP8k67qOFfA65,2019-01-10 21:13


### timestamp to datetime

In [4]:
listening_history['timestamp'] = pd.to_datetime(listening_history['timestamp'], format='%Y-%m-%d %H:%M')
listening_history.dtypes

user                 object
song                 object
timestamp    datetime64[ns]
dtype: object

### Adding time_zone to dataframe according to morning, noon, afternoon

In [5]:
def determine_time_zone(timestamp):
    hour = timestamp.hour
    if 4 <= hour < 12:
        return 0
    elif 12 <= hour < 20:
        return 1
    else:
        return 2
    
listening_history['time_zone'] = listening_history['timestamp'].apply(determine_time_zone)

In [6]:
listening_history.groupby('time_zone').count()

Unnamed: 0_level_0,user,song,timestamp
time_zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1072286,1072286,1072286
1,2146765,2146765,2146765
2,1890541,1890541,1890541


# Metadata

In [7]:
id_metadata

Unnamed: 0,id,spotify_id,popularity,release,danceability,energy,key,mode,valence,tempo,duration_ms
0,0009fFIM1eYThaPg,3eObKIfHKJ1nAPh0wTxFCc,12.0,2009,0.635,0.746,6.0,1.0,0.548,110.973,229947
1,0010xmHR6UICBOYT,27szvF97Tu95GxN98N52fy,46.0,2013,0.591,0.513,7.0,0.0,0.263,172.208,325096
2,002Jyd0vN4HyCpqL,273lBFpxUCwisTpdnF9cVb,31.0,1974,0.319,0.925,2.0,1.0,0.658,157.630,285693
3,006TYKNjNxWjfKjy,1qZgergQ41vaD4zBf3AKXR,33.0,2017,0.432,0.979,7.0,1.0,0.162,90.008,332867
4,007LIJOPQ4Sb98qV,6rVxJ3sN3Cz40MSLavbG1K,19.0,2009,0.357,0.708,9.0,1.0,0.470,123.904,326067
...,...,...,...,...,...,...,...,...,...,...,...
109264,zzyyPUs7hC9Nz2e1,6IW5yPlWJYmXxRuuc7QkMx,36.0,2017,0.380,0.835,1.0,1.0,0.539,142.699,200880
109265,zzz0n04uuTUA7fNh,21pCToQdqcPi879auxjxKe,48.0,2014,0.572,0.503,6.0,0.0,0.553,103.215,262840
109266,zzzj3LYaZtYtbzSr,2SJ6FGmRPacKE9N4g4BRZU,21.0,2010,0.402,0.376,2.0,0.0,0.119,108.509,276507
109267,zzznMjZAKnJJXQSj,2ygfHXyt3gvyhvKrNJU61n,47.0,2017,0.532,0.977,10.0,0.0,0.966,116.527,224549


### Rename id column to song and drop unnecessary columns

In [8]:
id_metadata.rename(columns={'id': 'song'}, inplace=True)
id_metadata = id_metadata.drop('spotify_id', axis=1)
id_metadata

Unnamed: 0,song,popularity,release,danceability,energy,key,mode,valence,tempo,duration_ms
0,0009fFIM1eYThaPg,12.0,2009,0.635,0.746,6.0,1.0,0.548,110.973,229947
1,0010xmHR6UICBOYT,46.0,2013,0.591,0.513,7.0,0.0,0.263,172.208,325096
2,002Jyd0vN4HyCpqL,31.0,1974,0.319,0.925,2.0,1.0,0.658,157.630,285693
3,006TYKNjNxWjfKjy,33.0,2017,0.432,0.979,7.0,1.0,0.162,90.008,332867
4,007LIJOPQ4Sb98qV,19.0,2009,0.357,0.708,9.0,1.0,0.470,123.904,326067
...,...,...,...,...,...,...,...,...,...,...
109264,zzyyPUs7hC9Nz2e1,36.0,2017,0.380,0.835,1.0,1.0,0.539,142.699,200880
109265,zzz0n04uuTUA7fNh,48.0,2014,0.572,0.503,6.0,0.0,0.553,103.215,262840
109266,zzzj3LYaZtYtbzSr,21.0,2010,0.402,0.376,2.0,0.0,0.119,108.509,276507
109267,zzznMjZAKnJJXQSj,47.0,2017,0.532,0.977,10.0,0.0,0.966,116.527,224549


# Data preparation
### merge metadata with history

In [12]:
merged_df = listening_history.merge(id_metadata, on='song')
merged_df = merged_df.drop('timestamp', axis=1)
merged_df

Unnamed: 0,user,song,time_zone,popularity,release,danceability,energy,key,mode,valence,tempo,duration_ms
0,user_007XIjOr,DaTQ53TUmfP93FSr,1,55.0,2016,0.360,0.257,7.0,1.0,0.130,76.972,212184
1,user_007XIjOr,DaTQ53TUmfP93FSr,1,55.0,2016,0.360,0.257,7.0,1.0,0.130,76.972,212184
2,user_02DWuQOR,DaTQ53TUmfP93FSr,1,55.0,2016,0.360,0.257,7.0,1.0,0.130,76.972,212184
3,user_02DWuQOR,DaTQ53TUmfP93FSr,1,55.0,2016,0.360,0.257,7.0,1.0,0.130,76.972,212184
4,user_02DWuQOR,DaTQ53TUmfP93FSr,1,55.0,2016,0.360,0.257,7.0,1.0,0.130,76.972,212184
...,...,...,...,...,...,...,...,...,...,...,...,...
5109587,user_zvN2NV5F,f5NTrCtvRA6b0n4L,2,41.0,1998,0.620,0.786,8.0,1.0,0.923,131.447,134800
5109588,user_zvqqjDEW,Cse4wkzig03ZT86e,2,8.0,1998,0.286,0.753,2.0,1.0,0.195,124.372,570680
5109589,user_zwYyH93G,hKWwaAkkIuaEYqO5,2,25.0,1999,0.705,0.536,6.0,1.0,0.252,80.988,274467
5109590,user_zwjkkiDG,u6tON1C2GLRiCalC,1,43.0,2017,0.385,0.893,5.0,1.0,0.484,173.784,221293


In [24]:
numerical_features = ['time_zone','popularity', 'release', 'danceability', 'energy', 'valence','key', 'mode', 'tempo', 'duration_ms']

In [68]:
scaler = MinMaxScaler()
merged_df[numerical_features] = scaler.fit_transform(merged_df[numerical_features])
merged_df

Unnamed: 0,user,song,time_zone,popularity,release,danceability,energy,key,mode,valence,tempo,duration_ms
0,user_007XIjOr,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
1,user_007XIjOr,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
2,user_02DWuQOR,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
3,user_02DWuQOR,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
4,user_02DWuQOR,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
...,...,...,...,...,...,...,...,...,...,...,...,...
5109587,user_zvN2NV5F,f5NTrCtvRA6b0n4L,1.0,0.431579,0.979125,0.627530,0.786,0.727273,1.0,0.924850,0.541150,0.025575
5109588,user_zvqqjDEW,Cse4wkzig03ZT86e,1.0,0.084211,0.979125,0.289474,0.753,0.181818,1.0,0.195391,0.512023,0.112959
5109589,user_zwYyH93G,hKWwaAkkIuaEYqO5,1.0,0.263158,0.980119,0.713563,0.536,0.545455,1.0,0.252505,0.333417,0.053575
5109590,user_zwjkkiDG,u6tON1C2GLRiCalC,0.5,0.452632,0.998012,0.389676,0.893,0.454545,1.0,0.484970,0.715446,0.042915


### sequences

In [71]:
sequence_length = 5
user_sequences = {}
user_targets = {}

unique_users = merged_df['user'].unique()
num_users_to_select = 10
selected_users = unique_users[:num_users_to_select]
selected_users


array(['user_007XIjOr', 'user_02DWuQOR', 'user_0BZUk6bj', 'user_0PJuAoVH',
       'user_0QbKRt8m', 'user_0iJPiO69', 'user_0p5fXniu', 'user_0woySlm9',
       'user_1tbCBIuJ', 'user_1uuvfpkw'], dtype=object)

In [88]:
selected_users_df = merged_df[merged_df['user'].isin(selected_users)]
selected_users_df

Unnamed: 0,user,song,time_zone,popularity,release,danceability,energy,key,mode,valence,tempo,duration_ms
0,user_007XIjOr,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
1,user_007XIjOr,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
2,user_02DWuQOR,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
3,user_02DWuQOR,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
4,user_02DWuQOR,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
...,...,...,...,...,...,...,...,...,...,...,...,...
4191488,user_1uuvfpkw,Cm1td6VOEeQ4BdsM,0.5,0.526316,0.999006,0.644737,0.348,0.000000,1.0,0.266533,0.616102,0.030015
4191524,user_1uuvfpkw,OdrLYcgU4DyEICZP,0.5,0.410526,0.994036,0.343117,0.286,0.636364,1.0,0.243487,0.580425,0.038864
4191525,user_1uuvfpkw,OdrLYcgU4DyEICZP,0.5,0.410526,0.994036,0.343117,0.286,0.636364,1.0,0.243487,0.580425,0.038864
4191586,user_1uuvfpkw,8QlRkgeYjlnoe84z,0.0,0.505263,0.955268,0.206478,0.406,0.000000,1.0,0.090581,0.656945,0.083866


In [93]:
max_listens = selected_users_df.groupby('user').count().max().song
max_listens

444

In [257]:
selected_users_df.groupby('song').count()

Unnamed: 0_level_0,user,time_zone,popularity,release,danceability,energy,key,mode,valence,tempo,duration_ms
song,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
019Q5nZ079pL34qa,1,1,1,1,1,1,1,1,1,1,1
01Kgo55olDKOHxQz,1,1,1,1,1,1,1,1,1,1,1
0247XD85scrr19o0,5,5,5,5,5,5,5,5,5,5,5
02tlc7d6RtnNVG3P,4,4,4,4,4,4,4,4,4,4,4
03v7PvCepe2N5A6V,1,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...
zvNnbDSgevQ3A35j,1,1,1,1,1,1,1,1,1,1,1
zwHvcxA5beGDSUul,8,8,8,8,8,8,8,8,8,8,8
zxE8Hv3rDDFdgZzu,2,2,2,2,2,2,2,2,2,2,2
zyz0UbYN4n9rHXex,1,1,1,1,1,1,1,1,1,1,1


In [122]:
selected_users_df.groupby('user').head(1)

Unnamed: 0,user,song,time_zone,popularity,release,danceability,energy,key,mode,valence,tempo,duration_ms
0,user_007XIjOr,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
2,user_02DWuQOR,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
14,user_0BZUk6bj,DaTQ53TUmfP93FSr,1.0,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
16,user_0PJuAoVH,DaTQ53TUmfP93FSr,1.0,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
17,user_0QbKRt8m,DaTQ53TUmfP93FSr,0.0,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
18,user_0iJPiO69,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
19,user_0p5fXniu,DaTQ53TUmfP93FSr,0.0,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
21,user_0woySlm9,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
22,user_1tbCBIuJ,DaTQ53TUmfP93FSr,1.0,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089
24,user_1uuvfpkw,DaTQ53TUmfP93FSr,0.5,0.578947,0.997018,0.364372,0.257,0.636364,1.0,0.130261,0.316884,0.041089


In [258]:
selected_users_df.to_csv('seleccted_users.csv')

In [244]:
for user_id, user_data in selected_users_df.groupby('user'):
    sequences = []
    targets = []
    for i in range(max_listens - sequence_length):
#         if(i+sequence_length + 1 > len(user_data)):
# #             for j in range(sequence_length):
#             sequences.append(sequence_length * [np.array(11*[-1])])
#             print(i)
#         else:
        sequence = user_data.iloc[i:i+sequence_length]
#             print(sequence)
        target = user_data.iloc[i+sequence_length]['song']
        print(target)
        # Extract the relevant features for the sequence
        sequence_data = sequence[['song','time_zone','popularity', 'release', 'danceability', 'energy', 'valence','key', 'mode', 'tempo', 'duration_ms']].values
#             print(sequence_data)
        # Append the sequence and target to the lists
        sequences.append(sequence_data)
        targets.append(target)
        print(len(sequence_data))
#         break
#     break
    # Convert the lists to numpy arrays
    sequences = np.array(sequences)
    targets = np.array(targets)

#     print(sequences)
    # Store the sequences and targets in the dictionaries with user_id as the key
    user_sequences[user_id] = sequences
    user_targets[user_id] = targets

dGeyvi5WCOjDU7da
5
dGeyvi5WCOjDU7da
5
dGeyvi5WCOjDU7da
5
dGeyvi5WCOjDU7da
5
qUm54NYOjeFhmKYx
5
FtnuMT1DlevSR2n5
5
FtnuMT1DlevSR2n5
5
LHETTZcSZLeaVOGh
5
LHETTZcSZLeaVOGh
5
LHETTZcSZLeaVOGh
5
LHETTZcSZLeaVOGh
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
zXMHUt57MEYgpgIz
5
QDtAFWCvEaRwFPnC
5
QDtAFWCvEaRwFPnC
5
MFVaxfkxZAUc4C8U
5
DhLocRBlCrB5ikDO
5
DhLocRBlCrB5ikDO
5
DhLocRBlCrB5ikDO
5
DhLocRBlCrB5ikDO
5
DhLocRBlCrB5ikDO
5
zkJb2QRrowAMNqvr
5
zkJb2QRrowAMNqvr
5
zkJb2QRrowAM

5
MuuCVghux11weAX8
5
0ZLbSoXXsKlXYZ7a
5
GvHdsJ73USXNRqEN
5
GvHdsJ73USXNRqEN
5
ye6SUPDliXCR2kMs
5
os42rbBZt4EjOJjV
5
a5Q5tQ8fU4bWEyQV
5
a5Q5tQ8fU4bWEyQV
5
a5Q5tQ8fU4bWEyQV
5
a5Q5tQ8fU4bWEyQV
5
a5Q5tQ8fU4bWEyQV
5
a5Q5tQ8fU4bWEyQV
5
sGq0VelUSRbKXiaJ
5
sGq0VelUSRbKXiaJ
5
sGq0VelUSRbKXiaJ
5
sGq0VelUSRbKXiaJ
5
sGq0VelUSRbKXiaJ
5
sGq0VelUSRbKXiaJ
5
sGq0VelUSRbKXiaJ
5
VMjxxrIP70wCdFqE
5
VMjxxrIP70wCdFqE
5
VMjxxrIP70wCdFqE
5
VMjxxrIP70wCdFqE
5
VMjxxrIP70wCdFqE
5
VMjxxrIP70wCdFqE
5
VMjxxrIP70wCdFqE
5
VMjxxrIP70wCdFqE
5
IPBxYluXzjFoMWcN
5
IPBxYluXzjFoMWcN
5
IPBxYluXzjFoMWcN
5
IPBxYluXzjFoMWcN
5
IPBxYluXzjFoMWcN
5
IPBxYluXzjFoMWcN
5
IPBxYluXzjFoMWcN
5
s9rPUPTku9iMWxlf
5
s9rPUPTku9iMWxlf
5
s9rPUPTku9iMWxlf
5
s9rPUPTku9iMWxlf
5
s9rPUPTku9iMWxlf
5
s9rPUPTku9iMWxlf
5
s9rPUPTku9iMWxlf
5
fBOMx3gzXIQ7duye
5
KnRDmXN8gqzf0fWx
5
MtEkJVpO4YLZDk8i
5
9D9XV83huXX44IB4
5
amve4aeXx0lnkWl7
5
GLF3crx5SBWO0NUM
5
stmvJR8XwU72o8AI
5
8oCNU9rPcvqM8AWS
5
jMrcqngxP9sYZ4GB
5
5a88nuWH641sE63O
5
3l32faScrYNVf1rT
5
fjBxt9kUxN

5
gBzRda3nTO4Mecre
5
WK3RuY0uVJiTusE7
5
WK3RuY0uVJiTusE7
5
DQIhGr1Ot8eBt1Xg
5
o2LcK38b4mFLa55g
5
o2LcK38b4mFLa55g
5
rHDPdKtkGg1MiHDb
5
oko0sQL8EFOGTdrz
5
NxGbCGEsEYOXF08N
5
NxGbCGEsEYOXF08N
5
NxGbCGEsEYOXF08N
5
aqVuQmsCdABZYPXV
5
HY3POX0hY4lj7ZGP
5
ujdjTAhh2zQ62MbZ
5
lvWUqZQMXYn2qvws
5
lvWUqZQMXYn2qvws
5
lvWUqZQMXYn2qvws
5
lvWUqZQMXYn2qvws
5
n9a6Q6iaPIsz1kqf
5
n9a6Q6iaPIsz1kqf
5
n9a6Q6iaPIsz1kqf
5
n9a6Q6iaPIsz1kqf
5
n9a6Q6iaPIsz1kqf
5
n9a6Q6iaPIsz1kqf
5
nHpYKfXG74Yr89FX
5
nHpYKfXG74Yr89FX
5
nHpYKfXG74Yr89FX
5
nHpYKfXG74Yr89FX
5
nHpYKfXG74Yr89FX
5
nHpYKfXG74Yr89FX
5
nHpYKfXG74Yr89FX
5
QjZ42Z5ZVh8kDrT0
5
QjZ42Z5ZVh8kDrT0
5
QjZ42Z5ZVh8kDrT0
5
QjZ42Z5ZVh8kDrT0
5
QjZ42Z5ZVh8kDrT0
5
QjZ42Z5ZVh8kDrT0
5
QjZ42Z5ZVh8kDrT0
5
QjZ42Z5ZVh8kDrT0
5
Y4CsQmo3lY6wEE12
5
Y4CsQmo3lY6wEE12
5
Y4CsQmo3lY6wEE12
5
yQiVAWqDTHE0KjsR
5
3tNQWrL8rYL7zBmY
5
3tNQWrL8rYL7zBmY
5
3tNQWrL8rYL7zBmY
5
3tNQWrL8rYL7zBmY
5
3tNQWrL8rYL7zBmY
5
3tNQWrL8rYL7zBmY
5
N3rh9mkjgED8EPaN
5
CXUnI5MHMzqSXSHa
5
Uop8gPCwESlxWLBg
5
Uop8gPCwES

5
wqdE9DkWgjMdGwOn
5
0Zhw8ngsZeMvxZM3
5
0Zhw8ngsZeMvxZM3
5
0Zhw8ngsZeMvxZM3
5
Fywe8EHE9KSD5Ulw
5
Xqnqd69GY5xNU3iH
5
9ziWMjkmNbJKtU8Y
5
JP2Xy1Oypp16D4EO
5
d7p61YT3cTo0FhOi
5
alLiFpkto84bTls1
5
FC11LJWfJJnEvBKD
5
DmweUoqU3VC5iBwM
5
NeY9ivzSUmqfloMK
5
25tftkfnWAd88uVv
5
O0L20lvwMeIawcEj
5
BEVFAClIA3ACssk0
5
LsQkLcc98qoN2sWJ
5
E2XW0WY6ZAgSzyHZ
5
sQy1Nq8InxKtV0mW
5
kTi1WF4m26YVNsVu
5
CoHup8lzLaSc0Z58
5
jjWvwzkJOqJjAJ2n
5
35xLsoRAsn42kU6f
5
vvST5h16hjYjkzRC
5
s3XQ9Lyp7RclZvIz
5
x4RAQjvhxrggl1ii
5
x4RAQjvhxrggl1ii
5
cEWqJJAi3qWHfXtV
5
kYFCo7M0YFfCNRmM
5
18yKlRtGvfXYh35w
5
nbqQmabqFXLS2rqQ
5
S3vBkirIgZ9YYa8N
5
mspk4E8kfs5z11lR
5
kqfdI9bYpJPeMZZ5
5
bu1zMesTQKMcREAY
5
aH0vIGZ8Y9uP9mlm
5
z5aj7SKojvbx7XEO
5
FRt9EKId1w1xCbiu
5
f6c79k4bG661GWIb
5
1wfIkdvGunMCtin2
5
bXjUnexphin1fGEE
5
9dih3e1IumF2IXvS
5
9dih3e1IumF2IXvS
5
9dih3e1IumF2IXvS
5
EvKsiZtqxHWSUOIm
5
EvKsiZtqxHWSUOIm
5
EvKsiZtqxHWSUOIm
5
EvKsiZtqxHWSUOIm
5
Pxu7d6dw3oRvAjfm
5
Pxu7d6dw3oRvAjfm
5
GoYo9wRiITsRDvYB
5
GoYo9wRiITsRDvYB
5
9CFIZd07WU

48c10ilafPYDEaKl
5
48c10ilafPYDEaKl
5
48c10ilafPYDEaKl
5
eqap64zLlsAZxizW
5
eqap64zLlsAZxizW
5
Ru6svPlOp39EjVpc
5
n2dmN7MAS5kbkkJr
5
SBRwJDRNzRtOQbKX
5
tzxTJugY3lsx6Om7
5
FfAOWFUOWdSn0M08
5
1meisSGwt4KAjrVP
5
GzacORlsoC9wIvKs
5
EQeRaQuzupJSeJYX
5
xLt7ii72kphLnRMa
5
Td5yqNdeMj4xqPAd
5
Td5yqNdeMj4xqPAd
5
5RxsEogLBdXW2U8v
5
DNxXlUUNqX4saEBc
5
tgHwzfJGgNxVdGCg
5
KnMItrBYhHFixzJT
5
KnMItrBYhHFixzJT
5
TXi2DCIQzKFHA1uL
5
TXi2DCIQzKFHA1uL
5
YwURIu6eZDCJyuEf
5
hHNHxJvuJGQe3RZF
5
H7dhyrE2IpdLFFTp
5
H7dhyrE2IpdLFFTp
5
H7dhyrE2IpdLFFTp
5
JXF6FqgJ4J52uPSG
5
aEcWdVssKTA0Ayrh
5
aEcWdVssKTA0Ayrh
5
LPDMx981Xn1Hpmrz
5
LPDMx981Xn1Hpmrz
5
EBB7mUheKyVbv6OI
5
iPbNmnHajSsGPq0n
5
iPbNmnHajSsGPq0n
5
8AVECDHAJecitgwx
5
Lmf6WOqPA2LOd7Qm
5
TcUb75QrVdYnF7Jo
5
vkOg9ZAiUgUT87k6
5
5b3hiinYGzq6w4pf
5
i9mHGlQpKTxWJi3c
5
i9mHGlQpKTxWJi3c
5
fpagLAvfcxihdeKu
5
fpagLAvfcxihdeKu
5
WD5CdncqGKUhvzg6
5
foqzafIbpkaOo1WE
5
2miK4vEQrIwr05Is
5
4HRZrpWMB3Y7XJji
5
dVFbdKN0j7eGtsMZ
5
4Ftif9qQReU76eG4
5
MwnVtpCds5ncb4zO
5
MwnVtpCds5nc

5
TCqJF2Gy33tHHRVl
5
TCqJF2Gy33tHHRVl
5
4bwP0c8VldcIippE
5
fdvj7qmxOIvZPEeA
5
fdvj7qmxOIvZPEeA
5
crRmi8rnDrWx2VZk
5
crRmi8rnDrWx2VZk
5
6q82iduttMTsOFW9
5
K1A5q727KahlnvNs
5
y71pnqFOW9Ftxn7C
5
j8qrlsfqWAPqzqD9
5
DxCDyaf8cWn57YCi
5
mlmOhyhaiYh5soJC
5
mlmOhyhaiYh5soJC
5
qyoVU7bw5iq8QNaS
5
aKoaf7CavEEx5Nvd
5
ETwuGNnkTlhp0IEs
5
w50Lfb8pr9mE7OX5
5
IS7czHc804x9Os86
5
ohJmc105kCCV41gN
5
Bazvs0LCxtQVDVmU
5
URaenK8075LAqt4S
5
LylaqKzFJ98s7EuR
5
vmSuHEOhZAflVrvE
5
vmSuHEOhZAflVrvE
5
36CuI2BqccsbuKTd
5
36CuI2BqccsbuKTd
5
oEuMJpIYqVZzAIuN
5
FejurLx3miAzKEld
5
FejurLx3miAzKEld
5
tl1NkiYEmR0IBDFF
5
tl1NkiYEmR0IBDFF
5
4Ktg0oEpvRsrNYRM
5
4Ktg0oEpvRsrNYRM
5
r99sfF4yLT4uEOoE
5
DOlk8ct9zA9uIFX0
5
3Y38upRWxIOVg0Uq
5
jKrxXcOq3Q6gpriI
5
ebsr6NShYvKqDsgl
5
5eDv39ZrfYmgtj3b
5
11BnjH9KP5mjrtuj
5
11BnjH9KP5mjrtuj
5
VT3mPpvZkjxW3LoZ
5
OFf3Hkbjo1hl6JWr
5
k0IbplQHcpkSiatN
5
2f8AHJXYtJ2qWCdN
5
EwCdwXIxYWdi3Odz
5
RST57JJzLeQ1Vx73
5
RST57JJzLeQ1Vx73
5
RST57JJzLeQ1Vx73
5
RST57JJzLeQ1Vx73
5
RST57JJzLeQ1Vx73
5
RST57JJzLe

96OiJ00Yzwojj4GD
5
JvBhSi6I7ANRrcO1
5
0xieNpJEtHyOpypj
5
Sn4aZNRu99oEkWxs
5
KPnV8gDixQuyqgwF
5
3NMztCunn5w05vzy
5
AokXV8yNzRXOBhx4
5
P9ze6G9FMr4OiERY
5
z56RElaEvJEsNlR6
5
bZGpEq21K1dnr1z1
5
bZGpEq21K1dnr1z1
5
bZGpEq21K1dnr1z1
5
bZGpEq21K1dnr1z1
5
bZGpEq21K1dnr1z1
5
TWfPeAbGuDdl0hNW
5
3eWIlYXmyOrblpXI
5
oAwswnsLPg1AwwAb
5
oAwswnsLPg1AwwAb
5
QB21XNy13HiMBpvl
5
WxS0vYWUetVimlEG
5
1cOPjF4jBd2sWa6Z
5
oIj3tI2JNPqBoID1
5
oIj3tI2JNPqBoID1
5
xhR5W3K53lEOizCM
5
xhR5W3K53lEOizCM
5
xhR5W3K53lEOizCM
5
xhR5W3K53lEOizCM
5
Qroh1jw3xR2yjaM9
5
KJq10i90d8CniGK0
5
KJq10i90d8CniGK0
5
IgQT25VkuSrkKXEX
5
V0cr4VZ5cCfFPlTx
5
V0cr4VZ5cCfFPlTx
5
V0cr4VZ5cCfFPlTx
5
vcu7uPBD2Ll2aZc6
5
rrdk3UXEdIqQA1dP
5
Lr07fhk9Nrjh3pQB
5
g5nb8zHJ4V4Z5lpK
5
6CYt8VEK0nlsbpcM
5
6CYt8VEK0nlsbpcM
5
6CYt8VEK0nlsbpcM
5
43IupGoA4KBmShMy
5
TOFpFkUpIG9K6pwx
5
b0Xqwik93kaSH3rS
5
q2gOeNvnuYeusMvi
5
gaMF0Phkp5YdNFS4
5
TUyo2k5HkQxUqgL2
5
TUyo2k5HkQxUqgL2
5
6fBGhprmO1AGjA3k
5
OmMiCfVoQ0zfhg9Z
5
2GewexOFuJ9Wj9e7
5
aqCI4Y9vFJhT13Dw
5
vCjPncpWVbgY

In [245]:
user_sequences["user_007XIjOr"]

array([[['DaTQ53TUmfP93FSr', 0.5, 0.5789473684210527, ..., 1.0,
         0.3168836943141912, 0.04108890664675789],
        ['DaTQ53TUmfP93FSr', 0.5, 0.5789473684210527, ..., 1.0,
         0.3168836943141912, 0.04108890664675789],
        ['dGeyvi5WCOjDU7da', 0.5, 0.25263157894736843, ..., 1.0,
         0.49207296739850886, 0.043302781868636585],
        ['dGeyvi5WCOjDU7da', 0.5, 0.25263157894736843, ..., 1.0,
         0.49207296739850886, 0.043302781868636585],
        ['dGeyvi5WCOjDU7da', 1.0, 0.25263157894736843, ..., 1.0,
         0.49207296739850886, 0.043302781868636585]],

       [['DaTQ53TUmfP93FSr', 0.5, 0.5789473684210527, ..., 1.0,
         0.3168836943141912, 0.04108890664675789],
        ['dGeyvi5WCOjDU7da', 0.5, 0.25263157894736843, ..., 1.0,
         0.49207296739850886, 0.043302781868636585],
        ['dGeyvi5WCOjDU7da', 0.5, 0.25263157894736843, ..., 1.0,
         0.49207296739850886, 0.043302781868636585],
        ['dGeyvi5WCOjDU7da', 1.0, 0.25263157894736843, ..., 1.0

In [236]:

for user in user_sequences:
#     for row in user_sequences[user]:
#         print(row, len(row))
    print(len(user_sequences[user]))
#     break

455
679
663
679
667
439
987
587
687
823


In [246]:
user_targets["user_007XIjOr"]

array(['dGeyvi5WCOjDU7da', 'dGeyvi5WCOjDU7da', 'dGeyvi5WCOjDU7da',
       'dGeyvi5WCOjDU7da', 'qUm54NYOjeFhmKYx', 'FtnuMT1DlevSR2n5',
       'FtnuMT1DlevSR2n5', 'LHETTZcSZLeaVOGh', 'LHETTZcSZLeaVOGh',
       'LHETTZcSZLeaVOGh', 'LHETTZcSZLeaVOGh', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz', 'zXMHUt57MEYgpgIz',
       'QDtAFWCvEaRwFPnC', 'QDtAFWCvEaRwFPnC', 'MFVaxfkxZAUc4C

In [264]:
id_information = pd.read_csv('../data/id_information.csv', sep='\t')
id_information.loc[id_information['id'] == '2Y8LxNpgQaUNV2EA']

Unnamed: 0,id,artist,song,album_name
4557,2Y8LxNpgQaUNV2EA,John Maus,Episode,Addendum
