In [1]:
import pandas as pd
import numpy as np
import datetime as dt
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Conv1D, MaxPooling1D, Flatten
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.utils import np_utils
import matplotlib.pyplot as plt
import seaborn as sns
import keras
# import plotly.express as px
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from gensim.models import Word2Vec

In [2]:
def _load_tsv(path):
    """Read one tab-separated dataset file into a DataFrame."""
    return pd.read_csv(path, sep='\t')


# All three raw tables live side by side in ../data as tab-separated files.
listening_history = _load_tsv('../data/listening_history.csv')
id_metadata = _load_tsv('../data/id_metadata.csv')
id_information = _load_tsv('../data/id_information.csv')

In [3]:
# Preview the freshly loaded listening history (one row per user/song/timestamp play).
listening_history

Unnamed: 0,user,song,timestamp
0,user_007XIjOr,DaTQ53TUmfP93FSr,2019-02-20 12:28
1,user_007XIjOr,dGeyvi5WCOjDU7da,2019-02-20 12:35
2,user_007XIjOr,qUm54NYOjeFhmKYx,2019-02-20 12:48
3,user_007XIjOr,FtnuMT1DlevSR2n5,2019-02-20 12:52
4,user_007XIjOr,LHETTZcSZLeaVOGh,2019-02-20 13:09
...,...,...,...
5109587,user_zzWscYTy,BBiswLufo26YQCT7,2019-01-10 15:57
5109588,user_zzWscYTy,5ZHgff3sjETIiedr,2019-01-10 16:21
5109589,user_zzWscYTy,m4O1iLh6fC43xjRy,2019-01-10 16:48
5109590,user_zzWscYTy,mvUaP8k67qOFfA65,2019-01-10 21:13


In [4]:
# Convert the textual 'YYYY-MM-DD HH:MM' timestamps into real datetime values so the
# .dt accessor can be used further down, then show the dtypes to confirm the conversion.
raw_timestamps = listening_history['timestamp']
listening_history['timestamp'] = pd.to_datetime(raw_timestamps, format='%Y-%m-%d %H:%M')
listening_history.dtypes

user                 object
song                 object
timestamp    datetime64[ns]
dtype: object

In [5]:
# Part-of-day windows as zero-padded 24-hour 'HH:MM' strings. Both ends of every
# window are inclusive. Zero-padded strings sort in chronological order, so plain
# string comparison is sufficient.
# NOTE(review): determine_time_zone, defined later in this notebook, starts the
# morning at hour 6 rather than 05:00 -- confirm which boundary is intended.
morning_start = '05:00'
morning_end = '12:00'
day_start = '12:01'
day_end = '20:00'
night_start = '20:01'
night_end = '04:59'

# Format the time of day once and reuse it for all three masks instead of
# re-running strftime over the whole frame for every comparison.
time_of_day = listening_history['timestamp'].dt.strftime('%H:%M')

# End bounds use <= : with a strict '<', plays stamped exactly 12:00 or 20:00
# matched none of the three windows and were silently dropped.
morning_data = listening_history[(time_of_day >= morning_start) & (time_of_day <= morning_end)]
day_data = listening_history[(time_of_day >= day_start) & (time_of_day <= day_end)]
# The night window wraps past midnight, hence OR rather than AND.
night_data = listening_history[(time_of_day >= night_start) | (time_of_day <= night_end)]

In [6]:
# Inspect the rows selected by the morning window.
morning_data

Unnamed: 0,user,song,timestamp
92,user_007XIjOr,4g0JdsD03ZguB1Ko,2019-02-25 06:00:00
443,user_00Y0Uqn9,Pf7CTDMAqnLhbcY2,2019-02-08 07:41:00
444,user_00Y0Uqn9,KKmk2QJLVj2Aqsjg,2019-02-08 07:43:00
445,user_00Y0Uqn9,7Gsb7yKtAJNCvWez,2019-02-08 07:47:00
446,user_00Y0Uqn9,3shrACNQQ5eeQEAO,2019-02-08 07:50:00
...,...,...,...
5109462,user_zzWscYTy,K4k8FCQljsdr4EKC,2019-01-08 11:59:00
5109568,user_zzWscYTy,FfAOWFUOWdSn0M08,2019-01-10 10:28:00
5109569,user_zzWscYTy,4R1LOOq2sAnvmUhS,2019-01-10 10:33:00
5109570,user_zzWscYTy,Zm3pSVYvApmD7Ioa,2019-01-10 10:45:00


In [7]:
# Inspect the rows selected by the day window.
day_data

Unnamed: 0,user,song,timestamp
0,user_007XIjOr,DaTQ53TUmfP93FSr,2019-02-20 12:28:00
1,user_007XIjOr,dGeyvi5WCOjDU7da,2019-02-20 12:35:00
2,user_007XIjOr,qUm54NYOjeFhmKYx,2019-02-20 12:48:00
3,user_007XIjOr,FtnuMT1DlevSR2n5,2019-02-20 12:52:00
4,user_007XIjOr,LHETTZcSZLeaVOGh,2019-02-20 13:09:00
...,...,...,...
5109585,user_zzWscYTy,13avOnDUqkbSzRyI,2019-01-10 15:36:00
5109586,user_zzWscYTy,5ZHgff3sjETIiedr,2019-01-10 15:48:00
5109587,user_zzWscYTy,BBiswLufo26YQCT7,2019-01-10 15:57:00
5109588,user_zzWscYTy,5ZHgff3sjETIiedr,2019-01-10 16:21:00


In [8]:
# Inspect the rows selected by the night window (late evening plus early morning).
night_data

Unnamed: 0,user,song,timestamp
15,user_007XIjOr,iMZ6ZUqtQlUVG0oW,2019-02-21 00:04:00
16,user_007XIjOr,lHdsYHWV2aB5qYvE,2019-02-21 00:15:00
17,user_007XIjOr,D098jupnftqfLnvL,2019-02-21 00:29:00
18,user_007XIjOr,dwpODe8SVpfU7u1W,2019-02-21 00:33:00
19,user_007XIjOr,jH2ACbKWbK1AlGLB,2019-02-21 00:39:00
...,...,...,...
5109565,user_zzWscYTy,G8ueEOh6T3ep0kTa,2019-01-10 01:58:00
5109566,user_zzWscYTy,HoaPreGR3rAe5uqo,2019-01-10 02:01:00
5109567,user_zzWscYTy,y5iI2R8d4Zv1VCPs,2019-01-10 02:11:00
5109590,user_zzWscYTy,mvUaP8k67qOFfA65,2019-01-10 21:13:00


In [22]:
def determine_time_zone(timestamp):
    """Map a timestamp to a coarse part-of-day code based on its ``hour`` attribute.

    Despite the name, this has nothing to do with geographic time zones.

    Args:
        timestamp: Any object exposing an ``hour`` attribute.

    Returns:
        0 for hours 6-11, 1 for hours 12-19, and 2 for every other hour.

    NOTE(review): the string-based filter earlier in this notebook starts the
    morning at 05:00, whereas here hour 5 falls into bucket 2 -- confirm which
    boundary is intended.
    """
    hour_of_day = timestamp.hour
    # Half-open [first_hour, end_hour) ranges; the position in the tuple is the code.
    hour_ranges = ((6, 12), (12, 20))
    for code, (first_hour, end_hour) in enumerate(hour_ranges):
        if first_hour <= hour_of_day < end_hour:
            return code
    # Anything outside the ranges above (20-23 and 0-5) is the third bucket.
    return 2

In [23]:
# Label every play with its part-of-day code (0, 1 or 2 as returned by
# determine_time_zone) and display the frame with the new column.
time_zone_codes = listening_history['timestamp'].map(determine_time_zone)
listening_history['time_zone'] = time_zone_codes
listening_history

Unnamed: 0,user,song,timestamp,time_zone
0,user_007XIjOr,DaTQ53TUmfP93FSr,2019-02-20 12:28:00,1
1,user_007XIjOr,dGeyvi5WCOjDU7da,2019-02-20 12:35:00,1
2,user_007XIjOr,qUm54NYOjeFhmKYx,2019-02-20 12:48:00,1
3,user_007XIjOr,FtnuMT1DlevSR2n5,2019-02-20 12:52:00,1
4,user_007XIjOr,LHETTZcSZLeaVOGh,2019-02-20 13:09:00,1
...,...,...,...,...
5109587,user_zzWscYTy,BBiswLufo26YQCT7,2019-01-10 15:57:00,1
5109588,user_zzWscYTy,5ZHgff3sjETIiedr,2019-01-10 16:21:00,1
5109589,user_zzWscYTy,m4O1iLh6fC43xjRy,2019-01-10 16:48:00,1
5109590,user_zzWscYTy,mvUaP8k67qOFfA65,2019-01-10 21:13:00,2


In [26]:
# Rebuild the three part-of-day subsets from the time_zone column
# (0 -> morning, 1 -> day, 2 -> night), replacing the earlier string-based selection.
morning_data, day_data, night_data = (
    listening_history[listening_history['time_zone'] == zone_code]
    for zone_code in (0, 1, 2)
)

In [27]:
# Same 80/20 split with a fixed seed for each part-of-day subset, so re-running
# the cell reproduces the same partitions.
split_options = {'test_size': 0.2, 'random_state': 42}

morning_train, morning_test = train_test_split(morning_data, **split_options)
day_train, day_test = train_test_split(day_data, **split_options)
night_train, night_test = train_test_split(night_data, **split_options)