In [None]:
### imports from the standard library and third-party packages
import numpy as np
import pandas as pd
import torch
import csv
from torch.utils.data import DataLoader

### imports from project-local modules
from Encoding.Encode import music_encode, user_encode, user_dynamic_encode, lonehotenc
from DataCleaning.Creating_unique_list import unique_artists, unique_composers, unique_genre, unique_language, unique_user_features
from Model.dnn import DNN
from Model.rnn import RNN
from train import DTNMR, DTNMRWrapper
from dataset import MRSDataset

In [None]:
def build_dict(train, songs, users):
    """Group listening events by user and collect the (user, song) training pairs.

    Rows of ``train`` are filtered against the ``songs`` and ``users`` tables
    (the final_songs / final_users data obtained after cleaning the available
    data).

    Args:
        train: DataFrame with ``msno``, ``song_id`` and ``source_system_tab``
            columns.
        songs: DataFrame with a ``song_id`` column listing the retained songs.
        users: DataFrame with a ``msno`` column listing the retained users.

    Returns:
        A ``(user_songs, train_points)`` tuple:

        * ``user_songs`` maps each ``msno`` to a list of
          ``[song_id, source_system_tab]`` entries, one per row whose song is
          retained (the user itself does not have to appear in ``users``).
        * ``train_points`` is a list of ``[msno, song_id]`` pairs, in row
          order, for rows whose user AND song are both retained.
    """
    # Build the lookup sets once. The previous version rebuilt a list and
    # scanned it linearly for every row, i.e. O(rows * (users + songs)).
    known_users = set(users['msno'].tolist())
    known_songs = set(songs['song_id'].tolist())

    user_songs = {}
    train_points = []
    rows = zip(
        train['msno'].tolist(),
        train['song_id'].tolist(),
        train['source_system_tab'].tolist(),
    )
    for msno, song_id, source_tab in rows:
        # Both outputs require the song to be retained.
        if song_id not in known_songs:
            continue
        # Only train_points additionally requires a retained user.
        if msno in known_users:
            train_points.append([msno, song_id])
        user_songs.setdefault(msno, []).append([song_id, source_tab])
    return user_songs, train_points

In [None]:
### reading the cleaned CSV files (songs, users, train interactions)
songs, users, train = (
    pd.read_csv(csv_path)
    for csv_path in (
        'DataCleaning/final_songs.csv',
        'DataCleaning/final_users.csv',
        'DataCleaning/final_train.csv',
    )
)
print("files read")

In [None]:
### loading data
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: under pandas Copy-on-Write the chained in-place call updates
# a temporary object and `train` keeps its missing values.
train['source_system_tab'] = train['source_system_tab'].fillna('none')
# usb_dict: user -> listened songs; train_points: (user, song) pairs kept for training
usb_dict, train_points = build_dict(train,songs,users)
print("Data Loading Done")

In [None]:
### unique entries of final_songs.csv, as returned by the unique_* helpers
genre, artist, composer, language = (
    unique_genre(songs),
    unique_artists(songs),
    unique_composers(songs),
    unique_language(songs),
)

In [None]:
### music encoding
# Encodes the songs table using the genre / artist / composer / language lists.
# NOTE(review): the structure of `encoded_music` is defined by music_encode,
# which is not visible in this notebook — confirm before relying on it.
encoded_music = music_encode(songs,genre,artist,composer,language)
print("Music Encoding Done")

In [None]:
### forming lists of unique city, age and gender from users data
# NOTE(review): the (city, age, gender) order is taken from this unpacking only;
# confirm it matches what unique_user_features actually returns.
city, age, gender = unique_user_features(users)

In [None]:
### user encoding - based on the user and the encoding of the songs listened to
# Inputs: the users table, the gender / city / age lists, the per-user listening
# dictionary (usb_dict) and the encoded songs.
# NOTE(review): the lists are passed as gender, city, age, whereas
# unique_user_features is unpacked as city, age, gender — confirm this order
# matches user_encode's signature.
encoded_user_intrinsic = user_encode(users,gender,city,age,usb_dict,encoded_music)
print("User Intrinsic Encoding Done")

In [None]:
### transforming data from train.csv into a dict - key(user_id):val(list of (song,behaviour))
# Walk the three columns in lockstep instead of doing label-based
# `train[col][i]` lookups: that cost several Series accesses per row and
# breaks when the index labels are not unique.
data={}
for msno, song_id, source_tab in zip(train['msno'], train['song_id'], train['source_system_tab']):
    data.setdefault(msno, []).append((song_id, source_tab))

In [None]:
### sorted list of the distinct behaviours (source_system_tab values)
behaviour = np.unique(np.array(train['source_system_tab'].tolist())).tolist()

In [None]:
### one hot encoding of behaviour
# benum: behaviour -> its position in the `behaviour` list
benum = dict(zip(behaviour, range(len(behaviour))))
# behaviour_enc: behaviour -> encoding returned by lonehotenc for that behaviour
behaviour_enc = {b: lonehotenc(benum, b) for b in behaviour}

In [None]:
### building the training dataset and its batch loader
# NOTE(review): the positional literals 4 and 10 are undocumented here — confirm
# their meaning against MRSDataset and consider naming them as constants.
# NOTE(review): train_points is filtered by build_dict, but train['target'] is
# the full, unfiltered column — confirm the two stay aligned row-for-row.
train_set = MRSDataset(train_points,data,users,songs,encoded_user_intrinsic,encoded_music,behaviour_enc,4,10,train['target'].tolist())
# collate_fn returns the list of samples unchanged, so each batch is a plain
# Python list of up to 16 dataset items rather than stacked tensors.
train_dl = DataLoader(train_set, batch_size=16, shuffle=True,collate_fn=lambda x: x)

In [None]:
### Defining model
# NOTE(review): 9972, 9939 and 9 are hard-coded sizes — presumably tied to the
# encoded data; confirm against DTNMR's signature and derive them from the data
# if possible.
# NOTE(review): st_playlist_len=5 here vs. the literal 4 passed to MRSDataset —
# verify these are meant to differ.
model = DTNMR(9972,9939,9,st_playlist_len=5,emb_size=32)

In [None]:
### Optimizer: plain SGD over all model parameters
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [None]:
### Wrapping model, data loaders and optimizer, then training for one epoch
# NOTE(review): train_dl is passed twice — presumably as both the training and
# the validation loader; confirm against DTNMRWrapper and use a held-out loader
# if that is the case.
system = DTNMRWrapper(model,train_dl,train_dl,optimizer)
system.train(epochs=1)