In [1]:
import pandas as pd
import numpy as np

In [2]:
# Axis labels for the toy ratings data: five users, five movies, four time slots.
index_names = ['User{}'.format(user_no) for user_no in range(5)]
movie_names = [
    'Star Trek',
    'Interstellar',
    'Inception',
    'Silver Linings Playbook',
    'The Perks of Being a Wallflower',
]
times = ['Morning', 'Afternoon', 'Evening', 'Night']

In [3]:
# Two-way lookup between time-slot labels and their positions in `times`.
time2id = {label: position for position, label in enumerate(times)}
id2time = dict(enumerate(times))

In [4]:
# Two-way lookup between user labels and their positions in `index_names`.
index2id = {label: position for position, label in enumerate(index_names)}
id2index = dict(enumerate(index_names))

In [5]:
# Two-way lookup between movie titles and their positions in `movie_names`.
movie2id = {title: position for position, title in enumerate(movie_names)}
id2movie = dict(enumerate(movie_names))

In [6]:
# Show the id -> time-slot mapping to check the numbering.
id2time

{0: 'Morning', 1: 'Afternoon', 2: 'Evening', 3: 'Night'}

In [7]:
# Show the movie title -> id mapping to check the numbering.
movie2id

{'Inception': 2,
 'Interstellar': 1,
 'Silver Linings Playbook': 3,
 'Star Trek': 0,
 'The Perks of Being a Wallflower': 4}

In [8]:
# Show the user label -> id mapping to check the numbering.
index2id

{'User0': 0, 'User1': 1, 'User2': 2, 'User3': 3, 'User4': 4}

In [9]:
# Ratings matrix: one row per user, one column per movie, with 0 standing for
# "no rating". Building it directly from the scalar 0 gives integer columns
# from the start, instead of creating an all-NaN object-dtype frame and relying
# on fillna() to downcast it (behaviour that recent pandas versions deprecate).
dataset = pd.DataFrame(0, index=index_names, columns=movie_names)

In [10]:
# Known ratings as (user, movie, rating) triples; every other cell stays 0.
ratings = [
    ('User1', 'Star Trek', 5),
    ('User1', 'Inception', 3),
    ('User2', 'Interstellar', 3),
    ('User2', 'Inception', 4),
    ('User3', 'Interstellar', 3),
    ('User3', 'Inception', 4),
    ('User3', 'Star Trek', 5),
    ('User0', 'Silver Linings Playbook', 5),
    ('User0', 'The Perks of Being a Wallflower', 3),
    ('User0', 'Star Trek', 3),
    ('User4', 'Silver Linings Playbook', 4),
    ('User4', 'The Perks of Being a Wallflower', 5),
]

# Write each rating with a single .loc[row, column] call. The chained form
# dataset.loc[user][movie] = value assigns into an intermediate row object,
# which is not guaranteed to write through to `dataset` (and never does when
# pandas Copy-on-Write is active).
for user, movie, rating in ratings:
    dataset.loc[user, movie] = rating

In [11]:
# Display the filled-in user x movie ratings matrix (0 = no rating entered).
dataset

Unnamed: 0,Star Trek,Interstellar,Inception,Silver Linings Playbook,The Perks of Being a Wallflower
User0,3,0,0,5,3
User1,5,0,3,0,0
User2,0,3,4,0,0
User3,5,3,4,0,0
User4,0,0,0,4,5


In [12]:
# Convert the wide user x movie matrix to long form: unstack() yields one value
# per (column label, row label) pair, and reset_index() turns those two index
# levels into ordinary columns next to the value column.
flattened_dataset = dataset.unstack().reset_index()

In [13]:
# Descriptive names for the long-form frame, in the order its columns were
# produced: movie label, user label, rating value.
columns = ['Movie','User','Rating']

In [14]:
# Replace the auto-generated column labels left by reset_index() with the
# descriptive names defined above.
flattened_dataset.columns = columns

In [15]:
# Re-display the time-slot -> id mapping that is later used to encode Preferred_Time.
time2id

{'Afternoon': 1, 'Evening': 2, 'Morning': 0, 'Night': 3}

In [16]:
# Seed NumPy's global RNG so the np.random.choice draws made by data_sampler
# are repeatable when the cells are run in order from a fresh kernel.
np.random.seed(42)

In [17]:
def data_sampler(movie_name):
    """Pick a random time-of-day label for a movie title.

    'Star Trek', 'Interstellar' and 'Inception' get either 'Morning' or
    'Afternoon'; any other value gets either 'Evening' or 'Night'.

    The draw is made with np.random.choice, so it consumes NumPy's global
    random state.
    """
    daytime_titles = ('Star Trek', 'Interstellar', 'Inception')
    if movie_name in daytime_titles:
        candidate_slots = ['Morning', 'Afternoon']
    else:
        candidate_slots = ['Evening', 'Night']
    return np.random.choice(candidate_slots)

In [18]:
# Draw one preferred time slot per row, based on that row's movie title.
flattened_dataset['Preferred_Time'] = flattened_dataset['Movie'].apply(data_sampler)

In [19]:
# Save the long-form frame while Movie, User and Preferred_Time still hold
# their string labels (they are replaced by integer ids in later cells).
# to_csv is called with its defaults, so the frame's index is written as well.
flattened_dataset.to_csv('generated_dataset.csv')

In [20]:
# Re-display the title -> id mapping before it is applied to the Movie column.
movie2id

{'Inception': 2,
 'Interstellar': 1,
 'Silver Linings Playbook': 3,
 'Star Trek': 0,
 'The Perks of Being a Wallflower': 4}

In [21]:
# Encode movie titles as integer ids; an unknown title raises KeyError.
flattened_dataset['Movie'] = [movie2id[title] for title in flattened_dataset['Movie']]

In [22]:
# Encode user labels as integer ids; an unknown label raises KeyError.
flattened_dataset['User'] = [index2id[label] for label in flattened_dataset['User']]

In [23]:
# Encode time-slot labels as integer ids; an unknown label raises KeyError.
flattened_dataset['Preferred_Time'] = [time2id[slot] for slot in flattened_dataset['Preferred_Time']]

In [28]:
# Put the three id columns first and the rating last, which is the column
# order the tensor-filling cell reads positionally.
flattened_dataset = flattened_dataset.loc[:, ['Movie', 'User', 'Preferred_Time', 'Rating']]

In [29]:
# Dense ratings tensor indexed as [movie id, user id, time id]. The shape is
# derived from the label lists so it follows the data instead of a hard-coded
# (5, 5, 4).
main_tensor = np.zeros((len(movie_names), len(index_names), len(times)))

# Pull each id column out as an explicit integer array. Iterating over
# DataFrame.values instead would hand back whatever common dtype pandas picks
# for the whole frame, and non-integer values cannot be used as array indices.
movie_ids = np.asarray(flattened_dataset['Movie'], dtype=int)
user_ids = np.asarray(flattened_dataset['User'], dtype=int)
time_ids = np.asarray(flattened_dataset['Preferred_Time'], dtype=int)

# One indexed assignment writes every (movie, user, time) cell; cells for the
# time slots that were not drawn for a (movie, user) pair stay 0.
main_tensor[movie_ids, user_ids, time_ids] = np.asarray(flattened_dataset['Rating'], dtype=float)

In [33]:
from sktensor import dtensor, cp_als

In [34]:
# Wrap the NumPy array in sktensor's dtensor type; this wrapped tensor is what
# gets passed to cp_als.
T = dtensor(main_tensor)

In [53]:
# Decompose T with cp_als using 2 components and a random starting point. The
# call returns four values, unpacked here as P, fit, itr and exectimes.
# NOTE(review): presumably fit is the final fit score, itr the number of
# iterations and exectimes the per-iteration timings — confirm against the
# sktensor cp_als documentation.
# NOTE(review): with init='random' the factors may differ between runs unless
# the initialisation draws from the RNG seeded by np.random.seed(42) — confirm.
P, fit, itr, exectimes = cp_als(T, 2, init='random')

In [57]:
# Second entry of P.U. NOTE(review): presumably the factor matrix for tensor
# axis 1, which main_tensor indexes by user id — confirm against sktensor docs.
P.U[1]

unfolded_dtensor([[ 1.00000000e+00,  2.39863503e-01],
                  [-5.94651905e-19,  3.38650165e-01],
                  [-1.59311129e-18,  6.00227494e-01],
                  [ 5.12930280e-18,  1.00000000e+00],
                  [ 8.00090880e-01, -2.82620841e-18]])

In [58]:
# Third entry of P.U. NOTE(review): presumably the factor matrix for tensor
# axis 2, which main_tensor indexes by time-slot id — confirm against sktensor docs.
P.U[2]

unfolded_dtensor([[ 5.33196567e-10,  1.00000000e+00],
                  [ 9.20963537e-12,  8.90422006e-02],
                  [ 1.00000000e+00, -8.42903606e-13],
                  [-1.04279486e-02, -1.86580628e-13]])

In [62]:
# First entry of P.U. NOTE(review): presumably the factor matrix for tensor
# axis 0, which main_tensor indexes by movie id — confirm against sktensor docs.
P.U[0]

unfolded_dtensor([[ 1.17474734e-09,  7.91489075e-01],
                  [-4.19325095e-10,  6.47266039e-01],
                  [-6.47840408e-10,  1.00000000e+00],
                  [ 1.00000000e+00, -7.46851762e-10],
                  [-1.22153455e-02,  8.89379366e-12]])