In [104]:
import os
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac
from tensorly.decomposition import robust_pca
import datetime
import csv
import tensortools as tt
from tensortools.operations import unfold as tt_unfold, khatri_rao

In [134]:
# Number of components (rank) passed to the tensor decomposition below;
# it is also recorded in the "rank" column of the experiment results.
rank = 10

In [23]:
# List the input files and initialise the experiment results table.
# NOTE(review): `path` is an absolute, machine-specific location; adjust it
# to wherever the input files live before running.
path = "/home/master/Desktop/tensor/full"
# os.listdir returns entries in arbitrary order, so sort them to make the
# choice of `files[0]` below deterministic across runs and machines.
files = sorted(os.listdir(path))

# The first row of `results` is the CSV header; experiment rows are appended later.
header = ["user", "segment", "interval", "rank", "error", "time"]
results = [header]

# Only the first file (in sorted order) is processed by this notebook.
file = files[0]

In [105]:
# Load the selected file into a structured array, one record per line.
# Fields, in column order: user, time, segment, frequency.
data_type = np.dtype([
    ('user', 'U10'),
    ('time', 'int8'),
    ('segment', 'int64'),
    ('frequency', 'int8'),
])
file_data = np.loadtxt(os.path.join(path, file), dtype=data_type)

In [106]:
# Build the lookup tables between user names and integer user ids.
user_set = set(file_data['user'])
user_id = range(len(user_set))
# Iteration order of a set of strings is not guaranteed to be stable between
# interpreter sessions, so ids are assigned over the sorted user names to keep
# the name -> id mapping reproducible after a kernel restart.
user_id_dict = {user: idx for idx, user in enumerate(sorted(user_set))}
# Reverse mapping, used when writing the user factor back out by name.
id_user_dict = {idx: user for user, idx in user_id_dict.items()}

In [107]:
# Convert every record to a list and append its integer user id as a fifth
# column (index 4). Resulting row layout:
#   [user name, time interval, segment id, frequency, user id]
data = []
for record in file_data:
    row = list(record)
    # Direct dictionary lookup instead of scanning the whole user set per row.
    row.append(user_id_dict[row[0]])
    data.append(row)

In [108]:
# Derive the three tensor dimensions.
# Column 2 of every row holds the segment id; the largest id is used as the segment count.
segment = [record[2] for record in data]
num_users = len(user_set)
# Fixed number of time intervals along the second axis.
num_timeInterval = 48
num_segments = max(segment)

In [109]:
# Allocate two zero-filled arrays of shape (users, time intervals, segments):
# `tensor` receives the observed frequencies, `mask` flags which cells were observed.
tensor_shape = (num_users, num_timeInterval, num_segments)
tensor = np.zeros(tensor_shape)
mask = np.zeros(tensor_shape)

In [110]:
# Copy every observed frequency into its (user id, time interval, segment) cell.
# Row layout: [user name, time interval, segment id, frequency, user id].
for _name, interval, segment_id, frequency, uid in data:
    # The segment id is shifted down by one to obtain the index on the last axis.
    tensor[uid, interval, segment_id - 1] = frequency

In [111]:
# Flag every (user id, time interval, segment) cell that appears in the data with 1.
# Row layout: [user name, time interval, segment id, frequency, user id].
for _name, interval, segment_id, _frequency, uid in data:
    # Same index convention as the tensor: segment id minus one on the last axis.
    mask[uid, interval, segment_id - 1] = 1

In [113]:
# Run tensortools' `mncp_hals` on the data with the observation mask, and
# record wall-clock start/end times around the whole step.
start = datetime.datetime.now()

X = tl.tensor(tensor, dtype=np.int32)
# Perturb the data with small Gaussian noise and clip at zero so the input
# stays non-negative.
# NOTE(review): the noise is unseeded, so results differ from run to run;
# seed NumPy's RNG beforehand if reproducibility is required.
Xn = np.maximum(0, X + .1*np.random.randn(num_users, num_timeInterval, num_segments))
# Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError.
M = tl.tensor(mask, dtype=bool)
U = tt.mncp_hals(Xn, rank=rank, mask=M, verbose=False, max_iter=30)
# One factor matrix per tensor axis (indexed 0, 1, 2 in the cells below).
factors = U.factors.factors

end = datetime.datetime.now()

In [114]:
# Append the user factor to users.csv: one row per user, consisting of the
# original user name followed by that user's row of factors[0].
users = [[id_user_dict[idx], *row] for idx, row in enumerate(factors[0])]
with open('users.csv', 'a', newline='') as resultFile:
    csv.writer(resultFile, dialect='excel').writerows(users)

In [133]:
# Append the time factor to time.csv: one row per time interval, consisting of
# the interval index followed by that interval's row of factors[1].
# The per-row value is built inside the comprehension rather than in a variable
# called `data`, so the global `data` row list used by other cells is not overwritten.
time = [
    np.insert(factors[1][idx], 0, int(idx))
    for idx in range(num_timeInterval)
]
with open('time.csv', 'a', newline='') as resultFile:
    wr = csv.writer(resultFile, dialect='excel')
    wr.writerows(time)

In [116]:
# Append the segment factor to segment.csv: one row per segment, consisting of
# the zero-based segment index followed by that segment's row of factors[2].
# (The tensor was filled with index = segment id - 1, so segment id = index + 1.)
# This reads the third factor and iterates over num_segments; reading factors[1]
# over num_timeInterval would just write the time factor a second time.
# The per-row value is built inside the comprehension so the global `data`
# row list used by other cells is not overwritten.
segments = [
    np.insert(factors[2][idx], 0, int(idx))
    for idx in range(num_segments)
]
with open('segment.csv', 'a', newline='') as resultFile:
    wr = csv.writer(resultFile, dialect='excel')
    wr.writerows(segments)

In [117]:
# Reconstruct the predicted tensor from the factors.
# NOTE(review): newer tensorly releases may have renamed `kruskal_to_tensor`
# (cp_* naming) — confirm against the installed version.
pred_matrix = tl.kruskal_to_tensor(factors)

# Mean absolute error between the (noisy) input and the reconstruction,
# taken over the observed cells only. sqrt(square(d)) is simply |d|.
# The observed cells come from `mask` rather than from the `data` row list,
# so each observed cell is counted exactly once and this cell does not depend
# on `data` still holding the rows when it runs.
observed = mask.astype(bool)
pred_err = np.abs(Xn - pred_matrix)[observed].mean()

# Wall-clock duration of the decomposition step, in seconds.
time = (end - start).total_seconds()
# Column order matches the `results` header: user, segment, interval, rank, error, time.
result = [num_users, num_segments, num_timeInterval, rank, pred_err, time]
results.append(result)

In [118]:
# Append the experiment results to output.csv.
# `results[0]` is the header row; it is written only when the file is new or
# empty, so repeated runs do not scatter header lines through the data.
output_path = 'output.csv'
write_header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0
# newline='' lets the csv module control line endings (avoids blank rows on
# some platforms) and matches the other CSV writers in this notebook.
with open(output_path, 'a', newline='') as resultFile:
    wr = csv.writer(resultFile, dialect='excel')
    wr.writerows(results if write_header else results[1:])