In [84]:
import os
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac
from tensorly.decomposition import robust_pca
import datetime
import csv

In [85]:
# Collect the names of the per-user data files (each file's position in this
# list later becomes its user_id).
# NOTE: `path` is a machine-specific absolute path; adjust it when running elsewhere.
path = "/Users/Ryan/Desktop/local-tensor/former"
# Skip the macOS Finder metadata file only if it is present (an unconditional
# list.remove() raises ValueError when it is missing), and sort the names
# because os.listdir() returns entries in arbitrary order, which would make
# the file -> user_id mapping differ between runs.
files = sorted(name for name in os.listdir(path) if name != '.DS_Store')

In [86]:
# Load every file into one flat list of rows: [segment, time, frequency, user_id]
# The record layout is the same for every file, so the dtype is built once.
data_type = np.dtype([('segment', 'int32'), ('time', 'int8'), ('frequency', 'int8')])
data = []
for idx, file in enumerate(files):
    file_data = np.loadtxt(os.path.join(path, file), dtype=data_type)
    # np.loadtxt returns a 0-d array when a file holds a single record;
    # promote it to 1-d so the iteration below does not raise TypeError.
    for record in np.atleast_1d(file_data):
        # the position of the file in `files` is appended as the user_id column
        data.append(list(record) + [idx])

In [87]:
# Derive the tensor dimensions from the loaded rows
data = np.array(data)              # columns: segment, time, frequency, user_id
num_timeInterval = 48              # size of the time axis of the tensor
num_users = len(files)             # one user per input file
num_segments = np.max(data[:, 0])  # largest segment id present in the data

In [88]:
# Allocate the dense (user, segment, time interval) tensor, initialised to zero
tensor_shape = (num_users, num_segments, num_timeInterval)
tensor = np.zeros(tensor_shape)

In [90]:
# Write each observed frequency into its (user, segment, time) cell
for segment, slot, frequency, user in data:
    # the segment id is shifted down by one to obtain the index on the segment axis
    tensor[user, segment - 1, slot] = frequency

In [91]:
# Run a PARAFAC decomposition of the data tensor with TensorLy and record the
# wall-clock timestamps around it (the elapsed time is computed in a later cell).
start = datetime.datetime.now()

# Number of components requested from the decomposition
rank = 10
# Convert the numpy array into a float32 TensorLy tensor
X = tl.tensor(tensor, dtype=tl.float32)
# return_errors=True makes parafac return the error history alongside the
# factors; the run is capped at 1000 iterations.
# NOTE(review): `errors` is presumably one reconstruction error per iteration
# (its length is later reported as the iteration count) - confirm against the
# installed TensorLy version.
factors, errors = parafac(X, rank=rank, return_errors=True, n_iter_max = 1000)

end = datetime.datetime.now()

In [92]:
# Rebuild the full predicted tensor from the decomposition factors
# NOTE(review): newer TensorLy releases appear to expose this function as
# `cp_to_tensor` - confirm against the installed version before upgrading.
pred_matrix = tl.kruskal_to_tensor(factors)

In [93]:
# Average, over the observed rows, of sqrt((actual - predicted)^2), i.e. the
# absolute difference between the input tensor and its reconstruction,
# followed by the run statistics.
observed_cells = ((user, segment - 1, slot) for segment, slot, _freq, user in data)
pred_err = sum(
    np.sqrt(np.square(X[cell] - pred_matrix[cell])) for cell in observed_cells
)
pred_err = pred_err / len(data)

# Number of entries in the error history returned by the decomposition
iteration = len(errors)
# Elapsed wall-clock time of the decomposition, in seconds
time = (end - start).total_seconds()

In [94]:
# Summarise this run as a single row: tensor dimensions, rank, iteration
# count, final decomposition error and elapsed seconds
result = [
    num_users,
    num_segments,
    num_timeInterval,
    rank,
    iteration,
    errors[-1],
    time,
]

In [95]:
# Table of experiment results: a header row followed by this run's row
header = ["user", "segment", "interval", "rank", "iteration", "error", "time"]
results = [header, result]

In [96]:
# Append the experiment results to the cumulative output file
output_path = 'output.csv'
# The file is opened in append mode, so writing the header row (results[0]) on
# every run would scatter duplicate header lines through the CSV. Emit it only
# when the file does not exist yet or is still empty.
needs_header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0
rows = results if needs_header else results[1:]
# newline='' lets the csv module control line endings; without it, extra blank
# lines appear between rows on platforms that translate '\n' (e.g. Windows).
with open(output_path, 'a', newline='') as resultFile:
    wr = csv.writer(resultFile, dialect='excel')
    wr.writerows(rows)