In [1]:
import os
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac
from tensorly.decomposition import robust_pca
import datetime
import csv
import tensortools as tt
from tensortools.operations import unfold as tt_unfold, khatri_rao

Using numpy backend.


In [3]:
# Read all the file names in the data directory and store them in a list
path = "/Users/Ryan/Desktop/local-tensor/former"
# Skip hidden entries (e.g. macOS's '.DS_Store') by filtering instead of
# list.remove(), which raises ValueError when the entry is absent.
# Sorting makes the file -> user_id mapping (the file's position in this list)
# stable across runs, since os.listdir returns entries in arbitrary order.
files = sorted(name for name in os.listdir(path) if not name.startswith('.'))

In [4]:
# Put the records of every file into one flat list of
# [segment, time, frequency, user_id] rows.
# The record layout is identical for every file, so the dtype is built once.
# NOTE(review): 'int8' caps time and frequency at 127 -- confirm the data fits.
data_type = np.dtype([('segment', 'int32'), ('time', 'int8'), ('frequency', 'int8')])
data = []
for idx, file in enumerate(files):
    # np.loadtxt yields a 0-d array for a single-row file, which cannot be
    # iterated; atleast_1d makes one-record files behave like all others.
    file_data = np.atleast_1d(np.loadtxt(os.path.join(path, file), dtype=data_type))
    for record in file_data:
        # The file's position in `files` is appended as the user_id column.
        data.append([*record, idx])

In [5]:
# Work out the three tensor dimensions from the loaded records
data = np.array(data)
num_timeInterval = 48                # fixed length of the time axis
num_users = len(files)               # one user per input file
num_segments = np.max(data[:, 0])    # largest segment id found in column 0

In [6]:
# Allocate the (user, segment, time interval) tensor, initialised to zeros
tensor = np.zeros(shape=(num_users, num_segments, num_timeInterval))

In [7]:
# Copy every record's frequency into its (user, segment, time) cell
for segment, slot, frequency, user in data:
    # segment ids are shifted down by one to obtain the index on axis 1
    tensor[user, segment - 1, slot] = frequency

In [8]:
# Allocate the observation mask, same shape as the tensor, initially all zeros
mask = np.zeros(shape=(num_users, num_segments, num_timeInterval))

In [9]:
# Mark every (user, segment, time) cell that has a record in `data`
for row in data:
    x = row[3]        # user index
    y = row[0] - 1    # segment id shifted down by one
    z = row[1]        # time index
    # Only the position matters for the mask; the frequency value is not needed.
    mask[x, y, z] = 1

In [10]:
# Decompose the noisy tensor with tt.mncp_hals and time the whole step
start = datetime.datetime.now()

rank = 2

X = tl.tensor(tensor, dtype=np.int32)
# Add small Gaussian noise and clip at zero so that no entry is negative.
# NOTE(review): no random seed is set, so Xn differs between runs.
Xn = np.maximum(0, X + .1*np.random.randn(num_users, num_segments, num_timeInterval))

# Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and
# removed in 1.24, where it raises AttributeError.
M = tl.tensor(mask, dtype=bool)

U = tt.mncp_hals(Xn, rank=rank, mask=M, verbose=False, max_iter=30)
factors = U.factors.factors

end = datetime.datetime.now()

In [11]:
# Rebuild the predicted tensor from the decomposition factors
pred_matrix = tl.kruskal_to_tensor(factors)

In [12]:
# Mean absolute error between the noisy input Xn and the reconstruction,
# with one term per record in `data`.
users = data[:, 3]
segments = data[:, 0] - 1
slots = data[:, 1]
# sqrt(square(d)) is simply |d|; evaluate it for all records at once instead
# of looping over them one by one in Python.
abs_err = np.abs(Xn[users, segments, slots] - pred_matrix[users, segments, slots])
pred_err = abs_err.sum() / len(data)

# Elapsed wall-clock time between `start` and `end`, in seconds
time = (end - start).total_seconds()

In [13]:
# Store this run's values as one row, ordered to match the CSV header:
# user, segment, interval, rank, iteration, error, time.
# The iteration entry was previously missing, which shifted error and time one
# column to the left of their headers.
# NOTE(review): U.iterations is taken to be the iteration count recorded on the
# tensortools fit result -- confirm against the installed tensortools version.
result = [num_users, num_segments, num_timeInterval, rank, U.iterations, pred_err, time]

In [14]:
# Rows destined for the CSV: the column names first, then this run's values
header = ["user", "segment", "interval", "rank", "iteration", "error", "time"]
results = [header, result]

In [15]:
# Append the experiment results to the output file
output_path = 'output.csv'
# The file is opened in append mode, so the header row (results[0]) is written
# only when the file is new or empty; otherwise every run would add another
# header line in the middle of the data.
needs_header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0
# newline='' lets the csv module control line endings, as its documentation
# requires (avoids blank rows between records on Windows).
with open(output_path, 'a', newline='') as resultFile:
    wr = csv.writer(resultFile, dialect='excel')
    wr.writerows(results if needs_header else results[1:])