In [1]:
import os
import numpy as np
import tensorly as tl
from tensorly.decomposition import parafac
import datetime
import csv

Using numpy backend.


In [22]:
def tensor_decomposition(file, base_dir=None, num_time_interval=48, max_rank=10, n_iter_max=1000):
    """Run a CP (PARAFAC) decomposition on one data file and report summary statistics.

    The file is read with ``np.loadtxt`` (whitespace-separated columns, in this
    order: user, time, segment, frequency). A dense tensor of shape
    ``(num_users, num_time_interval, num_segments)`` is built from those rows,
    decomposed with ``parafac``, and the mean absolute error between the
    observed entries and their reconstruction is computed.

    Parameters
    ----------
    file : str
        File name, relative to ``base_dir``.
    base_dir : str, optional
        Directory containing ``file``. When omitted, the notebook-level
        variable ``path`` is used (the original behaviour).
    num_time_interval : int, optional
        Size of the time axis of the tensor (default 48).
    max_rank : int, optional
        Upper bound for the decomposition rank (default 10). The rank used is
        ``min(number of distinct users, max_rank)``.
    n_iter_max : int, optional
        Maximum number of iterations handed to ``parafac`` (default 1000).

    Returns
    -------
    list
        ``[num_users, num_segments, num_time_interval, rank, iteration,
        pred_err, time]`` where ``iteration`` is ``len(errors)`` as returned by
        ``parafac``, ``pred_err`` is the mean absolute error over the rows of
        the file and ``time`` is the wall-clock duration of the decomposition
        in seconds.

    Raises
    ------
    ValueError
        If the file contains no data rows.
    IndexError
        If a time value lies outside ``[0, num_time_interval)``.
    """
    if base_dir is None:
        # Backward compatible: fall back to the notebook-level `path` variable.
        base_dir = path

    # 'time' and 'frequency' were parsed as int8, which overflows for any value
    # above 127; int32 keeps the same columns without that limit.
    # NOTE(review): 'U10' truncates user identifiers longer than 10 characters,
    # which would merge distinct users - confirm the id length of the data.
    data_type = np.dtype([('user', 'U10'), ('time', 'int32'), ('segment', 'int32'), ('frequency', 'int32')])
    # ndmin=1 keeps a one-row file as a length-1 array instead of a 0-d record.
    file_data = np.loadtxt(os.path.join(base_dir, file), dtype=data_type, ndmin=1)
    if file_data.size == 0:
        raise ValueError("no data rows found in %r" % (file,))

    # Map every user / segment to a dense 0-based index. np.unique sorts the
    # distinct values, so the mapping is deterministic, and it replaces the
    # former per-row scan over the whole user and segment sets.
    users, user_idx = np.unique(file_data['user'], return_inverse=True)
    segments, segment_idx = np.unique(file_data['segment'], return_inverse=True)
    time_idx = file_data['time']

    # Get the size of the tensor
    num_users = len(users)
    num_segments = len(segments)

    # Negative values would silently wrap around to the end of the time axis.
    if time_idx.min() < 0 or time_idx.max() >= num_time_interval:
        raise IndexError(
            "time values in %r must lie in [0, %d)" % (file, num_time_interval))

    # Create an all zeros tensor and fill it with the existing data.
    # The segment index is used as-is: the former `segment_id - 1` sent
    # segment 0 to index -1 (the last slot) instead of slot 0.
    # When a (user, time, segment) triple occurs more than once, the last row wins.
    tensor = np.zeros((num_users, num_time_interval, num_segments))
    tensor[user_idx, time_idx, segment_idx] = file_data['frequency']

    # Set the rank of the tensor decomposition
    rank = min(num_users, max_rank)

    # Using tensor decomposition lib (only this part is timed)
    start = datetime.datetime.now()
    X = tl.tensor(tensor, dtype=tl.float32)
    factors, errors = parafac(X, rank=rank, n_iter_max=n_iter_max, return_errors=True)
    end = datetime.datetime.now()

    # Rebuild the full tensor from the factors
    pred_matrix = tl.kruskal_to_tensor(factors)

    # Mean absolute error over the rows of the file:
    # sqrt(square(d)) in the original loop is simply |d|.
    observed = tensor[user_idx, time_idx, segment_idx]
    predicted = pred_matrix[user_idx, time_idx, segment_idx]
    pred_err = np.mean(np.abs(observed - predicted))

    elapsed_seconds = (end - start).total_seconds()
    iteration = len(errors)

    # Same column order as the `header` row written to the CSV
    result = [num_users, num_segments, num_time_interval, rank, iteration, pred_err, elapsed_seconds]

    return result

In [24]:
# Read all the file names and store them into a list.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
path = "/Users/Ryan/Desktop/local-tensor/test"
# Drop the macOS '.DS_Store' entry if it is there; list.remove() raised
# ValueError whenever the file was absent. Sorting makes the order of the
# result rows reproducible, since os.listdir() returns entries in arbitrary order.
files = sorted(name for name in os.listdir(path) if name != '.DS_Store')

# First row of the output table: column names matching the list returned by
# tensor_decomposition().
header = ["user", "segment", "interval", "rank", "iteration", "error", "time"]
results = []
results.append(header)

# One result row per input file.
for file in files:
    result = tensor_decomposition(file)
    results.append(result)

In [25]:
# Write the experiment results into the output file.
# newline='' is required by the csv module so that the writer controls line
# endings itself; without it every row is followed by a blank line on Windows.
# The file is opened in append mode, so re-running this cell adds the rows
# (including the header row held in `results`) to the end of the existing file.
with open('output.csv', 'a', newline='') as resultFile:
    wr = csv.writer(resultFile, dialect='excel')
    wr.writerows(results)