# Tensor Pipeline

## Install Libraries

In [None]:
%pip install dill

Collecting dill
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.8


## Import Google Drive Files

In [None]:
import sys

from google.colab import drive, files

# Mount Google Drive inside the Colab VM at /content/gdrive.
drive.mount('/content/gdrive')

# Add the project folder to the module search path so that modules stored
# there can be imported by later cells.
sys.path.append("/content/gdrive/MyDrive/The Pipeline/2D_Separable")

## Import Libraries

In [None]:
#Import standard-library and numerical packages
import datetime
import numpy as np

#Used to save generated data and results to pkl files
import dill

#Import External Files
from KFoldCV import KFoldCV
from train_test import train_test
from DataGenerationB import *

## Generate Data

In [None]:
# Experiment configuration passed to generate_data.
n_train, n_test = 5000, 1000
tensor_dimensions = np.array([16, 16])
tensor_mode_ranks = np.array([4, 4])
separation_rank = 1

# NOTE(review): no random seed is set anywhere in this notebook; if
# generate_data is stochastic, each run produces a different dataset - confirm.
X_train_Full, Y_train_Full, X_test, Y_test, B_tensored = generate_data(
    n_train,
    n_test,
    tensor_dimensions,
    tensor_mode_ranks,
    separation_rank,
)

## Save Generated Data

In [None]:
# Timestamp (one-second resolution) embedded in the output file name.
formatted_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# The file name records the generation settings.
# NOTE(review): the folder name "64-64-4-4" is fixed and does not follow
# tensor_dimensions / tensor_mode_ranks - confirm it is the intended target.
pkl_file = (
    "/content/gdrive/MyDrive/The Pipeline/Experimental Data/64-64-4-4/"
    f"Time:{formatted_time}, "
    f"n_train:{n_train}, "
    f"n_test:{n_test}, "
    f"tensor_dimensions:{tensor_dimensions}, "
    f"tensor_mode_ranks:{tensor_mode_ranks}, "
    f"separation_rank:{separation_rank}.pkl"
)

# Persist the full dataset as a single 5-tuple.
with open(pkl_file, "wb") as file:
    dill.dump(
        (X_train_Full, Y_train_Full, X_test, Y_test, B_tensored),
        file,
    )

## Import Data

In [None]:
import pickle

# Path of a previously saved dataset pickle; replace the placeholder with a
# real file name before running this cell.
pkl_file = "INSERT_FILE_NAME"

# SECURITY: unpickling can execute arbitrary code - only load files you trust.
# NOTE(review): the dataset is written with dill.dump; plain pickle is kept
# here as in the original - switch to dill.load if loading ever fails.
# The context manager guarantees the handle is closed even if loading raises.
with open(pkl_file, 'rb') as file:
    data = pickle.load(file)

# The first five entries are, in order, the training inputs/targets, the test
# inputs/targets and the coefficient tensor (same order as the saved tuple).
X_train_Full, Y_train_Full, X_test, Y_test, B_tensored = data[:5]

# Print each loaded array's shape as a quick sanity check.
for loaded_array in (X_train_Full, Y_train_Full, X_test, Y_test, B_tensored):
    print(loaded_array.shape)

## Subset Data

In [None]:
# Number of training samples used for this run (a prefix of the full set).
# NOTE: this rebinds n_train, so later cells that read n_train (e.g. the
# results file name) see this value rather than the generation-time one.
n_train = 200

#Subset X_train and Y_train
# Slice only the leading (sample) axis: identical to [0:n_train, :, :] for
# 3-D input, but also valid for tensors with any number of modes.
X_train = X_train_Full[:n_train]
Y_train = Y_train_Full[:n_train]

## KFoldCV

In [None]:
# Candidate alpha values; the search is restricted to this finite grid for now.
alphas = [0, 0.1, 0.3, 0.5, 0.7, 0.9, 1, 1.5, 2, 2.5, 3, 3.5, 4, 5]

# Number of cross-validation folds.
k_folds = 10

# Settings forwarded to KFoldCV. The ranks and separation rank come from the
# data-generation configuration, so that cell must have been run first.
hypers = dict(
    max_iter=20,
    threshold=1e-4,
    ranks=tuple(tensor_mode_ranks),
    separation_rank=separation_rank,
)

# KFoldCV returns the selected value (lambda1) followed by the validation
# metrics and objective-function information.
(
    lambda1,
    validation_normalized_estimation_error,
    validation_nmse_losses,
    validation_correlations,
    validation_R2_scores,
    objective_function_information,
) = KFoldCV(X_train, Y_train, B_tensored, alphas, k_folds, hypers)

## Train + Test Block

In [None]:
# Same settings as used for cross-validation, rebuilt here so this cell can be
# re-run on its own.
hypers = dict(
    max_iter=20,
    threshold=1e-4,
    ranks=tuple(tensor_mode_ranks),
    separation_rank=separation_rank,
)

# Fit on the training subset with the value chosen by cross-validation
# (lambda1) and evaluate on the held-out test set.
(
    normalized_estimation_error,
    test_nmse_loss,
    test_R2_loss,
    test_correlation,
    objective_function_values,
) = train_test(X_train, Y_train, X_test, Y_test, B_tensored, lambda1, hypers)

## Save Results

In [None]:
#Get current time and store in variable
formatted_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Build the descriptive part of the file name from the objects actually used
# in this run instead of hard-coded numbers, so the name cannot drift away
# from the configuration.
# Assumes X_test is laid out as (samples, mode_1, mode_2, ...), like the
# training tensor sliced earlier - TODO confirm.
dimension_tags = ", ".join(
    f"p{mode}_{size}" for mode, size in enumerate(X_test.shape[1:], start=1)
)
rank_tags = ", ".join(
    f"d{mode}_{rank}" for mode, rank in enumerate(hypers['ranks'], start=1)
)

# NOTE(review): the folder name "64-64-4-4" is left unchanged because it must
# already exist on Drive; confirm it is the intended target for this
# configuration.
pkl_file = (
    "/content/gdrive/MyDrive/The Pipeline/Experimental Results/64-64-4-4/"
    f"ExecutionTime_{formatted_time}, n_train_{n_train}, n_test_{X_test.shape[0]}, "
    f"{dimension_tags}, {rank_tags}, S_{hypers['separation_rank']} _Dataset.pkl"
)

In [None]:
# Report the validation metrics returned by KFoldCV, one per line.
print("Error Record on Training _ After K-Fold CV")
for label, value in (
    ("Validation Normalized Estimation Error: ", validation_normalized_estimation_error),
    ("Validation NMSE Losses: ", validation_nmse_losses),
    ("Validation Correlations: ", validation_correlations),
    ("Validation R2 Scores: ", validation_R2_scores),
):
    print(label, value)

In [None]:
# Report the selected value and the test-set metrics returned by train_test.
print("Error Report on Testing _ With best Lambda")
for label, value in (
    ("Alpha chosen for model: ", lambda1),
    ("Test Normalized Estimation Error: ", normalized_estimation_error),
    ("Test NMSE Loss: ", test_nmse_loss),
    ("Test R2 Loss: ", test_R2_loss),
    ("Test Correlation: ", test_correlation),
):
    print(label, value)

In [None]:
# Persist the data together with every result in one tuple; the element order
# below is the order a reader must use when unpacking the file.
with open(pkl_file, "wb") as file:
    dill.dump(
        (
            # Dataset
            X_train_Full,
            Y_train_Full,
            X_test,
            Y_test,
            B_tensored,
            # Outputs of KFoldCV
            lambda1,
            validation_normalized_estimation_error,
            validation_nmse_losses,
            validation_correlations,
            validation_R2_scores,
            objective_function_information,
            # Outputs of train_test
            normalized_estimation_error,
            test_nmse_loss,
            test_R2_loss,
            test_correlation,
            objective_function_values,
        ),
        file,
    )