# Tensor Pipeline

## Install Libraries

In [1]:
%pip install dill

Collecting dill
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m112.6/116.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dill
Successfully installed dill-0.3.8


## Import Google Drive Files

In [2]:
# Colab helpers. `files` is imported here but not used in the cells shown in this notebook.
from google.colab import drive, files

# Mount Google Drive; every data/result path below lives under this mount point.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
import sys

# Drive folder added to the import path before the project-local imports below
# (presumably where KFoldCV, train_test and DataGenerationB live — confirm).
PIPELINE_DIR = "/content/gdrive/MyDrive/Colab Notebooks/Tensor_Based_LRR/Proper Experiments/The Pipeline/2D_Separable/Rama/Intercept New"

# Append only once so re-running this cell does not pile up duplicate sys.path entries.
if PIPELINE_DIR not in sys.path:
    sys.path.append(PIPELINE_DIR)

## Import Libraries

In [6]:
# Standard-library and numerical imports
import datetime
import numpy as np

# dill serializes the results tuple in the final cell (the input data is read with pickle)
import dill

# StandardScaler standardizes the flattened features in the preprocessing cell
from sklearn.preprocessing import StandardScaler

# Project-local modules (presumably resolved through the sys.path entry added above — confirm)
from KFoldCV import KFoldCV
from train_test import train_test
# NOTE(review): the star import hides which names DataGenerationB contributes; consider explicit imports.
from DataGenerationB import *

## Import Data

In [7]:
import pickle

# Pre-generated experiment data stored on the mounted Drive.
pkl_file = "/content/gdrive/MyDrive/Colab Notebooks/Tensor_Based_LRR/Proper Experiments/The Pipeline/Experimental Data/16-16-4-4/Separation Rank 1/Bounded_Var_Time:2024-03-19 03:17:48, intercept:5,n_train:5000, n_test:1000, tensor_dimensions:[16 16], tensor_mode_ranks:[4 4], separation_rank:1.pkl"

# NOTE(security): pickle.load can execute arbitrary code — only load files you created or trust.
# The context manager closes the handle even if unpickling raises.
with open(pkl_file, 'rb') as file:
    data = pickle.load(file)

# The payload is read positionally: train X, train Y, test X, test Y, coefficient tensor.
X_train_Full, Y_train_Full, X_test_Full, Y_test_Full, B_tensored = (data[i] for i in range(5))

# Print each array's shape as a quick sanity check on what was loaded.
for loaded_array in (X_train_Full, Y_train_Full, X_test_Full, Y_test_Full, B_tensored):
    print(loaded_array.shape)

(5000, 16, 16)
(5000,)
(1000, 16, 16)
(1000,)
(16, 16)


## Subset Data

In [8]:
# How many samples of the full train / test sets this run uses.
n_train = 300
n_test = 50

# Tensor configuration used by the reshape in preprocessing and by the model hyperparameters.
tensor_dimensions = np.array([16, 16])
tensor_mode_ranks = np.array([4, 4])
# NOTE(review): the data file loaded above sits under "Separation Rank 1" while the model
# uses 2 here (and results are saved under "Separation Rank 2") — confirm this is intentional.
separation_rank = 2

# Keep the leading n_train / n_test samples of each full array.
X_train, Y_train = X_train_Full[:n_train], Y_train_Full[:n_train]
X_test, Y_test = X_test_Full[:n_test], Y_test_Full[:n_test]

In [9]:
#Preprocessing

# Start from the untouched *_Full arrays (the same leading slices the subset cell takes)
# rather than from X_train / Y_train, which this cell overwrites. This keeps the cell
# idempotent: re-running it no longer re-centres an already centred Y_train, which would
# drive Y_train_mean to ~0 before it is handed to train_test.
# NOTE(review): if the subset cell ever selects samples differently, mirror that here.
X_train_raw = X_train_Full[0:n_train]
Y_train_raw = Y_train_Full[0:n_train]
X_test_raw = X_test_Full[0:n_test]

# Flatten every sample to a single row, (n_samples, d1, d2) -> (n_samples, d1 * d2),
# so the scaler treats each tensor entry as its own feature.
X_train_2D = X_train_raw.reshape(n_train, -1)
X_test_2D = X_test_raw.reshape(n_test, -1)

# Initialize StandardScaler
scaler = StandardScaler()

# Fit on the training data only, then apply the same transform to the test data
X_train_scaled = scaler.fit_transform(X_train_2D)
X_test_scaled = scaler.transform(X_test_2D)

# Reshape the scaled data back to 3D
X_train = X_train_scaled.reshape(n_train, tensor_dimensions[0], tensor_dimensions[1])
X_test = X_test_scaled.reshape(n_test, tensor_dimensions[0], tensor_dimensions[1])

# Average response value of the training subset
Y_train_mean = np.mean(Y_train_raw)
# Mean-centre Y_train only; Y_test is left as-is and Y_train_mean is passed to train_test later.
Y_train = Y_train_raw - Y_train_mean


print("Sample mean for each feature (across samples):",scaler.mean_)
print("Sample variance for each feature (across samples):",scaler.var_)
print('Response Average:',Y_train_mean)

Sample mean for each feature (across samples): [-6.90638916e-03 -1.07118450e-02  3.05368645e-02 -1.77202686e-02
  6.05110437e-02  5.01222366e-02  2.13756924e-02  5.55990392e-02
  1.65477681e-02  1.25698598e-02  5.68863083e-03 -1.75046857e-02
 -2.67299094e-02  2.06429216e-03  1.65035270e-02  3.02614736e-02
  3.17450773e-02  4.29855776e-02 -2.16422743e-02 -9.53024069e-04
 -1.11463566e-02  2.53936240e-02 -4.40079011e-02  1.01103408e-02
  2.52208070e-02  2.90230822e-02  1.82554507e-02 -5.06014571e-03
 -2.33501554e-03  5.99657557e-02  1.75590891e-03  3.12361842e-02
  1.86390294e-03  1.92942191e-02  1.83106089e-02 -4.03678070e-02
 -4.35337833e-02  2.38619156e-02 -8.01424894e-02  9.32364962e-03
 -1.59406533e-02  2.55236281e-02  2.98328722e-02  5.64017950e-03
  7.48533976e-03 -8.67972373e-02  2.22618851e-02 -3.17501313e-02
 -6.14837874e-02  3.87301079e-02 -5.82784282e-02  1.35240538e-02
 -1.51142788e-02  3.19697345e-02  4.24649833e-02  1.14573352e-03
  1.64873985e-02  6.55904461e-03 -3.0874430

## KFoldCV

In [10]:
# Finite grid of alpha values the cross-validation searches over.
# NOTE(review): 5 is listed before 4; the order is kept as-is because the returned
# validation arrays presumably follow this ordering — confirm before sorting.
alphas = [0,0.1,0.3, 0.5, 0.7, 0.9, 1, 1.5, 2, 2.5, 3, 5, 4,10,15,20,50,100]

# Number of cross-validation folds
k_folds = 10

# Hyperparameters passed to KFoldCV
hypers = dict(
    max_iter=200,
    threshold=1e-4,
    ranks=tuple(tensor_mode_ranks),
    separation_rank=separation_rank,
)

# Run the cross-validation, then unpack its six return values.
cv_outputs = KFoldCV(X_train, Y_train, B_tensored, alphas, k_folds, hypers, intercept= False)
(
    lambda1,
    validation_normalized_estimation_error,
    validation_nmse_losses,
    validation_correlations,
    validation_R2_scores,
    objective_function_information,
) = cv_outputs

intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is n

## Train + Test Block

In [15]:
# Same hyperparameters as the cross-validation cell, rebuilt here so this cell can be edited on its own.
hypers = dict(
    max_iter=200,
    threshold=1e-4,
    ranks=tuple(tensor_mode_ranks),
    separation_rank=separation_rank,
)

# Call train_test with the alpha chosen by KFoldCV (lambda1) and the training-response mean,
# then unpack its five return values.
test_outputs = train_test(X_train, Y_train, X_test, Y_test, B_tensored, lambda1, hypers,Y_train_mean,intercept= False)
(
    normalized_estimation_error,
    test_nmse_loss,
    test_R2_loss,
    test_correlation,
    objective_function_values,
) = test_outputs

intercept is not initialized
Y_test_predicted: [ -2.2983955  -13.71296072  15.31114863   1.16854258  -1.01446254
   9.92499923  16.00134991  15.4753178    7.99844674  14.20666711
  15.56020912  -7.17307755  11.35935211  -8.068083    10.75259092
  -6.28371402  -4.64722271  17.48821229  12.63973112  -0.79316756
  -8.03194267   7.94085885   1.99942579  10.72730654  18.00769212
   3.6290287    3.73801605  15.45147533   5.25122422   3.03599238
  -6.91139039  15.59305158 -13.6826375    5.53377075   2.45763905
  14.72847183  15.89185635  -8.50746033   9.97484751   7.15727262
  -7.90613451  -7.36331879  -2.74608914   4.34477585  14.17648379
  14.21234198   6.76891662   7.48060905   3.40677726   8.60814904], Y_test: [ -2.47365852 -13.57734122  15.15337006   0.92134158  -1.27834153
   9.93543064  15.88093885  15.66436044   7.4974047   14.4032061
  15.39502757  -7.09568432  10.83775236  -8.18120228  11.04364077
  -6.17113238  -4.35303994  17.84008877  12.58729614  -0.77864401
  -8.11823023   7.58

## Save Results

In [16]:
# Timestamp that makes each results file name unique
formatted_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
max_iter = hypers['max_iter']

# NOTE(review): "Separation Rank 2" in the folder and "intercept_0" in the file name are
# hard-coded; they do not follow separation_rank or the intercept flag if those change.
results_dir = "/content/gdrive/MyDrive/Colab Notebooks/Tensor_Based_LRR/Proper Experiments/The Pipeline/Experimental Results/16-16-4-4/Tensor Results/Separation Rank 2"

# File name encodes the run configuration. The stray "tensor_mode_= ranks" fragment of the
# previous name is fixed to "tensor_mode_ranks", matching the input data file's naming.
results_name = (
    f"Int_ap_2ExecutionTime_intercept_0_{formatted_time}, n_train_{n_train},n_test_{n_test}, "
    f"tensor_dimensions:{tensor_dimensions}, tensor_mode_ranks:{tensor_mode_ranks}, "
    f"separation_rank:{separation_rank}, max_iter={max_iter}.pkl"
)

# NOTE: this rebinds pkl_file, which earlier held the input data path; the final cell writes to it.
pkl_file = f"{results_dir}/{results_name}"

In [17]:
# Validation metrics returned by the KFoldCV cell, printed one per line.
print("Error Record on Training _ After K-Fold CV")
for report_label, report_value in (
    ("Validation Normalized Estimation Error: ", validation_normalized_estimation_error),
    ("Validation NMSE Losses: ", validation_nmse_losses),
    ("Validation Correlations: ", validation_correlations),
    ("Validation R2 Scores: ", validation_R2_scores),
):
    print(report_label, report_value)

Error Record on Training _ After K-Fold CV
Validation Normalized Estimation Error:  [[0.25281803 0.25426789 0.25118484 0.2525859  0.25416269 0.25326458
  0.25369602 0.2537901  0.25322613 0.25394883 0.254004   0.25522815
  0.25374902 0.2546716  0.25644282 0.25834974 0.26773339 0.28207216]
 [0.25134653 0.25309122 0.25318267 0.25117377 0.25331935 0.25323177
  0.25247437 0.25298826 0.25314121 0.25356578 0.25490606 0.25482612
  0.25491097 0.25425714 0.25608635 0.2579646  0.26795622 0.28403611]
 [0.25159294 0.25196725 0.25359278 0.25295397 0.25153874 0.25655183
  0.25197731 0.25447932 0.25401423 0.25394934 0.25486239 0.25408818
  0.25381299 0.25473707 0.25688367 0.25831298 0.2680938  0.28209156]
 [0.25709858 0.25251171 0.25382169 0.25383956 0.25471198 0.25193549
  0.25256721 0.25317005 0.25334635 0.2531515  0.25574082 0.25444417
  0.2535841  0.25502262 0.25694639 0.25879108 0.26809282 0.28304653]
 [0.25331995 0.25332763 0.25073254 0.25227178 0.25356526 0.25327914
  0.25514821 0.25228315 0.25

In [19]:
# Test-set metrics returned by the train_test cell, together with the alpha that was selected.
print("Error Report on Testing _ With best Lambda")
for report_label, report_value in (
    ("Alpha chosen for model: ", lambda1),
    ("Test Normalized Estimation Error: ", normalized_estimation_error),
    ("Test NMSE Loss: ", test_nmse_loss),
    ("Test R2 Loss: ", test_R2_loss),
    ("Test Correlation: ", test_correlation),
):
    print(report_label, report_value)

Error Report on Testing _ With best Lambda
Alpha chosen for model:  4
Test Normalized Estimation Error:  0.2535628319532583
Test NMSE Loss:  0.0005130019818399947
Test R2 Loss:  0.9993312366142899
Test Correlation:  0.9996814299121779


In [None]:
# Persist the run with dill. The tuple is positional, so readers must unpack it in this order.
# Note that the full datasets (not the n_train / n_test subsets) are what get stored.
with open(pkl_file, "wb") as results_file:
    results_bundle = (
        X_train_Full,
        Y_train_Full,
        X_test_Full,
        Y_test_Full,
        B_tensored,
        lambda1,
        validation_normalized_estimation_error,
        validation_nmse_losses,
        validation_correlations,
        validation_R2_scores,
        objective_function_information,
        normalized_estimation_error,
        test_nmse_loss,
        test_R2_loss,
        test_correlation,
        objective_function_values,
    )
    dill.dump(results_bundle, results_file)