# Tensor Pipeline

## Setting Path

In [1]:
import sys
# Make the project's solver code and the synthetic-data helpers importable.
# Both directories are added at the end of sys.path, solver code first.
sys.path.extend([
    r'D:\Tensor Based ML for Neuro Imaging\INSPIRE_CAHBHIR\Python Scripts\LSR-Tensor-Ridge-Regression\Closed_Form_Solver\Code Files',
    r'D:\Tensor Based ML for Neuro Imaging\INSPIRE_CAHBHIR\Python Scripts\LSR-Tensor-Ridge-Regression\Data_Sets\Synthetic Data\Uncentered X',
])

## Import Libraries

In [11]:
import datetime
import numpy as np
import dill
from KFoldCV import KFoldCV
from train_test import train_test
from DataGenerationB import *
from sklearn.preprocessing import StandardScaler
import platform
import time 

## Import Data

In [3]:
import pickle

# Location of the pickled synthetic data set.
# NOTE: the blank right after the last path separator is part of the file name.
pkl_file = r"D:\Tensor Based ML for Neuro Imaging\INSPIRE_CAHBHIR\Python Scripts\LSR-Tensor-Ridge-Regression\Data_Sets\Synthetic Data\Uncentered X\ n_train_5000_ntest_1000_tensordim_[32 32]_tensor_ranks_[4 4]_seprank_2.pkl"

# The context manager closes the handle even when unpickling raises.
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
with open(pkl_file, 'rb') as file:
    data = pickle.load(file)

# The pickle is indexed positionally; entries 0-4 are read below.
# Each shape is printed as a quick sanity check of what was loaded.
X_train_Full = data[0]
print(X_train_Full.shape)

Y_train_Full = data[1]
print(Y_train_Full.shape)

X_test_Full = data[2]
print(X_test_Full.shape)

Y_test_Full = data[3]
print(Y_test_Full.shape)

B_tensored = data[4]
print(B_tensored.shape)

(5000, 32, 32)
(5000,)
(1000, 32, 32)
(1000,)
(32, 32)


## Subset Data

In [4]:
# Experiment configuration.
# Sample counts: every entry of n_train_all is run in turn; n_test is fixed.
n_test = 100
n_train_all = [650]

# Tensor settings used by the solver and embedded in the results file name.
separation_rank = 2
tensor_mode_ranks = np.array((4, 4))
tensor_dimensions = np.array((32, 32))


In [5]:
# For every training-set size: subset the data, mean-centre it, pick the
# regularisation strength with K-fold cross-validation, then refit and evaluate
# on the held-out test subset.
for i,n_train in enumerate(n_train_all):
  print('Number of Samples:',n_train,'---------------------------------------------------------------------------------------------------------------------------')

  #Subset X_train and Y_train
  X_train = X_train_Full[0:(n_train),:,:]
  Y_train = Y_train_Full[0:(n_train)]

  #Subset X_test and Y_test
  X_test = X_test_Full[0:(n_test),:,:]
  Y_test = Y_test_Full[0:(n_test)]


  #Preprocessing

  # Flatten each sample so the scaler sees one row per sample:
  # (n_samples, dim_0, dim_1) -> (n_samples, dim_0 * dim_1)
  X_train_2D = X_train.reshape(n_train, -1)
  X_test_2D = X_test.reshape(n_test,-1)

  # with_std=False: features are mean-centred only, not scaled to unit variance
  scaler = StandardScaler(with_std = False)

  # Fit scaler on train data and transform train data
  X_train_scaled = scaler.fit_transform(X_train_2D)
  # Transform test data using the scaler fitted on train data
  X_test_scaled = scaler.transform(X_test_2D)

  # Reshape the scaled data back to 3D
  X_train = X_train_scaled.reshape(n_train, tensor_dimensions[0],tensor_dimensions[1])
  X_test  = X_test_scaled.reshape(n_test, tensor_dimensions[0],tensor_dimensions[1])

  #average response value
  Y_train_mean = np.mean(Y_train)
  # Only Y_train is centred here; Y_test stays as loaded and Y_train_mean is
  # handed to train_test below.
  Y_train = Y_train - Y_train_mean


  #print("Sample mean for each feature (across samples):",scaler.mean_)
  #print("Sample variance for each feature (across samples):",scaler.var_)
  #print('Response Average:',Y_train_mean)

  #For now, define finite alpha set that we are searching over
  # NOTE(review): 5 precedes 4; order left untouched in case KFoldCV depends on it.
  alphas = [0,0.1,0.3, 0.5, 0.7, 0.9, 1, 1.5, 2, 2.5, 3, 5, 4,10,15,20,50,100]

  #Define Number of Folds we want
  k_folds = 10

  # One set of solver hyperparameters for both calls. KFoldCV receives its own
  # shallow copy so the final fit still sees a fresh dict, exactly as when the
  # dict was spelled out twice.
  hypers = {'max_iter': 50, 'threshold': 1e-4, 'ranks': tuple(tensor_mode_ranks), 'separation_rank': separation_rank}
  max_iter = hypers['max_iter']  # embedded in the results file name when saving

  # Cross-validate over alphas; lambda1 is the selected regularisation value.
  lambda1, validation_normalized_estimation_error, validation_nmse_losses, validation_correlations, validation_R2_scores, objective_function_information,gradient_information = KFoldCV(X_train, Y_train, alphas, k_folds, dict(hypers),B_tensored,intercept= False)

  # Refit with the selected lambda1 and evaluate on the test subset.
  normalized_estimation_error, test_nmse_loss, test_R2_loss, test_correlation, objective_function_values,gradient_values,iterate_level_values,factor_core_iteration = train_test(X_train, Y_train, X_test, Y_test, lambda1, hypers, Y_train_mean, B_tensored, intercept= False)


  print("Error Record on Training _ After K-Fold CV")
  print("Validation Normalized Estimation Error: ", validation_normalized_estimation_error)
  print("Validation NMSE Losses: ", validation_nmse_losses)
  print("Validation Correlations: ", validation_correlations)
  print("Validation R2 Scores: ", validation_R2_scores)

  print("Error Report on Testing _ With best Lambda")
  print("Alpha chosen for model: ", lambda1)
  print("Test Normalized Estimation Error: ", normalized_estimation_error)
  print("Test NMSE Loss: ", test_nmse_loss)
  print("Test R2 Loss: ", test_R2_loss)
  print("Test Correlation: ", test_correlation)

#saving
# Persist the loaded data together with the cross-validation and test results.
# NOTE(review): this runs once after the loop, so only the results of the last
# entry in n_train_all are written.

# Timestamp built from '-' and '_' only: ':' is not allowed in Windows file names.
formatted_time = time.strftime("%Y-%m-%d_%H-%M-%S")
# NOTE(review): these joined forms are not used in the file name below, which
# still embeds the arrays' printed form (e.g. "[32 32]"); kept as-is.
tensor_dimensions_str = "_".join(map(str, tensor_dimensions))
tensor_mode_ranks_str = "_".join(map(str, tensor_mode_ranks))

results_file_name = rf"ExecutionTime_intercept_5_{formatted_time}_n_train_{n_train}_n_test_{n_test}_tensor_dimensions_{tensor_dimensions}_tensor_mode_ranks_{tensor_mode_ranks}_separation_rank_{separation_rank}_max_iter_{max_iter}.pkl"

if platform.system() == 'Windows':
    results_dir = r"D:\Tensor Based ML for Neuro Imaging\INSPIRE_CAHBHIR\Python Scripts\LSR-Tensor-Ridge-Regression_All_Data\Platform_For_Experimenmts\With_New_Dataset\K_Fold_CV"
    pkl_file = results_dir + "\\" + results_file_name
else:
    # No results directory is configured for other platforms. Write into the
    # current working directory instead of opening an empty path (which fails
    # only after the long run) or reusing the input data set's path.
    pkl_file = results_file_name

with open(pkl_file, "wb") as file:
    dill.dump((X_train_Full, Y_train_Full, X_test_Full, Y_test_Full, B_tensored, lambda1, validation_normalized_estimation_error, validation_nmse_losses, validation_correlations, validation_R2_scores, objective_function_information,gradient_information,normalized_estimation_error, test_nmse_loss, test_R2_loss, test_correlation, objective_function_values,gradient_values,iterate_level_values,factor_core_iteration), file)

Number of Samples: 650 ---------------------------------------------------------------------------------------------------------------------------
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is not initialized
intercept is

OSError: [Errno 22] Invalid argument: 'D:\\Tensor Based ML for Neuro Imaging\\INSPIRE_CAHBHIR\\Python Scripts\\LSR-Tensor-Ridge-Regression_All_Data\\Platform_For_Experimenmts\\With_New_Dataset\\K_Fold_CV\\ExecutionTime_intercept_5_2024-07-07 18:20:15_n_train_650_n_test_100_tensor_dimensions_[32 32]_tensor_mode_ranks_[4 4]_separation_rank_2_max_iter_50.pkl'