In [None]:
# Clear the interactive namespace before the run; -f skips the confirmation prompt.
%reset -f

In [None]:
import os
import numpy as np
from scipy import interpolate
import scipy.io
import Rbeast as rb
from SLM_tools import *
from scipy.io import savemat
import pickle
import re
import time

In [None]:
# --- Run configuration -------------------------------------------------------
# Root folder that is searched recursively for result files.
top_dir = r"C:\Users\User\OneDrive - mail.tau.ac.il\Documents\Final Project\Project2-Omri and Idan\Results"

# File names consumed by this notebook look like
#   distance_vec_mu_1.6_energy_0.5_run_num_3_total_num_target_10.mat
# Named groups: type (distance|energy), mu, energy, run_num.
pattern = re.compile(
    r"(?P<type>distance|energy)_vec_mu_(?P<mu>[-+]?\d*\.\d+|\d+)_energy_(?P<energy>[-+]?\d*\.\d+|\d+)_run_num_(?P<run_num>\d+)_total_num_target_\d+\.mat"
)

# Only files whose `mu` field equals this value are collected.
mu = 1.6
path = r"C:\Users\User\OneDrive - mail.tau.ac.il\Documents\SA_UI\testing data"
# save_path = os.path.join(path, rf"{mu}_v3")
# os.mkdir(save_path)
# Folder that receives every artefact written by the later cells.
save_path = r"C:\Users\User\OneDrive - mail.tau.ac.il\Documents\SA_UI\testing data\merged_data_1_6"


def collect_vec_paths(search_dir, mu_value):
    """Find the distance/energy .mat files below `search_dir` for one mu value.

    Parameters
    ----------
    search_dir : str
        Directory that is walked recursively.
    mu_value : int or float
        Value the `mu` field of the file name has to equal. The comparison is
        numeric, so "1.6" and "1.60" (or "2" and 2.0) are treated as equal.

    Returns
    -------
    (list of str, list of str)
        Full paths of the matching distance files and of the matching energy
        files, each in traversal order.
    """
    found = {"distance": [], "energy": []}
    wanted_mu = float(mu_value)
    for root, dirs, files in os.walk(search_dir):
        # os.walk yields entries in arbitrary order. Sorting makes the result
        # reproducible, and since matching distance/energy names differ only
        # in their prefix, both lists end up in the same run order as long as
        # every run has both files.
        dirs.sort()
        for file in sorted(files):
            match = pattern.match(file)
            if match is None:
                continue
            file_info = match.groupdict()
            if float(file_info["mu"]) != wanted_mu:
                continue
            found[file_info["type"]].append(os.path.join(root, file))
    return found["distance"], found["energy"]


distance_paths, energy_paths = collect_vec_paths(top_dir, mu)
# Kept for backward compatibility: sets holding the same paths as the lists.
seen_distance_paths = set(distance_paths)
seen_energy_paths = set(energy_paths)


In [None]:
# Display how many energy files were collected for the selected mu.
len(energy_paths)

In [None]:
# --- Per-file processing, then PCA over all files ----------------------------
# For every (energy, distance) file pair: load, downsample, save the merged
# array, segment, extract features. Afterwards the collected features go
# through feature selection + PCA and everything is written to `save_path`.
# All per-file steps live inside ONE loop in ONE cell, so that each file
# contributes to `C` and to the timing counters.

C = []                         # one feature_extraction() result per processed file
downsampling_factor = 10000    # forwarded to SLM_tools.downsample
n_components = 3               # forwarded to SLM_tools.pca / data_preparation
features_extracted = 0         # running sum of A[0].shape[0] over all files
total_runtime = 0              # seconds; sum of the measured stage times
files_with_assembly = 0        # files for which feature_extraction returned something
total_data_files = len(energy_paths)
log = []                       # messages written to the run-log file at the end
counters = {"load":[],"extraction":[],"segmentation":[],"pca":None,"extracted_features":[]}


def record(message):
    """Print `message` and keep it for the run-log file."""
    print(message)
    log.append(message)


# The save calls below do not create missing folders themselves.
os.makedirs(save_path, exist_ok=True)

# zip() pairs the two lists by position and stops at the shorter one; make a
# length mismatch visible instead of silently dropping files.
if len(energy_paths) != len(distance_paths):
    record(
        f"\nWARNING: {len(energy_paths)} energy files but {len(distance_paths)} distance files; "
        "files are paired by position and the surplus is ignored"
    )

for i, (energy_path, distance_path) in enumerate(zip(energy_paths, distance_paths)):
    # ---- load + downsample + save merged array ----
    start1 = time.time()
    # Both .mat files store their data under the variable name 'foo';
    # the energy array is transposed after loading.
    distance_data = scipy.io.loadmat(distance_path)['foo']
    energy_data = scipy.io.loadmat(energy_path)['foo'].T
    energy_data, distance_data, time_vec = SLM_tools.downsample(energy_data, distance_data,downsampling_factor)
    merged = np.concatenate((energy_data, distance_data), axis=1)
    merged_filename = os.path.join(save_path, f"merged_energy_distance_{i}_mu_{mu}.mat")
    # `savemat` is imported explicitly from scipy.io; the alias `sio` is not
    # bound by any import in this notebook.
    savemat(merged_filename, {"energy_distance":merged})
    loading_time = round(time.time() - start1,3)
    record(f"\nfile {i+1}/{len(energy_paths)}  loaded: {os.path.relpath(energy_path,top_dir)} Loading time: {loading_time} seconds")
    counters["load"].append(loading_time)

    # ---- segmentation ----
    start2 = time.time()
    # NOTE(review): `cp` is presumably the change points found by BEAST — confirm in SLM_tools.
    o, cp, mean_trend = SLM_tools.beast(energy_data)
    segmentation_time = round(time.time() - start2,3)
    record(f"\nfinished segmentation(Beast), runtime: {segmentation_time} seconds")
    counters["segmentation"].append(segmentation_time)

    # ---- feature extraction ----
    start3 = time.time()
    A = SLM_tools.feature_extraction(energy_data, distance_data,mean_trend,cp)
    extraction_time = round(time.time() - start3,3)
    # These two messages are printed only; they are not part of the log file.
    print(f"\nfinished feature extraction, runtime: {extraction_time} seconds")
    counters["extraction"].append(extraction_time)
    print(f"\nAssembly? : {len(A) > 0}")
    if len(A) > 0:
        files_with_assembly += 1
        features_extracted += A[0].shape[0]
        counters["extracted_features"].append(A[0].shape[0])
        record(f"\nfiles_with_assembly: {files_with_assembly}/{total_data_files}")
        record(f"\nTotal features_extracted: {features_extracted}")
    else:
        record(f"\nNo Assembly, No features extracted")
    # A is appended even when it is empty, so C has one entry per processed file.
    C.append(A)
    total_runtime += (loading_time + extraction_time + segmentation_time)

record(f"\nFinished pre-pca processing,total runtime {total_runtime} for {len(energy_paths)} files")

# Persist the raw per-file results before the PCA stage.
c_path = os.path.join(save_path,f"C_all_mu_{mu}.pkl")
with open(c_path, "wb") as f:
    pickle.dump(C, f)
record("\nsaved all processed data")

# ---- feature selection + PCA over all files ----
start_pca = time.time()
record(f"\nStarting PCA")
c_reduced = SLM_tools.feature_selection(C)
principal_components, score, latent = SLM_tools.pca(c_reduced,n_components)
a_reduced = SLM_tools.data_preparation(score, c_reduced,n_components)
counters["selected_features"] = a_reduced.shape[0]
runtime_pca = round(time.time() - start_pca,3)
counters["pca"] = runtime_pca
total_runtime += runtime_pca
record(f"\nFinished PCA runtime {runtime_pca} seconds")

a_path = os.path.join(save_path,f"a_reduced_all_mu_{mu}.mat")
savemat(a_path, {'a_reduced': a_reduced})
# Logged but not printed.
log.append(f"\nsaved all extracted features , total extracted features {features_extracted} total runtime {total_runtime} seconds")

# ---- summary + log file ----
# Leading newline keeps the heading on its own line in the log file.
log.append("\nSUMMARY")
record(f"\nfiles {total_data_files} \nfiles_with_assembly: {files_with_assembly}/{total_data_files} \nmean extracted features {np.mean(counters['extracted_features'])} \ntotal extracted features {features_extracted} \ntotal_selected_features {counters['selected_features']} \ntotal runtime {total_runtime} seconds \nmean loading time {np.mean(counters['load'])} \nmean segmentation time {np.mean(counters['segmentation'])} \nmean extraction time {np.mean(counters['extraction'])} \nPCA time {counters['pca']}")
log_path = os.path.join(save_path,f"run_log_mu_{mu}.txt")
with open(log_path,'w') as f:
    # Every entry already carries its own leading newline.
    f.writelines(log)
        

In [None]:
# Display the shape of the array returned by SLM_tools.data_preparation.
a_reduced.shape

In [None]:
# Load the "selected_features" variable of an earlier result file for comparison.
# scipy.io is spelled out because no import in this notebook binds the alias `sio`.
a_reduced_2 = scipy.io.loadmat(r"C:\Users\User\OneDrive - mail.tau.ac.il\Documents\SA_UI\selected_features_03_08_17_46.mat")["selected_features"]

In [None]:
# Display the shape of the array loaded from the "selected_features" file.
a_reduced_2.shape

In [None]:
# Replace `a_reduced` with the "a_reduced" variable stored in the 1.6_v3 result file.
# scipy.io is spelled out because no import in this notebook binds the alias `sio`.
a_reduced = scipy.io.loadmat(r"C:\Users\User\OneDrive - mail.tau.ac.il\Documents\SA_UI\testing data\1.6_v3\a_reduced_all_mu_1.6.mat")["a_reduced"]

In [None]:
# Display the shape of the array that was just loaded from disk.
a_reduced.shape

In [None]:
import os
import numpy as np
from scipy import interpolate
import scipy.io
import Rbeast as rb
from SLM_tools import *
from scipy.io import savemat
import pickle
import re
import time

In [None]:
# Load the stored "a_reduced" array and set the output folder for the cells below.
# scipy.io is spelled out because no import in this notebook binds the alias `sio`.
a_reduced = scipy.io.loadmat(r"C:\Users\User\OneDrive - mail.tau.ac.il\Documents\SA_UI\testing data\1.6_v3\a_reduced_all_mu_1.6.mat")["a_reduced"]
path = r"C:\Users\User\OneDrive - mail.tau.ac.il\Documents\SA_UI\testing data"
# NOTE(review): the input comes from the 1.6_v3 folder while results are
# written to 1.6_v2 — confirm this is intended.
save_path = os.path.join(path, r"1.6_v2")

In [None]:
# Train on `a_reduced`; the prediction/actual matrices are consumed by the
# evaluation cell further down.
# NOTE(review): cv_num is presumably the number of cross-validation folds — confirm in SLM_tools.
(
    YI,
    tfas_predict_mat,
    tfas_actually_mat,
    mean_error_mat,
    random_x,
) = SLM_tools.model_training_with_cv(
    a_reduced,
    n_components=3,
    cv_num=10,
)

In [None]:
# Draw the 2-D stochastic landscape for `a_reduced`; `save_path` is passed along.
# NOTE(review): the trailing 3 presumably is the number of components — confirm in SLM_tools.
SLM_tools.draw_stochastic_landscape_2d(
    a_reduced,
    save_path,
    3,
)

In [None]:
# Evaluate the predictions produced by the training cell; the returned
# histogram/tick values are reused by the bias-correction cell.
cv_num = 10
(
    mean_vec,
    std_vec,
    hist_space,
    x_hist_space,
    x_ticks,
    y_ticks,
) = SLM_tools.model_eval(
    tfas_predict_mat,
    tfas_actually_mat,
    cv_num,
    save_path,
)

In [None]:
# Second training pass on `a_reduced`; its outputs feed the bias-correction cell.
(
    tfas_predict_mat_2,
    tfas_actually_mat_2,
    mean_error_mat_2,
) = SLM_tools.train_again_on_validation_and_test(
    a_reduced,
    n_components=3,
)

In [None]:
# Bias correction: combines the second-pass predictions with the statistics
# returned by model_eval; `save_path` is passed along.
SLM_tools.cv_bias_correction(
    tfas_predict_mat_2=tfas_predict_mat_2,
    tfas_actually_mat_2=tfas_actually_mat_2,
    hist_space=hist_space,
    mean_vec=mean_vec,
    x_hist_space=x_hist_space,
    x_ticks=x_ticks,
    y_ticks=y_ticks,
    save_path=save_path,
)