# Notebook for making test runs

Libraries


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from xgboost import XGBClassifier
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from keras.regularizers import l2
from keras.callbacks import EarlyStopping

# GTDA libraries
from gtda.time_series import SingleTakensEmbedding, takens_embedding_optimal_parameters
from gtda.homology import VietorisRipsPersistence
from gtda.plotting import plot_point_cloud
import plotly.graph_objects as go
#from nolitsa import dimension, delay (nolitsa==0.1)

import math
import gudhi
import ripser
from persim import plot_diagrams, PersistenceImager

In [2]:
def compute_frequency_domain(signals, start_idx, end_idx, sampling_rate=1.0):
    """Return the low-frequency FFT content of a range of signals.

    For every index ``i`` in ``range(start_idx, end_idx)`` the FFT of
    ``signals[i]`` is computed and only the bins whose frequency lies in
    the closed interval [-0.05, 0.05] are kept.

    Args:
        signals: Indexable collection of 1-D signals.
        start_idx: First index to process (inclusive).
        end_idx: Last index to process (exclusive).
        sampling_rate: Sampling rate used to build the frequency axis
            (the sample spacing passed to ``fftfreq`` is
            ``1 / sampling_rate``).

    Returns:
        A list with one dict per processed signal. Each dict holds the
        1-based signal number under ``'Signal'`` and the retained
        ``'Frequencies'`` and ``'Magnitudes'`` as plain Python lists.
    """
    records = []
    for idx in range(start_idx, end_idx):
        spectrum = fft(signals[idx])
        freqs = fftfreq(len(spectrum), d=1/sampling_rate)
        # Boolean mask selecting the bins inside the [-0.05, 0.05] band.
        in_band = (freqs >= -0.05) & (freqs <= 0.05)
        records.append({
            'Signal': idx + 1,
            'Frequencies': freqs[in_band].tolist(),
            'Magnitudes': np.abs(spectrum)[in_band].tolist(),
        })
    return records

def procesamiento_pca(datos_np, hacerPCA: bool=True):
    """Convert raw signals into standardized FFT-magnitude features.

    The signals are passed through ``compute_frequency_domain`` (all of
    them, with ``sampling_rate=1.0``), the retained magnitudes are stacked
    into a matrix with one row per signal, each column is standardized
    with ``StandardScaler`` and, optionally, the result is projected onto
    50 principal components.

    NOTE(review): both the scaler and the PCA are fitted on ``datos_np``
    itself on every call, so the transform depends on the batch passed in.
    Confirm this matches how the downstream models were trained.

    Args:
        datos_np: Collection of 1-D signals; must support ``len`` and
            integer indexing.
        hacerPCA: When True (default) apply ``PCA(n_components=50)`` after
            standardization; when False return the standardized magnitudes.

    Returns:
        The PCA-projected features, or the standardized magnitude matrix
        when ``hacerPCA`` is False.
    """
    espectros = compute_frequency_domain(
        datos_np, start_idx=0, end_idx=len(datos_np), sampling_rate=1.0
    )

    # Stack the per-signal magnitude lists into one 2-D matrix.
    columna_magnitudes = pd.DataFrame(espectros)['Magnitudes'].values
    matriz_magnitudes = np.vstack(columna_magnitudes)

    # Standardize every frequency bin across the signals in this batch.
    magnitudes_normalizadas = StandardScaler().fit_transform(matriz_magnitudes)

    if not hacerPCA:
        return magnitudes_normalizadas

    return PCA(n_components=50).fit_transform(magnitudes_normalizadas)

# Load the models

In [3]:
# Deserialize the previously trained models from disk.
import pickle

import joblib
from keras.models import load_model

# NOTE: joblib.load unpickles arbitrary objects; only load trusted files.
# NOTE(review): the recorded output of this cell is an AttributeError about
# `ml_dtypes` -- presumably a package version mismatch in the environment;
# verify the installed versions before re-running.

# Models stored as .pkl files (loaded with joblib)
rf_model = joblib.load('models/random_forest_model.pkl')    # random forest
xgb_model = joblib.load('models/xgb_model.pkl')             # XGBoost
logistic_model = joblib.load('models/logistic_model.pkl')   # logistic regression

# Models stored in the .keras format
nn_model1 = load_model('models/neural_network_model1.keras')  # neural network 1
nn_model2 = load_model('models/neural_network_model2.keras')  # neural network 2
cnn_model = load_model('models/cnn_model.keras')              # CNN

# SVM, also a .pkl file
svm_model = joblib.load('models/svm_model.pkl')

# TDA model; note that it lives under data/, not models/
tda_model = load_model('data/cnn1_model_0.4.keras')


AttributeError: module 'ml_dtypes' has no attribute 'float4_e2m1fn'


# Load the data

In [4]:
# R configurations to evaluate; each value is also the suffix of the data
# files read later and one column of the results table.
list_of_R = [
    0.65, 0.5, 0.4, 0.3, 0.25, 0.2, 0.15, 0.065,
    'rand100', 'rand250', 'rand500',
]

# Display name / model pairs, kept together so both lists stay aligned.
# The TDA CNN must remain last: the evaluation loop treats the final model
# as the one that receives images instead of PCA features.
modelos_con_nombre = [
    ('Random Forest', rf_model),
    ('XGBoost', xgb_model),
    ('SVM', svm_model),
    ('Logistic Regression', logistic_model),
    ('Neural Network 1', nn_model1),
    ('Neural Network 2', nn_model2),
    ('CNN', cnn_model),
    ('TDA CNN', tda_model),
]
lista_modelos = [modelo for _, modelo in modelos_con_nombre]
nombres_modelos = [nombre for nombre, _ in modelos_con_nombre]

# Empty results table: one row per model, one column per R configuration.
df = pd.DataFrame(index=nombres_modelos, columns=list_of_R)
df

Unnamed: 0,0.65,0.5,0.4,0.3,0.25,0.2,0.15,0.065,rand100,rand250,rand500
Random Forest,,,,,,,,,,,
XGBoost,,,,,,,,,,,
SVM,,,,,,,,,,,
Logistic Regression,,,,,,,,,,,
Neural Network 1,,,,,,,,,,,
Neural Network 2,,,,,,,,,,,
CNN,,,,,,,,,,,
TDA CNN,,,,,,,,,,,


In [5]:
# Number of R configurations (one column of the results table each).
len(list_of_R)

11

In [6]:
def flatten_images(imagenes):
    """Flatten every image to 1-D and stack the results in one array.

    Args:
        imagenes: Iterable of NumPy arrays (each must provide ``flatten``).

    Returns:
        ``np.ndarray`` built from the flattened images, in input order.
    """
    return np.array([imagen.flatten() for imagen in imagenes])

def create_folds(X, Y, Z, k=10):
    """Split three aligned arrays into the same k shuffled folds.

    A single permutation of the sample indices is generated (seed 42) and
    cut into ``k`` nearly equal parts with ``np.array_split``; the same
    index sets are then used to slice ``X``, ``Y`` and ``Z`` so that the
    folds stay aligned across the three arrays.

    The number of samples is taken from ``len(X)`` instead of a fixed
    1500, so inputs of any size are fully covered. For inputs with exactly
    1500 samples the folds are identical to the previous behavior.

    NOTE: this reseeds NumPy's global random generator with 42 as a side
    effect (kept unchanged for reproducibility of earlier runs).

    Args:
        X: Array of samples; must support NumPy integer-array indexing.
        Y: Array aligned with ``X`` (same length).
        Z: Array aligned with ``X`` (same length).
        k: Number of folds.

    Returns:
        Tuple ``(X_folded, Y_folded, Z_folded)`` of lists with ``k``
        entries each; entry ``f`` of every list holds the same samples.

    Raises:
        ValueError: If ``X``, ``Y`` and ``Z`` do not have the same length.
    """
    n_samples = len(X)
    if len(Y) != n_samples or len(Z) != n_samples:
        raise ValueError(
            "X, Y and Z must have the same number of samples, got "
            f"{n_samples}, {len(Y)} and {len(Z)}"
        )

    # Step 1: create and shuffle the indices (fixed seed => same folds
    # on every call).
    indices = np.arange(n_samples)
    np.random.seed(42)
    np.random.shuffle(indices)

    # Step 2: split the shuffled indices into k folds.
    fold_indices = np.array_split(indices, k)

    # Step 3: slice every array with the same index sets.
    X_folded = [X[fold] for fold in fold_indices]
    Y_folded = [Y[fold] for fold in fold_indices]
    Z_folded = [Z[fold] for fold in fold_indices]

    return X_folded, Y_folded, Z_folded


# Evaluate every loaded model on every R configuration, fold by fold.
# The models are only used for prediction here; nothing is trained.
signal_name = 'noisy_signals'
labels_name = 'labels'

print(len(lista_modelos), "models loaded.")

# Per-R summaries: one entry per R value, each a list with one value per model.
acc_global = []
rec_global = []
acc_desv_global = []
rec_desv_global = []

# Per-R full score matrices (rows = models, columns = folds).
acc_completo = []
rec_completo = []

# The last model in lista_modelos is fed the flattened images; every other
# model is fed the FFT/PCA features.
tda_index = len(lista_modelos) - 1

for r_value in list_of_R:
    print(f"----Processing R value: {r_value}-----")
    # Load the signals, images and labels for this R value.
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load
    # trusted files.
    datos_np = np.load(f'data/{signal_name}_{r_value}.npy')
    imagenes = np.load(f'data/imagenes_{r_value}.npy', allow_pickle=True)
    labels = np.load(f'data/{labels_name}_{r_value}.npy')
    imagenes = flatten_images(imagenes)

    # Split signals, labels and images into the same 10 shuffled folds.
    datos_np, labels, imagenes = create_folds(datos_np, labels, imagenes, k=10)
    n_folds = len(datos_np)

    # One list of per-model scores for every fold.
    acc_fold = []
    rec_fold = []

    for w in range(n_folds):
        print(f"Fold {w+1} of {n_folds}")
        # Build the FFT/PCA features of this fold.
        # NOTE(review): procesamiento_pca fits a new scaler and PCA on each
        # fold -- confirm this matches the preprocessing used in training.
        X_eval = procesamiento_pca(datos_np[w])
        y_true = labels[w]
        imagenes_fold = imagenes[w]

        acc_models = []
        rec_models = []

        for j, modelo in enumerate(lista_modelos):
            if j == tda_index:
                # TDA model: predict from the flattened images.
                y_pred = modelo.predict(imagenes_fold)
            else:
                # Every other model: predict from the FFT/PCA features.
                y_pred = modelo.predict(X_eval)

            print(f"Model {j+1} ({nombres_modelos[j]}) - Fold {w+1}")
            # Models exposing `summary` (used here to detect the Keras
            # networks) return probabilities; threshold them at 0.5.
            if hasattr(modelo, 'summary'):
                y_pred = (y_pred > 0.5).astype(int)

            accuracy = accuracy_score(y_true, y_pred)
            recall = recall_score(y_true, y_pred)
            print("Accuracy:", accuracy)

            acc_models.append(accuracy)
            rec_models.append(recall)

        # Save the accuracy and recall of every model for this fold.
        acc_fold.append(acc_models)
        rec_fold.append(rec_models)

    # Transpose so that models are rows and folds are columns.
    acc_fold = np.array(acc_fold).T.tolist()
    rec_fold = np.array(rec_fold).T.tolist()

    print(acc_fold)
    print(rec_fold)

    # Mean and standard deviation across folds, one value per model.
    acc_local = [np.mean(scores) for scores in acc_fold]
    rec_local = [np.mean(scores) for scores in rec_fold]
    acc_desv_local = [np.std(scores) for scores in acc_fold]
    rec_desv_local = [np.std(scores) for scores in rec_fold]

    acc_global.append(acc_local)
    rec_global.append(rec_local)
    acc_desv_global.append(acc_desv_local)
    rec_desv_global.append(rec_desv_local)

    # Save the whole matrix.
    acc_completo.append(acc_fold)
    rec_completo.append(rec_fold)

    print(acc_global)
    print(rec_global)

    


8 models loaded.
----Processing R value: 0.65-----
Fold 1 of 10
Model 1 (Random Forest) - Fold 1
Accuracy: 0.72
Model 2 (XGBoost) - Fold 1
Accuracy: 0.9466666666666667
Model 3 (SVM) - Fold 1
Accuracy: 0.9266666666666666
Model 4 (Logistic Regression) - Fold 1
Accuracy: 0.8933333333333333
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Model 5 (Neural Network 1) - Fold 1
Accuracy: 0.8133333333333334
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Model 6 (Neural Network 2) - Fold 1
Accuracy: 0.8866666666666667
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Model 7 (CNN) - Fold 1
Accuracy: 0.4666666666666667
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 210ms/step
Model 8 (TDA CNN) - Fold 1
Accuracy: 0.7666666666666667
Fold 2 of 10
Model 1 (Random Forest) - Fold 2
Accuracy: 0.9333333333333333
Model 2 (XGBoost) - Fold 2
Accuracy: 0.94
Model 3 (SVM) - Fold 2
Accuracy: 0.96
Model 4 (Logistic Regression) 

In [7]:
len(acc_completo)  # One entry (models x folds accuracy matrix) per processed R value

11

In [8]:
# Persist the complete score matrices (one models x folds matrix per R value)
# as .npy files.
for ruta_salida, matrices in (
    ('data/acc_completo.npy', acc_completo),
    ('data/rec_completo.npy', rec_completo),
):
    np.save(ruta_salida, matrices)

In [9]:
# Display the results table; no cell shown so far has assigned values to it.
df

Unnamed: 0,0.65,0.5,0.4,0.3,0.25,0.2,0.15,0.065,rand100,rand250,rand500
Random Forest,,,,,,,,,,,
XGBoost,,,,,,,,,,,
SVM,,,,,,,,,,,
Logistic Regression,,,,,,,,,,,
Neural Network 1,,,,,,,,,,,
Neural Network 2,,,,,,,,,,,
CNN,,,,,,,,,,,
TDA CNN,,,,,,,,,,,


In [10]:
# Convert every column label to its string form (the R values are a mix of
# floats and strings), then display the table.
df.columns = df.columns.map(str)
df

Unnamed: 0,0.65,0.5,0.4,0.3,0.25,0.2,0.15,0.065,rand100,rand250,rand500
Random Forest,,,,,,,,,,,
XGBoost,,,,,,,,,,,
SVM,,,,,,,,,,,
Logistic Regression,,,,,,,,,,,
Neural Network 1,,,,,,,,,,,
Neural Network 2,,,,,,,,,,,
CNN,,,,,,,,,,,
TDA CNN,,,,,,,,,,,


In [11]:
# Export one CSV per metric. Each entry of `resultados` is a list with one
# item per R value (same order as the columns of df), and each item holds
# one value per model (same order as the rows of df).
resultados = [acc_global, rec_global, acc_desv_global, rec_desv_global]
nombres_resultados = ['Accuracy', 'Recall', 'Accuracy Std Dev', 'Recall Std Dev']

for nombre_metrica, valores_metrica in zip(nombres_resultados, resultados):
    # df is reused for every metric: overwrite all of its columns first...
    for i, columna in enumerate(df.columns):
        df[columna] = valores_metrica[i]

    # ...then write the file once, when the table is complete. Writing
    # inside the column loop rewrote the file for every column with a
    # table that still mixed in values from the previous metric.
    # NOTE(review): index=False drops the model names from the CSV; rows
    # are identified only by position -- confirm this is intended.
    df.to_csv(f'resultados/resultados_{nombre_metrica}.csv', index=False)
    



In [55]:
# Display the table; after the export loop df holds the last metric written
# (the recall standard deviations).
df

Unnamed: 0,0.6,0.5,0.4,0.3,0.2,0.15,0.065,rand100,rand250,rand500
Random Forest,0.026606,0.025823,0.027435,0.033709,0.025735,0.034677,0.031591,0.055264,0.05209,0.042907
XGBoost,0.023152,0.042841,0.029946,0.042677,0.022973,0.046481,0.044684,0.052359,0.055688,0.040028
SVM,0.027461,0.041293,0.019327,0.036978,0.016217,0.027919,0.031138,0.045145,0.050318,0.039424
Logistic Regression,0.054555,0.058256,0.043423,0.072035,0.030916,0.051179,0.040368,0.042734,0.048822,0.042127
Neural Network 1,0.088539,0.104563,0.081142,0.088794,0.043091,0.052861,0.055567,0.041323,0.040084,0.038828
Neural Network 2,0.057957,0.065476,0.064636,0.069285,0.037856,0.044611,0.048747,0.03979,0.047229,0.037383
CNN,0.018022,0.026703,0.031332,0.023818,0.019073,0.023867,0.012217,0.021825,0.035971,0.036574
TDA CNN,0.037959,0.064738,0.053475,0.05392,0.037901,0.04776,0.061943,0.038613,0.047167,0.0222
