# Notebook for making test runs

Libraries


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from xgboost import XGBClassifier
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from keras.regularizers import l2
from keras.callbacks import EarlyStopping

# GTDA (giotto-tda) libraries
from gtda.time_series import SingleTakensEmbedding, takens_embedding_optimal_parameters
from gtda.homology import VietorisRipsPersistence
from gtda.plotting import plot_point_cloud
import plotly.graph_objects as go
#from nolitsa import dimension, delay (nolitsa==0.1)

import math
import gudhi
import ripser
from persim import plot_diagrams, PersistenceImager

In [2]:
def compute_frequency_domain(signals, start_idx, end_idx, sampling_rate=1.0, freq_limit=0.05):
    """Return the low-frequency FFT content of a range of signals.

    For every index ``i`` in ``range(start_idx, end_idx)`` the FFT of
    ``signals[i]`` is computed and only the bins whose frequency lies in the
    closed interval ``[-freq_limit, freq_limit]`` are kept.

    Args:
        signals: Indexable collection of signals; ``signals[i]`` is passed
            directly to ``scipy.fft.fft``.
        start_idx: First index to process (inclusive).
        end_idx: Index at which processing stops (exclusive).
        sampling_rate: Samples per unit of time; the sample spacing handed to
            ``fftfreq`` is ``1 / sampling_rate``.
        freq_limit: Half-width of the retained frequency band. The default of
            0.05 reproduces the band that used to be hard-coded.

    Returns:
        A list with one dict per processed signal, holding:
        ``'Signal'`` (the 1-based signal index), ``'Frequencies'`` (retained
        frequencies, in FFT bin order) and ``'Magnitudes'`` (absolute value
        of the FFT at those frequencies), the last two as plain lists.
    """
    frequency_domain_data = []
    for i in range(start_idx, end_idx):
        signal_fft = fft(signals[i])
        frequencies = fftfreq(len(signal_fft), d=1 / sampling_rate)

        # Boolean mask of the bins inside the requested band (both signs).
        in_band = (frequencies >= -freq_limit) & (frequencies <= freq_limit)

        frequency_domain_data.append({
            'Signal': i + 1,
            'Frequencies': frequencies[in_band].tolist(),
            # Only the retained bins need their magnitude computed.
            'Magnitudes': np.abs(signal_fft[in_band]).tolist(),
        })
    return frequency_domain_data

def procesamiento_pca(datos_np, hacerPCA: bool=True, n_components: int=50, scaler=None, pca=None):
    """Turn raw signals into standardized (and optionally PCA-reduced) features.

    Pipeline: low-frequency FFT magnitudes of every signal (via
    ``compute_frequency_domain`` with ``sampling_rate=1.0``) -> per-feature
    standardization -> optional PCA projection.

    Args:
        datos_np: Sized, indexable collection of signals. Every signal must
            yield the same number of retained FFT bins so the magnitudes can
            be stacked into a 2-D matrix.
        hacerPCA: When False, the standardized magnitudes are returned
            without any PCA projection.
        n_components: Number of principal components used when a new PCA is
            fitted. The default of 50 is the value that used to be
            hard-coded.
        scaler: Optional already-fitted scaler exposing ``transform``. When
            None, a fresh ``StandardScaler`` is fitted on ``datos_np``.
        pca: Optional already-fitted PCA exposing ``transform``. When None, a
            fresh ``PCA`` is fitted on ``datos_np``.

    Returns:
        2-D array with one row per signal.

    NOTE(review): with ``scaler``/``pca`` left as None the transformers are
    fitted on the data being processed. When this is used on a held-out set
    for a model trained elsewhere, the features may not live in the space the
    model was trained on; pass the transformers fitted at training time if
    they are available -- confirm against the training notebook.
    """
    frequency_domain_results = compute_frequency_domain(
        datos_np, start_idx=0, end_idx=len(datos_np), sampling_rate=1.0
    )

    # One row per signal, one column per retained FFT bin.
    magnitudes = np.vstack(
        [entry['Magnitudes'] for entry in frequency_domain_results]
    )

    # Standardize each frequency bin.
    if scaler is None:
        normalized_magnitudes = StandardScaler().fit_transform(magnitudes)
    else:
        normalized_magnitudes = scaler.transform(magnitudes)

    if not hacerPCA:
        return normalized_magnitudes

    if pca is None:
        return PCA(n_components=n_components).fit_transform(normalized_magnitudes)
    return pca.transform(normalized_magnitudes)

# Load the models

In [24]:
# Deserialization helpers for the persisted models.
import joblib
import pickle
from keras.models import load_model

# Classical estimators that were saved with joblib.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
rf_model = joblib.load('models/random_forest_model.pkl')
xgb_model = joblib.load('models/xgb_model.pkl')
svm_model = joblib.load('models/svm_model.pkl')

# Keras networks stored in the native .keras format.
nn_model = load_model('models/neural_network_model.keras')
cnn_model = load_model('models/cnn_model.keras')

# The TDA CNN is read from data/ rather than models/.
tda_model = load_model('data/cnn1_model_0.4.keras')


# Load the data

In [21]:
# Score the random forest on each noisy test set.
signal_name = 'noisy_signals'
labels_name = 'labels'
# Earlier runs iterated over the levels 0.6, 0.5, 0.4, 0.2, 0.15, 0.065.
list_of_R = ['rand100', 'rand250', 'rand500']

for i in list_of_R:
    # Signals first, then their ground-truth labels.
    datos_np, labels = (
        np.load(f'data/{signal_name}_{i}.npy'),
        np.load(f'data/{labels_name}_{i}.npy'),
    )

    # FFT magnitudes -> standardization -> PCA features.
    y_test = labels
    X_test = procesamiento_pca(datos_np)

    y_pred_ruido = rf_model.predict(X_test)
    print(y_pred_ruido)
    print(y_test.shape)

    # Fraction of test signals classified correctly.
    accuracy_ruido = accuracy_score(y_test, y_pred_ruido)
    print("Accuracy:", accuracy_ruido)


[1. 1. 1. ... 0. 0. 0.]
(1500,)
Accuracy: 0.5086666666666667
[1. 1. 1. ... 0. 0. 0.]
(1500,)
Accuracy: 0.5033333333333333
[1. 1. 1. ... 0. 0. 0.]
(1500,)
Accuracy: 0.49333333333333335


In [22]:
# Score the SVM on each noisy test set.
signal_name = 'noisy_signals'
labels_name = 'labels'
# Earlier runs iterated over the levels 0.6, 0.5, 0.4, 0.2, 0.15, 0.065.
list_of_R = ['rand100', 'rand250', 'rand500']

for i in list_of_R:
    # Signals first, then their ground-truth labels.
    datos_np, labels = (
        np.load(f'data/{signal_name}_{i}.npy'),
        np.load(f'data/{labels_name}_{i}.npy'),
    )

    # FFT magnitudes -> standardization -> PCA features.
    y_test = labels
    X_test = procesamiento_pca(datos_np)

    y_pred_ruido = svm_model.predict(X_test)
    print(y_pred_ruido)
    print(y_test.shape)

    # Fraction of test signals classified correctly.
    accuracy_ruido = accuracy_score(y_test, y_pred_ruido)
    print("Accuracy:", accuracy_ruido)

[1. 1. 1. ... 0. 0. 0.]
(1500,)
Accuracy: 0.51
[1. 1. 1. ... 0. 0. 0.]
(1500,)
Accuracy: 0.5046666666666667
[1. 1. 1. ... 0. 0. 0.]
(1500,)
Accuracy: 0.49933333333333335


In [23]:
# Score the XGBoost classifier on each noisy test set.
signal_name = 'noisy_signals'
labels_name = 'labels'
# Earlier runs iterated over the levels 0.6, 0.5, 0.4, 0.2, 0.15, 0.065.
list_of_R = ['rand100', 'rand250', 'rand500']

for i in list_of_R:
    # Signals first, then their ground-truth labels.
    datos_np, labels = (
        np.load(f'data/{signal_name}_{i}.npy'),
        np.load(f'data/{labels_name}_{i}.npy'),
    )

    # FFT magnitudes -> standardization -> PCA features.
    y_test = labels
    X_test = procesamiento_pca(datos_np)

    y_pred_ruido = xgb_model.predict(X_test)
    print(y_pred_ruido)
    print(y_test.shape)

    # Fraction of test signals classified correctly.
    accuracy_ruido = accuracy_score(y_test, y_pred_ruido)
    print("Accuracy:", accuracy_ruido)

[1 1 1 ... 0 0 0]
(1500,)
Accuracy: 0.5186666666666667
[1 1 1 ... 0 0 0]
(1500,)
Accuracy: 0.5086666666666667
[1 1 1 ... 0 0 0]
(1500,)
Accuracy: 0.496


In [26]:
# Evaluate the TDA CNN on the image representation of each test set.
signal_name = 'imagenes'
labels_name = 'labels'
# Earlier runs iterated over the levels 0.6, 0.5, 0.4, 0.2, 0.15, 0.065.
list_of_R = ['rand100', 'rand250', 'rand500']

for i in list_of_R:
    # The images are stored as a pickled object array.
    # NOTE: allow_pickle=True can execute arbitrary code while loading;
    # only use it with files from a trusted source.
    imagenes = np.load(f'data/{signal_name}_{i}.npy', allow_pickle=True)
    labels = np.load(f'data/{labels_name}_{i}.npy')

    # Flatten every image into a 1-D vector. A comprehension is used so the
    # outer loop variable ``i`` is no longer overwritten by an inner index.
    imagenes = np.array([imagen.flatten() for imagen in imagenes])

    # Add a trailing channel dimension: (samples, features, 1).
    X = imagenes.reshape(-1, imagenes.shape[1], 1)
    y = labels

    # Assumes the model was compiled with exactly one metric, so that
    # evaluate() returns (loss, accuracy) -- TODO confirm.
    val_loss, val_accuracy = tda_model.evaluate(X, y)
    print(f'Validation Loss: {val_loss}, Validation Accuracy: {val_accuracy}')

47/47 ━━━━━━━━━━━━━━━━━━━━ 12s 238ms/step - accuracy: 0.5360 - loss: 0.7462
Validation Loss: 0.7446233630180359, Validation Accuracy: 0.5379999876022339
47/47 ━━━━━━━━━━━━━━━━━━━━ 10s 222ms/step - accuracy: 0.5155 - loss: 0.7487
Validation Loss: 0.751818060874939, Validation Accuracy: 0.5040000081062317
47/47 ━━━━━━━━━━━━━━━━━━━━ 10s 217ms/step - accuracy: 0.5423 - loss: 0.7476
Validation Loss: 0.7469534277915955, Validation Accuracy: 0.5286666750907898
