# This notebook includes the results of several ML models applied to the problem using the data from PCA of Fourier Series

Using 50 PCA components of the magnitudes of the Fourier Series of a wave signal, we will try to create a model to predict whether the signal is a gravitational wave or just noise. 


In [1]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.fft import fft, fftfreq
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from xgboost import XGBClassifier
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.optimizers import Adam
from keras.utils import to_categorical
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from keras.regularizers import l2
from keras.callbacks import EarlyStopping

# Librerias GTDA
from gtda.time_series import SingleTakensEmbedding, takens_embedding_optimal_parameters
from gtda.homology import VietorisRipsPersistence
from gtda.plotting import plot_point_cloud
import plotly.graph_objects as go
#from nolitsa import dimension, delay (nolitsa==0.1)

import math
import gudhi
import ripser
from persim import plot_diagrams, PersistenceImager



In [2]:
plt.rcParams.update(plt.rcParamsDefault) #Run this if the plots are not showing correctly

# Model with pca of Fourier series

### Original data (not used)

In [3]:
# DATA Fourier
fourier = pd.read_csv('fouriertable.csv')
fourier.head()

Unnamed: 0,Signal,Frequency,Magnitude,label
0,1,"[0.0, 0.0001150483202945237, 0.000230096640589...","[1.0145092655307018e-17, 1.3742328230303114e-1...",1.0
1,2,"[0.0, 0.0001150483202945237, 0.000230096640589...","[1.2012512630922278e-17, 9.408809696922871e-18...",1.0
2,3,"[0.0, 0.0001150483202945237, 0.000230096640589...","[4.953354143331156e-18, 1.1988600829190209e-17...",0.0
3,4,"[0.0, 0.0001150483202945237, 0.000230096640589...","[8.094916221989224e-18, 9.60860405296951e-18, ...",1.0
4,5,"[0.0, 0.0001150483202945237, 0.000230096640589...","[6.678644534662725e-18, 1.2868479172229612e-17...",1.0


In [7]:
pca = np.load('pca_result.npy')
pca = pd.DataFrame(pca)
# add the label column to the pca dataframe
pca['label'] = fourier['label']



display(pca.head())
print(pca.shape)



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,label
0,37.627767,10.342046,0.078061,-5.288138,8.872239,1.43476,-0.110027,-0.648014,0.490423,-0.639894,...,-1.767158,0.772734,0.741328,1.263705,2.689977,-4.219663,-0.172278,0.808752,0.27581,1.0
1,14.276117,-7.163439,-3.086087,-1.45763,0.579997,-0.657228,-3.322131,2.059191,-1.014485,-0.516384,...,0.890924,-0.241787,1.423723,-2.70271,1.354544,-0.603018,-0.528865,-0.059551,2.20623,1.0
2,-5.561709,1.799668,1.055614,-4.437189,3.133624,-0.669611,0.005676,0.31873,-0.373604,0.19585,...,-1.984465,-0.065637,1.75212,-0.760853,0.243293,-0.034146,-2.378585,-0.102611,-0.145424,0.0
3,1.498112,-1.78872,3.855057,0.330659,-4.59659,-1.462437,-0.031987,0.153426,-1.298065,-1.089507,...,-0.610866,0.387044,-4.075057,3.388359,-0.7474,-0.817974,0.177122,2.763773,0.17529,1.0
4,-3.51621,0.224371,1.645021,0.399034,0.558233,0.865032,-2.585594,0.985361,1.466662,0.717391,...,-2.85965,-1.454379,-1.212596,-0.697088,-0.332959,-3.078199,-2.411767,-0.217808,-1.486168,1.0


(1500, 51)


### Additional Data for Validation

##### Preprocessing

In [2]:
def compute_frequency_domain(signals, start_idx, end_idx, sampling_rate=1.0):
    frequency_domain_data = []
    for i in range(start_idx, end_idx):
        # Compute the FFT of the signal
        signal_fft = fft(signals[i])
        # Compute the corresponding frequencies
        frequencies = fftfreq(len(signal_fft), d=1/sampling_rate)
        # Filter frequencies and magnitudes in the range -0.05 to 0.05
        filtered_indices = np.where((frequencies >= -0.05) & (frequencies <= 0.05))
        filtered_frequencies = frequencies[filtered_indices].tolist()
        filtered_magnitudes = np.abs(signal_fft)[filtered_indices].tolist()
        # Store filtered frequencies and magnitudes
        frequency_domain_data.append({
            'Signal': i+1, 
            'Frequencies': filtered_frequencies, 
            'Magnitudes': filtered_magnitudes
        })
    return frequency_domain_data

def procesamiento_pca(datos_np, hacerPCA: bool=True):
    frequency_domain_results = compute_frequency_domain(datos_np, start_idx=0, end_idx=len(datos_np), sampling_rate=1.0)

    # Convert the results into a DataFrame
    frequency_domain_df = pd.DataFrame(frequency_domain_results)

    # Extracting magnitudes of Fourier series data
    magnitudes = frequency_domain_df['Magnitudes'].values

    # Reshaping magnitudes to match the expected input shape for t-SNE
    reshaped_magnitudes = np.vstack(magnitudes)

    # Normalize the data
    scaler = StandardScaler()
    normalized_magnitudes = scaler.fit_transform(reshaped_magnitudes)

    #En caso de poner false
    pca_result = normalized_magnitudes

    # Optionally apply PCA before t-SNE
    if hacerPCA:
        pca = PCA(n_components=50)
        pca_result = pca.fit_transform(normalized_magnitudes)
    

    return pca_result

In [3]:
# TRAINING DATA
datos_np = np.load('data/noisy_signals.npy')
labels = np.load('data/labels.npy')

X = procesamiento_pca(datos_np)
y = labels


In [None]:




# DATOS VALIDACIÓN (baja) ( R = 0.25)
datos_np = np.load('data/noisy_signals_025.npy')
labels = np.load('data/labels_025.npy')

X_test_ruido = procesamiento_pca(datos_np)
y_test_ruido = labels


In [None]:


# DATOS VALIDACIÓN (baja) (R=0.5)
datos_np = np.load('data/noisy_signals_r_0.5.npy')
labels = np.load('data/labels_r_0.5.npy')

X_test_ruido_5 = procesamiento_pca(datos_np)
y_test_ruido_5 = labels


# DATOS VALIDACIÓN (baja) (R= 0.3)
datos_np = np.load('data/noisy_signals_r_0.3.npy')
labels = np.load('data/labels_r_0.3.npy')

X_test_ruido_3 = procesamiento_pca(datos_np)
y_test_ruido_3 = labels



# DATOS VALIDACIÓN (baja) (R= 0.15)
datos_np = np.load('data/noisy_signals_r_0.15.npy')
labels = np.load('data/labels_r_0.15.npy')

X_test_ruido_15 = procesamiento_pca(datos_np)
y_test_ruido_15 = labels


# DATOS VALIDACIÓN (baja) (R= 0.065)
datos_np = np.load('data/noisy_signals_r_0.065.npy')
labels = np.load('data/labels_r_0.065.npy')

X_test_ruido_065 = procesamiento_pca(datos_np)
y_test_ruido_065 = labels



# DATOS VALIDACIÓN (baja) (R=.2)
datos_np = np.load('data/noisy_signals_r_0.2.npy')
labels = np.load('data/labels_r_0.2.npy')

X_test_ruido_2 = procesamiento_pca(datos_np)
y_test_ruido_2 = labels






# DATOS VALIDACIÓN (alta) (R= 0.65)
datos_np = np.load('data/noisy_signals.npy')
labels = np.load('data/labels.npy')

X_test_ruido_65 = procesamiento_pca(datos_np)
y_test_ruido_65 = labels



### RandomForestClassifier

In [4]:

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Perform k-fold cross-validation
cv_scores = cross_val_score(model, X, y, cv=5)

# Calculate the average accuracy across folds
avg_accuracy = np.mean(cv_scores)
print("Average Accuracy (Cross-Validation):", avg_accuracy)

# Train the model on the entire training set
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Save the trained model
import joblib
joblib.dump(model, 'models/random_forest_model.pkl')

Average Accuracy (Cross-Validation): 0.9466666666666667
Accuracy: 0.95


['models/random_forest_model.pkl']

#### Validating data with more noise

In [29]:

y_pred_ruido = model.predict(X_test_ruido)

# Calculate accuracy
accuracy_ruido = accuracy_score(y_test_ruido, y_pred_ruido)
print("Accuracy:", accuracy_ruido)

Accuracy: 0.6066666666666667


#### Validating data with less noise

In [59]:

y_pred_ruido = model.predict(X_test_ruido_65)

# Calculate accuracy
accuracy_ruido = accuracy_score(y_test_ruido_65, y_pred_ruido)
print("Accuracy:", accuracy_ruido)

Accuracy: 0.976


In [60]:

y_pred_ruido = model.predict(X_test_ruido_15)

# Calculate accuracy
accuracy_ruido = accuracy_score(y_test_ruido_15, y_pred_ruido)
print("Accuracy:", accuracy_ruido)

Accuracy: 0.526


In [61]:

y_pred_ruido = model.predict(X_test_ruido_5)

# Calculate accuracy
accuracy_ruido = accuracy_score(y_test_ruido_5, y_pred_ruido)
print("Accuracy:", accuracy_ruido)

Accuracy: 0.9533333333333334


In [62]:

y_pred_ruido = model.predict(X_test_ruido_3)

# Calculate accuracy
accuracy_ruido = accuracy_score(y_test_ruido_3, y_pred_ruido)
print("Accuracy:", accuracy_ruido)

Accuracy: 0.77


In [63]:

y_pred_ruido = model.predict(X_test_ruido_065)

# Calculate accuracy
accuracy_ruido = accuracy_score(y_test_ruido_065, y_pred_ruido)
print("Accuracy:", accuracy_ruido)

Accuracy: 0.4846666666666667


In [64]:

y_pred_ruido = model.predict(X_test_ruido_2)

# Calculate accuracy
accuracy_ruido = accuracy_score(y_test_ruido_2, y_pred_ruido)
print("Accuracy:", accuracy_ruido)

Accuracy: 0.574


### SVM

In [5]:
# Create SVM classifier
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')

# Train the model
svm_model.fit(X_train, y_train)

# Predict labels for the test set
y_pred_svm = svm_model.predict(X_test)

# Calculate accuracy
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", accuracy_svm)
# Save the trained SVM model
joblib.dump(svm_model, 'models/svm_model.pkl')

SVM Accuracy: 0.9533333333333334


['models/svm_model.pkl']

In [66]:
y_pred_svm_ruido = svm_model.predict(X_test_ruido)

# Calculate accuracy
accuracy_svm_ruido = accuracy_score(y_test_ruido, y_pred_svm_ruido)
print("SVM Accuracy:", accuracy_svm_ruido)

SVM Accuracy: 0.76


In [67]:
y_pred_svm_ruido = svm_model.predict(X_test_ruido_65)

# Calculate accuracy
accuracy_svm_ruido = accuracy_score(y_test_ruido_65, y_pred_svm_ruido)
print("SVM Accuracy:", accuracy_svm_ruido)

SVM Accuracy: 0.9233333333333333


In [68]:
y_pred_svm_ruido = svm_model.predict(X_test_ruido_15)

# Calculate accuracy
accuracy_svm_ruido = accuracy_score(y_test_ruido_15, y_pred_svm_ruido)
print("SVM Accuracy:", accuracy_svm_ruido)

SVM Accuracy: 0.5686666666666667


In [69]:
y_pred_svm_ruido = svm_model.predict(X_test_ruido_5)

# Calculate accuracy
accuracy_svm_ruido = accuracy_score(y_test_ruido_5, y_pred_svm_ruido)
print("SVM Accuracy:", accuracy_svm_ruido)

SVM Accuracy: 0.904


In [70]:
y_pred_svm_ruido = svm_model.predict(X_test_ruido_3)

# Calculate accuracy
accuracy_svm_ruido = accuracy_score(y_test_ruido_3, y_pred_svm_ruido)
print("SVM Accuracy:", accuracy_svm_ruido)

SVM Accuracy: 0.8646666666666667


In [71]:
y_pred_svm_ruido = svm_model.predict(X_test_ruido_065)

# Calculate accuracy
accuracy_svm_ruido = accuracy_score(y_test_ruido_065, y_pred_svm_ruido)
print("SVM Accuracy:", accuracy_svm_ruido)

SVM Accuracy: 0.48933333333333334


In [72]:
y_pred_svm_ruido = svm_model.predict(X_test_ruido_2)

# Calculate accuracy
accuracy_svm_ruido = accuracy_score(y_test_ruido_2, y_pred_svm_ruido)
print("SVM Accuracy:", accuracy_svm_ruido)

SVM Accuracy: 0.672


### XGBoost

In [6]:
# Create XGBoost classifier
xgb_model = XGBClassifier(n_estimators=100, learning_rate=0.1)

# Train the model
xgb_model.fit(X_train, y_train)

# Predict labels for the test set
y_pred_xgb = xgb_model.predict(X_test)

# Calculate accuracy
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print("XGBoost Accuracy:", accuracy_xgb)
# save the trained XGBoost model
joblib.dump(xgb_model, 'models/xgb_model.pkl')

XGBoost Accuracy: 0.95


['models/xgb_model.pkl']

In [74]:
y_pred_xgb_ruido = xgb_model.predict(X_test_ruido)

# Calculate accuracy
accuracy_xgb = accuracy_score(y_test_ruido, y_pred_xgb_ruido)
print("XGBoost Accuracy:", accuracy_xgb)

XGBoost Accuracy: 0.6513333333333333


In [75]:
y_pred_xgb_ruido = xgb_model.predict(X_test_ruido_65)

# Calculate accuracy
accuracy_xgb = accuracy_score(y_test_ruido_65, y_pred_xgb_ruido)
print("XGBoost Accuracy:", accuracy_xgb)

XGBoost Accuracy: 0.9766666666666667


In [76]:
y_pred_xgb_ruido = xgb_model.predict(X_test_ruido_15)

# Calculate accuracy
accuracy_xgb = accuracy_score(y_test_ruido_15, y_pred_xgb_ruido)
print("XGBoost Accuracy:", accuracy_xgb)

XGBoost Accuracy: 0.538


In [77]:
y_pred_xgb_ruido = xgb_model.predict(X_test_ruido_5)

# Calculate accuracy
accuracy_xgb = accuracy_score(y_test_ruido_5, y_pred_xgb_ruido)
print("XGBoost Accuracy:", accuracy_xgb)

XGBoost Accuracy: 0.974


In [78]:
y_pred_xgb_ruido = xgb_model.predict(X_test_ruido_3)

# Calculate accuracy
accuracy_xgb = accuracy_score(y_test_ruido_3, y_pred_xgb_ruido)
print("XGBoost Accuracy:", accuracy_xgb)

XGBoost Accuracy: 0.7893333333333333


In [79]:
y_pred_xgb_ruido = xgb_model.predict(X_test_ruido_065)

# Calculate accuracy
accuracy_xgb = accuracy_score(y_test_ruido_065, y_pred_xgb_ruido)
print("XGBoost Accuracy:", accuracy_xgb)

XGBoost Accuracy: 0.47933333333333333


### Logistic Regression

In [7]:
# Create logistic regression classifier with L2 regularization (Ridge)
logistic_model = LogisticRegression(penalty='l2', C=1.0)

# Train the model
logistic_model.fit(X_train, y_train)

# Predict labels for the test set
y_pred_logistic = logistic_model.predict(X_test)

# Calculate accuracy
accuracy_logistic = accuracy_score(y_test, y_pred_logistic)
print("Logistic Regression Accuracy:", accuracy_logistic)

Logistic Regression Accuracy: 0.9433333333333334


In [81]:
# Predict labels for the test set
y_pred_logistic_ruido = logistic_model.predict(X_test_ruido)

# Calculate accuracy
accuracy_logistic_ruido = accuracy_score(y_test_ruido, y_pred_logistic_ruido)
print("Logistic Regression Accuracy:", accuracy_logistic_ruido)

Logistic Regression Accuracy: 0.7066666666666667


In [82]:
# Predict labels for the test set
y_pred_logistic_ruido = logistic_model.predict(X_test_ruido_65)

# Calculate accuracy
accuracy_logistic_ruido = accuracy_score(y_test_ruido_65, y_pred_logistic_ruido)
print("Logistic Regression Accuracy:", accuracy_logistic_ruido)

Logistic Regression Accuracy: 0.952


In [83]:
# Predict labels for the test set
y_pred_logistic_ruido = logistic_model.predict(X_test_ruido_15)

# Calculate accuracy
accuracy_logistic_ruido = accuracy_score(y_test_ruido_15, y_pred_logistic_ruido)
print("Logistic Regression Accuracy:", accuracy_logistic_ruido)

Logistic Regression Accuracy: 0.5266666666666666


In [84]:
# Predict labels for the test set
y_pred_logistic_ruido = logistic_model.predict(X_test_ruido_5)

# Calculate accuracy
accuracy_logistic_ruido = accuracy_score(y_test_ruido_5, y_pred_logistic_ruido)
print("Logistic Regression Accuracy:", accuracy_logistic_ruido)

Logistic Regression Accuracy: 0.9013333333333333


In [85]:
# Predict labels for the test set
y_pred_logistic_ruido = logistic_model.predict(X_test_ruido_3)

# Calculate accuracy
accuracy_logistic_ruido = accuracy_score(y_test_ruido_3, y_pred_logistic_ruido)
print("Logistic Regression Accuracy:", accuracy_logistic_ruido)

Logistic Regression Accuracy: 0.808


In [86]:
# Predict labels for the test set
y_pred_logistic_ruido = logistic_model.predict(X_test_ruido_065)

# Calculate accuracy
accuracy_logistic_ruido = accuracy_score(y_test_ruido_065, y_pred_logistic_ruido)
print("Logistic Regression Accuracy:", accuracy_logistic_ruido)

Logistic Regression Accuracy: 0.488


### Neural Network 1

In [10]:
# Create a Sequential model
nn_model = Sequential()

# Add input layer and hidden layers with dropout
nn_model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
nn_model.add(Dropout(0.5))
nn_model.add(Dense(32, activation='relu'))
nn_model.add(Dropout(0.5))

# Add output layer
nn_model.add(Dense(1, activation='sigmoid'))

# Compile the model
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
nn_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the model on test set
accuracy_nn = nn_model.evaluate(X_test, y_test)[1]
print("Neural Network Accuracy:", accuracy_nn)

# Save the trained neural network model
nn_model.save('models/neural_network_model.keras')

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5103 - loss: 1.3289 - val_accuracy: 0.6875 - val_loss: 0.5484
Epoch 2/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6536 - loss: 0.8321 - val_accuracy: 0.7833 - val_loss: 0.4326
Epoch 3/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7111 - loss: 0.6495 - val_accuracy: 0.8333 - val_loss: 0.3791
Epoch 4/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7276 - loss: 0.6106 - val_accuracy: 0.8542 - val_loss: 0.3366
Epoch 5/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7412 - loss: 0.5447 - val_accuracy: 0.8750 - val_loss: 0.3059
Epoch 6/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7907 - loss: 0.4334 - val_accuracy: 0.8917 - val_loss: 0.2766
Epoch 7/10
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━

In [9]:
y_pred_nn = nn_model.predict(X_test_ruido)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

NameError: name 'X_test_ruido' is not defined

In [89]:
y_pred_nn = nn_model.predict(X_test_ruido_65)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_65, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.81


In [90]:
y_pred_nn = nn_model.predict(X_test_ruido_15)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_15, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.5486666666666666


In [91]:
y_pred_nn = nn_model.predict(X_test_ruido_5)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_5, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.806


In [92]:
y_pred_nn = nn_model.predict(X_test_ruido_3)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_3, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.84


In [93]:
y_pred_nn = nn_model.predict(X_test_ruido_065)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_065, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.49733333333333335


### Neural Network (improved)

In [12]:


# Create a Sequential model
nn_model = Sequential()

# Add input layer and hidden layers with dropout and batch normalization
nn_model.add(Dense(64, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=l2(0.001)))
nn_model.add(BatchNormalization())
nn_model.add(Dropout(0.5))
nn_model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.001)))
nn_model.add(BatchNormalization())
nn_model.add(Dropout(0.5))

# Add output layer
nn_model.add(Dense(1, activation='sigmoid'))

# Compile the model
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Implement early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
nn_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the model on test set
accuracy_nn = nn_model.evaluate(X_test, y_test)[1]
print("Neural Network Accuracy:", accuracy_nn)

# Save the trained neural network model
nn_model.save('models/neural_network_model.keras')


Epoch 1/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.5199 - loss: 1.0196 - val_accuracy: 0.7292 - val_loss: 0.6510
Epoch 2/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6512 - loss: 0.7882 - val_accuracy: 0.8292 - val_loss: 0.5405
Epoch 3/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7212 - loss: 0.6738 - val_accuracy: 0.8625 - val_loss: 0.4808
Epoch 4/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7826 - loss: 0.5615 - val_accuracy: 0.8792 - val_loss: 0.4413
Epoch 5/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8104 - loss: 0.5122 - val_accuracy: 0.8917 - val_loss: 0.4024
Epoch 6/100
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8404 - loss: 0.4621 - val_accuracy: 0.8917 - val_loss: 0.3743
Epoch 7/100
[1m30/30[0m [32m━━━

In [95]:
y_pred_nn = nn_model.predict(X_test_ruido)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.742


In [96]:
y_pred_nn = nn_model.predict(X_test_ruido_65)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_65, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.9


In [97]:
y_pred_nn = nn_model.predict(X_test_ruido_15)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_15, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.554


In [98]:
y_pred_nn = nn_model.predict(X_test_ruido_5)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_5, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.8313333333333334


In [99]:
y_pred_nn = nn_model.predict(X_test_ruido_3)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_3, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.8493333333333334


In [100]:
y_pred_nn = nn_model.predict(X_test_ruido_065)
y_pred_nn_binary = (y_pred_nn > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate accuracy for the new data
accuracy_nn_new = accuracy_score(y_test_ruido_065, y_pred_nn_binary)
print("New Data Neural Network Accuracy:", accuracy_nn_new)

New Data Neural Network Accuracy: 0.49866666666666665


### CNN

In [13]:


# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for CNN input (assuming 1D CNN)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Define the CNN model
model = Sequential()

# Convolutional layer
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))

# Pooling layer
model.add(MaxPooling1D(pool_size=2))

# Flatten layer
model.add(Flatten())

# Fully connected layer
model.add(Dense(64, activation='relu'))

# Output layer
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')
# Save the trained model
model.save('models/cnn_model.keras')


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.5305 - loss: 0.6842 - val_accuracy: 0.6133 - val_loss: 0.6584
Epoch 2/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6741 - loss: 0.6121 - val_accuracy: 0.6467 - val_loss: 0.6241
Epoch 3/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7267 - loss: 0.5402 - val_accuracy: 0.6733 - val_loss: 0.5899
Epoch 4/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7955 - loss: 0.4781 - val_accuracy: 0.6633 - val_loss: 0.5942
Epoch 5/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8150 - loss: 0.4397 - val_accuracy: 0.7167 - val_loss: 0.5394
Epoch 6/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8546 - loss: 0.3719 - val_accuracy: 0.7300 - val_loss: 0.5466
Epoch 7/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━

In [102]:
X_test_ruido = scaler.transform(X_test_ruido)

# Reshape X_test_ruido for CNN input
X_test_ruido = X_test_ruido.reshape(X_test_ruido.shape[0], X_test_ruido.shape[1], 1)

# Predict labels for X_test_ruido
y_pred_ruido = model.predict(X_test_ruido)

# Convert predicted probabilities to binary predictions (0 or 1)
y_pred_binary = (y_pred_ruido > 0.5).astype(int).flatten()

# Ensure y_test_ruido is a 1-dimensional array
y_test_ruido = np.array(y_test_ruido).flatten()

# Evaluate accuracy
accuracy_ruido = np.mean(y_pred_binary == y_test_ruido)
print(f'Accuracy on X_test_ruido: {accuracy_ruido}')

Accuracy on X_test_ruido: 0.7546666666666667


In [103]:
X_test_ruido = scaler.transform(X_test_ruido_65)

# Reshape X_test_ruido for CNN input
X_test_ruido = X_test_ruido.reshape(X_test_ruido.shape[0], X_test_ruido.shape[1], 1)

# Predict labels for X_test_ruido
y_pred_ruido = model.predict(X_test_ruido)

# Convert predicted probabilities to binary predictions (0 or 1)
y_pred_binary = (y_pred_ruido > 0.5).astype(int).flatten()

# Ensure y_test_ruido is a 1-dimensional array
y_test_ruido = np.array(y_test_ruido_65).flatten()

# Evaluate accuracy
accuracy_ruido = np.mean(y_pred_binary == y_test_ruido)
print(f'Accuracy on X_test_ruido: {accuracy_ruido}')

Accuracy on X_test_ruido: 0.4093333333333333


In [104]:
X_test_ruido = scaler.transform(X_test_ruido_15)

# Reshape X_test_ruido for CNN input
X_test_ruido = X_test_ruido.reshape(X_test_ruido.shape[0], X_test_ruido.shape[1], 1)

# Predict labels for X_test_ruido
y_pred_ruido = model.predict(X_test_ruido)

# Convert predicted probabilities to binary predictions (0 or 1)
y_pred_binary = (y_pred_ruido > 0.5).astype(int).flatten()

# Ensure y_test_ruido is a 1-dimensional array
y_test_ruido = np.array(y_test_ruido_65).flatten()

# Evaluate accuracy
accuracy_ruido = np.mean(y_pred_binary == y_test_ruido)
print(f'Accuracy on X_test_ruido: {accuracy_ruido}')

Accuracy on X_test_ruido: 0.498
