In [None]:
# Added more layers (stacked LSTM, Dropout, Dense) to the LSTM model for better results.

import pandas as pd
import numpy as np
import os
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from keras._tf_keras.keras.models import Sequential 
from keras._tf_keras.keras.layers import Dense, LSTM, Dropout

def preprocess_csv(file_path):
    """Load the 'VRM' column of a CSV file and scale it with MinMaxScaler.

    NaN entries are replaced by the mean of the non-NaN entries before
    scaling. The column is handled as a 2-D array of shape (n_rows, 1).

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The scaled column as returned by ``MinMaxScaler.fit_transform``, or
        ``None`` when every value is NaN or when any exception is raised
        while reading or processing the file (the error is printed).
    """
    try:
        frame = pd.read_csv(file_path)
        column = frame['VRM'].values.reshape(-1, 1)

        missing = np.isnan(column)
        if np.any(missing):
            print(f"NaN values found in {file_path}.")
            # Without at least one valid value there is no mean to fill with.
            if np.count_nonzero(~missing) == 0:
                print(f"All values are NaN in {file_path}. Skipping this file.")
                return None
            mean_value = np.nanmean(column)
            print(f"Filling NaNs with the mean: {mean_value}")
            column = np.nan_to_num(column, nan=mean_value)

        return MinMaxScaler().fit_transform(column)
    except Exception as e:
        # Best-effort loader: report the problem and let the caller skip the file.
        print(f"Error processing {file_path}: {e}")
        return None

def fourier_transform(data):
    """Return the magnitude of the discrete Fourier transform of *data*.

    Args:
        data: Array-like input passed directly to ``np.fft.fft``.

    Returns:
        Array of absolute values of the FFT coefficients.
    """
    return np.abs(np.fft.fft(data))

folder_path = 'testing_data_trial_2'

# Load every CSV in the folder. processed_data[k] and file_names[k] always
# refer to the same file; files that preprocess_csv rejects (returns None)
# appear in neither list.
processed_data = []
file_names = []
for filename in os.listdir(folder_path):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(folder_path, filename)
    data = preprocess_csv(file_path)
    if data is None:
        continue
    processed_data.append(data)
    file_names.append(filename)


# Preparing Data for LSTM
sequence_length = 20

# A file with fewer than sequence_length samples yields no sliding window and
# therefore no LSTM output. Drop such files here, filtering file_names and
# processed_data together so they stay index-aligned and every remaining file
# ends up with exactly one feature row.
long_enough = [len(data) >= sequence_length for data in processed_data]
for filename, keep in zip(file_names, long_enough):
    if not keep:
        print(f"Skipping {filename}: fewer than {sequence_length} samples.")
file_names = [name for name, keep in zip(file_names, long_enough) if keep]
processed_data = [data for data, keep in zip(processed_data, long_enough) if keep]

# Build the sliding windows and remember how many each file contributed, so
# the per-window model outputs can be mapped back to their file afterwards.
X = []
window_counts = []
for data in processed_data:
    n_windows = len(data) - sequence_length + 1
    window_counts.append(n_windows)
    X.extend(data[start:start + sequence_length] for start in range(n_windows))

if not X:
    raise SystemExit("No file has enough samples to build an LSTM window. Exiting.")

# Convert to NumPy array and reshape
X = np.array(X)
X = X.reshape(X.shape[0], sequence_length, 1)  # Reshape to (number_of_samples, sequence_length, num_features)

# Check for NaN values in X
if np.any(np.isnan(X)):
    raise SystemExit("NaN values found in X. Exiting.")

# Create a more complex LSTM model
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(sequence_length, 1)))  # First LSTM layer
model.add(Dropout(0.2))  # Dropout layer to prevent overfitting
model.add(LSTM(50, return_sequences=True))  # Second LSTM layer
model.add(Dropout(0.2))  # Another Dropout layer
model.add(LSTM(25))  # Third LSTM layer
model.add(Dropout(0.2))  # Dropout layer
model.add(Dense(1))  # Output layer
model.compile(optimizer='adam', loss='mse')

# Fit the model
# NOTE(review): the target is X itself, shaped (samples, sequence_length, 1),
# while the network ends in Dense(1) after a non-sequence LSTM. The loss then
# presumably relies on broadcasting one output against the whole window;
# confirm this is the intended training objective.
model.fit(X, X, epochs=20, batch_size=32)  # Increased epochs for better training

# Predict LSTM features (one row per sliding window, in the order X was built)
lstm_features = model.predict(X)
lstm_features = lstm_features.reshape(lstm_features.shape[0], -1)

# Collapse the per-window outputs to one vector per file by averaging each
# file's own windows. Zipping the per-window rows directly against the
# per-file FFT vectors would pair file k with global window k, which belongs
# to an unrelated (usually the first) file.
file_boundaries = np.cumsum(window_counts)[:-1]
file_lstm_features = [
    windows.mean(axis=0) for windows in np.split(lstm_features, file_boundaries)
]

fourier_features = [fourier_transform(data.flatten()) for data in processed_data]

# Combine features for clustering: per-file mean LSTM output followed by the
# first 10 FFT magnitudes. Every kept file has at least sequence_length
# samples, so the FFT slice always has 10 entries.
combined_features = [
    np.concatenate((lstm_f, fourier_f[:10]))
    for lstm_f, fourier_f in zip(file_lstm_features, fourier_features)
]

# KMeans clustering
n_clusters = 12
cluster_root = 'Signature Fault Clusters Version 4 Sequencial to 20/VRM'

kmeans = KMeans(n_clusters=n_clusters, random_state=0)
kmeans.fit(combined_features)
labels = kmeans.labels_

# Create one output directory per cluster under the common root
os.makedirs(cluster_root, exist_ok=True)
for cluster_id in range(n_clusters):
    os.makedirs(os.path.join(cluster_root, f'VRM Cluster {cluster_id}'), exist_ok=True)

# Move each file out of the source folder into the directory of its cluster;
# labels[index] is the cluster assigned to file_names[index].
for index, filename in enumerate(file_names):
    target_dir = os.path.join(cluster_root, f'VRM Cluster {labels[index]}')
    os.rename(os.path.join(folder_path, filename), os.path.join(target_dir, filename))

NaN values found in testing_data_trial_2\04_04_2023_17_18_15_4349.csv.
All values are NaN in testing_data_trial_2\04_04_2023_17_18_15_4349.csv. Skipping this file.
NaN values found in testing_data_trial_2\06_07_2023_13_03_06_10047.csv.
All values are NaN in testing_data_trial_2\06_07_2023_13_03_06_10047.csv. Skipping this file.
NaN values found in testing_data_trial_2\07_09_2023_18_10_24_12842.csv.
All values are NaN in testing_data_trial_2\07_09_2023_18_10_24_12842.csv. Skipping this file.
NaN values found in testing_data_trial_2\08_10_2023_10_13_58_14447.csv.
All values are NaN in testing_data_trial_2\08_10_2023_10_13_58_14447.csv. Skipping this file.
NaN values found in testing_data_trial_2\08_10_2023_10_48_23_14449.csv.
All values are NaN in testing_data_trial_2\08_10_2023_10_48_23_14449.csv. Skipping this file.


  super().__init__(**kwargs)


Epoch 1/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1099s[0m 15ms/step - loss: 0.0098
Epoch 2/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1014s[0m 14ms/step - loss: 0.0084
Epoch 3/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1686s[0m 23ms/step - loss: 0.0084
Epoch 4/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1066s[0m 15ms/step - loss: 0.0084
Epoch 5/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1071s[0m 15ms/step - loss: 0.0083
Epoch 6/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1973s[0m 27ms/step - loss: 0.0083
Epoch 7/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1180s[0m 16ms/step - loss: 0.0084
Epoch 8/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1061s[0m 14ms/step - loss: 0.0083
Epoch 9/20
[1m73249/73249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1155s[0m 16ms/step - loss: 0.0083
Epoch 10/20
[1m732