In [None]:
import numpy as np
import pandas as pd
import csv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, Dense
from tensorflow.keras.layers import Reshape
from sklearn.preprocessing import MinMaxScaler

## Peaks Normalization and Log Transformation

In [None]:
# Read Peaks.csv by hand: the header line and the first field of every row
# are discarded, and empty fields are recorded as missing values.
with open('Peaks.csv') as f:
    reader = csv.reader(f)
    next(reader)  # skip header
    parsed_rows = [
        [float(cell) if cell != '' else np.nan for cell in record[1:]]
        for record in reader
    ]

# Missing values become 0 before the rows are assembled into a frame
# (np.nan_to_num also replaces infinities with large finite numbers).
data = pd.DataFrame([np.nan_to_num(values) for values in parsed_rows])
#Log transform
def log_transform(x):
    """Return the natural log of ``x`` when it is strictly positive.

    Any other value (zero, negatives, NaN) is handed back untouched, so the
    zeros that stand in for missing readings are left as zeros.
    """
    if x > 0:
        return np.log(x)
    return x

# Log-transform strictly positive entries only; zeros (the stand-in for
# missing values) and any negatives pass through unchanged. This is the
# vectorized equivalent of applying `log_transform` element by element and
# avoids DataFrame.applymap, which recent pandas releases deprecate.
positive_only = data.where(data > 0)          # non-positive entries -> NaN
data = np.log(positive_only).fillna(data)     # restore the untouched entries

# Normalize the data: every column is scaled to [0, 1] independently.
# `scaler` is fitted on all rows (before the slice below) and keeps each
# column's min/max so later cells can undo the scaling.
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)

data = pd.DataFrame(normalized_data)

# Only rows 91 onward are used from here on.
data = data.iloc[91:]
# Sequence length fed to the model. Later cells reshape a whole column to
# (-1, timesteps, 1), so this presumably has to match the number of rows kept
# above (29 in the recorded run) -- TODO confirm if Peaks.csv changes.
timesteps = 29
n_features = data.shape[1]

In [None]:
# Display the normalized frame used for training (rows 91 onward, one column per feature).
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
91,0.069168,0.176509,0.564049,0.817757,0.347033,0.860707,0.92059,1.0,0.990861,0.533145,0.253642
92,0.217291,0.264445,0.407301,0.686477,0.244306,0.565762,0.913335,0.972659,0.986428,0.626912,0.253642
93,0.132179,0.378657,0.436799,0.797944,0.297506,0.912097,0.870094,0.905314,0.980558,0.726903,0.512032
94,0.281155,0.488094,0.655198,0.579835,0.271399,0.689883,0.973338,0.852383,0.98188,0.574103,0.220795
95,0.293265,0.693835,0.615815,0.801268,0.244306,0.908731,0.924119,0.847351,0.947854,0.750823,0.220795
96,0.393838,0.559225,0.668665,0.69737,0.186847,0.825343,0.863056,0.814523,0.931104,0.613895,0.285536
97,0.393838,0.488094,0.596639,0.323996,0.562466,0.448258,0.867776,0.776043,0.903574,0.665251,0.253642
98,0.361874,0.456751,0.528768,0.579835,0.516727,0.385492,0.901923,0.871451,0.901507,0.702567,0.186937
99,0.256296,0.698982,0.514749,0.31908,0.535374,0.457664,0.88771,0.847351,0.921163,0.808893,0.316532
100,0.350891,0.578835,0.721592,0.522807,0.457692,0.689883,0.870094,0.875975,0.967484,0.990041,0.404602


In [None]:
# Encoder: a sequence of `timesteps` scalar readings goes through an LSTM and
# a narrowing stack of Dense layers down to a single latent value.
# NOTE(review): none of the Dense layers is given an activation, so each one
# uses the Keras default (linear) -- confirm this is intended.
inputs = Input(shape=(timesteps, 1))
x = LSTM(30)(inputs)
for units in (15, 10, 5):
    x = Dense(units)(x)
encoded = Dense(1)(x)

# Decoder: expand the latent value back to `timesteps` values, reshape them
# into a sequence, and map every timestep back to one scalar.
decoded = Dense(timesteps)(encoded)
decoded = Reshape((timesteps, 1))(decoded)
decoded = LSTM(30, return_sequences=True)(decoded)
for units in (15, 10, 5, 1):
    decoded = Dense(units)(decoded)

# Two models share the same layers: one stops at the latent value, the other
# runs through to the reconstruction and is the one that gets trained.
latent_space_model = Model(inputs=inputs, outputs=encoded)
autoencoder = Model(inputs=inputs, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mae')


In [None]:
# Print the autoencoder's layers with their output shapes and parameter counts.
autoencoder.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 29, 1)]           0         
                                                                 
 lstm_4 (LSTM)               (None, 30)                3840      
                                                                 
 dense_18 (Dense)            (None, 15)                465       
                                                                 
 dense_19 (Dense)            (None, 10)                160       
                                                                 
 dense_20 (Dense)            (None, 5)                 55        
                                                                 
 dense_21 (Dense)            (None, 1)                 6         
                                                                 
 dense_22 (Dense)            (None, 29)                58  

In [None]:
# Fit the single shared autoencoder on one column at a time, using the column
# as both input and target. Each fit continues from the weights left by the
# previous column.
# NOTE(review): because columns are fitted one after another, later columns
# may dominate the final weights -- consider fitting all columns in one call.
for i, (_, column_values) in enumerate(data.items()):
    print(f'Training on column: {i+1}/{n_features}')
    x_train = column_values.values.reshape(-1, timesteps, 1)
    autoencoder.fit(x=x_train, y=x_train, epochs=50, batch_size=16, verbose=0)

# Save model weights
autoencoder.save_weights('autoencoder_weights.h5')

Training on column: 1/11
Training on column: 2/11
Training on column: 3/11
Training on column: 4/11
Training on column: 5/11
Training on column: 6/11
Training on column: 7/11
Training on column: 8/11
Training on column: 9/11
Training on column: 10/11
Training on column: 11/11


In [None]:
def predict_sequence_and_latent_space(input_sequence):
    """Run one sequence through the autoencoder and the latent-space model.

    Args:
        input_sequence: 1-D sequence of (normalized) values; it is reshaped
            into a single-sample batch of shape (1, len, 1).

    Returns:
        A tuple ``(reconstruction, latent)`` where ``reconstruction`` is the
        autoencoder output flattened to 1-D and ``latent`` is the first
        element of the latent-space model's output.

    Side effects:
        Prints the shape of the batch that is sent to the models.
    """
    batch = np.array(input_sequence).reshape(1, -1, 1)
    print(batch.shape)
    reconstruction = autoencoder.predict(batch)
    latent = latent_space_model.predict(batch)
    return reconstruction.reshape(-1), latent[0][0]

def denormalize(normalized_value, min_val, max_val):
    """Map a min-max-scaled value back onto the ``[min_val, max_val]`` range.

    Args:
        normalized_value: value (or array of values) on the scaled axis,
            where 0 corresponds to ``min_val`` and 1 to ``max_val``.
        min_val: lower end of the original range.
        max_val: upper end of the original range.

    Returns:
        ``normalized_value`` expressed on the original scale.
    """
    span = max_val - min_val
    return min_val + normalized_value * span

# Pick one column and recover its values on the log scale using that
# column's own min/max as recorded by the scaler.
column_to_predict = 1
input_sequence = data.iloc[:, column_to_predict].values.tolist()
min_input = scaler.data_min_[column_to_predict]
max_input = scaler.data_max_[column_to_predict]
denormalized_input_sequence = [denormalize(val, min_input, max_input) for val in input_sequence]

predictions, latent_space_output = predict_sequence_and_latent_space(input_sequence)

# The autoencoder is trained to reproduce its input, so its output is on the
# normalized scale of the column that was fed in. It must therefore be
# rescaled with that same column's range (previously column 0's range was
# used, which put the predictions on the wrong scale).
denormalized_predictions = [denormalize(val, min_input, max_input) for val in predictions]

# NOTE(review): the latent value is the raw output of a Dense(1) layer and is
# not on any column's normalized scale; it is still rescaled with column 0's
# range as before -- confirm what scale is actually wanted here.
min_output = scaler.data_min_[0]
max_output = scaler.data_max_[0]
denormalized_latent_output = denormalize(latent_space_output, min_output, max_output)

print("Original Sequence:")
print(denormalized_input_sequence)

print("Predicted Sequence:")
print(denormalized_predictions)

print("Latent Space Output:")
print(denormalized_latent_output)

(1, 29, 1)
Original Sequence:
[3.6635616461296467, 3.828641396489095, 4.04305126783455, 4.248495242049359, 4.634728988229636, 4.382026634673881, 4.248495242049359, 4.189654742026425, 4.6443908991413725, 4.418840607796599, 4.454347296253507, 4.418840607796599, 4.574710978503383, 4.653960350157523, 4.48863636973214, 4.634728988229636, 4.718498871295095, 4.6443908991413725, 4.955827057601262, 4.3694478524670215, 4.406719247264253, 4.605170185988092, 4.276666119016055, 4.736198448394496, 4.672828834461906, 4.595119850134591, 4.31748811353631, 4.709530201312334, 4.330733340286331]
Predicted Sequence:
[4.168145919576295, 4.177501211030522, 4.183358522586396, 4.1833929773602545, 4.179357519817443, 4.177236342585821, 4.177701503067565, 4.180446357982271, 4.180529444891148, 4.176645941735608, 4.168087527358816, 4.160307900251452, 4.160305291953553, 4.162489951790488, 4.165509982133528, 4.169418137911145, 4.174548281254397, 4.182560131013406, 4.189701103759428, 4.196549736794169, 4.1991341393187

In [None]:
# pandas is already imported in the notebook's first cell, so it is not
# re-imported here.

# Original (log-scale) sequence and its reconstruction, one row per timestep.
original_df = pd.DataFrame({'Original Sequence': denormalized_input_sequence})
predictions_df = pd.DataFrame({'Predicted Sequence': denormalized_predictions})

# Attach the reconstruction as a second column. The Series is assigned
# (rather than the whole one-column frame) so the intent is explicit.
original_df['Predicted Sequence'] = predictions_df['Predicted Sequence']

# Save the DataFrame to a CSV file
original_df.to_csv('predicted_sequences.csv', index=False)

print("Original Sequence and Predicted Sequence saved to 'predicted_sequences.csv'")


Original Sequence and Predicted Sequence saved to 'predicted_sequences.csv'


In [None]:
def inverse_log_transform(x):
    """Exponentiate ``x`` when it is strictly positive; otherwise return it as is.

    Note that this only undoes a log transform for log-values above zero:
    zero and negative inputs are returned unchanged rather than exponentiated.
    """
    if x > 0:
        return np.exp(x)
    return x

# Exponentiate the rescaled latent value computed for `column_to_predict`
# and show it (values <= 0 would be printed unchanged).
latent_space_op = inverse_log_transform(denormalized_latent_output)
print(latent_space_op)

84.35938521839228


In [None]:
# Requires `data` (the normalized DataFrame) and `scaler` from the earlier cells.
# The column count is taken from the frame itself instead of being hard-coded,
# so the loop below stays correct if Peaks.csv gains or loses columns.
num_columns = data.shape[1]  # Total number of columns in your data

def predict_and_get_latent_output(input_sequence, column_idx):
    """Reconstruct one sequence and fetch its latent representation.

    Args:
        input_sequence: 1-D sequence of (normalized) values; reshaped into a
            single-sample batch of shape (1, len, 1).
        column_idx: index of the column the sequence came from. It is not
            used by the computation and is accepted only for the caller's
            convenience.

    Returns:
        A tuple ``(reconstruction, latent)``, both flattened to 1-D arrays.
    """
    batch = np.array(input_sequence).reshape(1, -1, 1)
    reconstruction = autoencoder.predict(batch)
    latent = latent_space_model.predict(batch)
    return reconstruction.reshape(-1), latent.reshape(-1)

# Latent value obtained for each column, in column order
latent_space_op_array = []

# Loop over each column
for column_idx in range(num_columns):
    # Recover this column's log-scale values using its own min/max
    input_sequence = data.iloc[:, column_idx].values.tolist()
    min_input = scaler.data_min_[column_idx]
    max_input = scaler.data_max_[column_idx]
    denormalized_input_sequence = [denormalize(val, min_input, max_input) for val in input_sequence]

    predictions, latent_space_output = predict_and_get_latent_output(input_sequence, column_idx)

    # The autoencoder is trained to reproduce its input, so the reconstruction
    # is on the normalized scale of column_idx and must be rescaled with that
    # column's range (previously column 0's range was used for every column).
    denormalized_predictions = [denormalize(val, min_input, max_input) for val in predictions]

    # NOTE(review): the latent value is the raw output of a Dense(1) layer and
    # is not on any column's normalized scale; it is still rescaled with
    # column 0's range as before -- confirm what scale is wanted here.
    min_output = scaler.data_min_[0]
    max_output = scaler.data_max_[0]
    denormalized_latent_output = denormalize(latent_space_output, min_output, max_output)

    # Undo the log transform on both sequences
    inverse_log_transformed_predictions = [
        inverse_log_transform(prediction) for prediction in denormalized_predictions
    ]
    inverse_log_transformed_input_sequence = [
        inverse_log_transform(input_val) for input_val in denormalized_input_sequence
    ]

    # Undo the log transform on the latent value and keep it for this column
    latent_space_op = inverse_log_transform(denormalized_latent_output)
    latent_space_op_array.append(latent_space_op)

# The two sequence lists are overwritten on every pass of the loop, so what is
# printed here belongs to the last column only; the latent values cover all columns.
print("Inverse Log Transformed Predictions:")
print(inverse_log_transformed_predictions)

print("Inverse Log Transformed Input Sequence:")
print(inverse_log_transformed_input_sequence)
print("Latent Space Output for each column:")

print(latent_space_op_array)


Inverse Log Transformed Predictions:
[64.60784354291344, 65.24047122991783, 65.6559245126413, 65.68141812111253, 65.41204045835487, 65.28849233896649, 65.33912159496933, 65.56377100768103, 65.60321166835725, 65.35092039161276, 64.77462613523885, 64.22288562181078, 64.1961540433114, 64.33418317643007, 64.50816689690086, 64.75685868745694, 65.08277498187392, 65.63483650316655, 66.11835764752517, 66.5941999547977, 66.73825833134086, 67.10309311175291, 66.78409182890303, 66.0177114611008, 65.38591145273583, 64.62262576181489, 63.86113313897262, 62.168707188236475, 60.17843191242208]
Inverse Log Transformed Input Sequence:
[34.00000000000001, 34.00000000000001, 42.99999999999998, 33.0, 33.0, 34.99999999999999, 34.00000000000001, 32.0, 36.0, 38.99999999999998, 37.99999999999999, 36.99999999999999, 36.99999999999999, 34.99999999999999, 41.00000000000001, 42.00000000000001, 42.00000000000001, 41.00000000000001, 43.99999999999997, 41.00000000000001, 42.00000000000001, 41.00000000000001, 37.9999

In [None]:
# One entry per column: the latent value after rescaling and the inverse log transform.
latent_space_op_array

[array([91.83612], dtype=float32),
 array([84.3594], dtype=float32),
 array([83.94637], dtype=float32),
 array([93.41439], dtype=float32),
 array([93.89263], dtype=float32),
 array([94.99288], dtype=float32),
 array([93.80005], dtype=float32),
 array([94.20094], dtype=float32),
 array([93.58863], dtype=float32),
 array([78.774895], dtype=float32),
 array([90.243965], dtype=float32)]

In [None]:
# Write the log-scale predictions and inputs side by side to a CSV file.
# Both lists are whatever the most recently run cell left in them, i.e. the
# last column processed by the per-column loop.
headers = ['denormalized_predictions', 'denormalized_input_sequence']

# One row per timestep: [prediction, input]
combined_data = np.column_stack((denormalized_predictions, denormalized_input_sequence))

output_file = 'output_data.csv'
with open(output_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    # Header row first, then every data row, in a single call
    writer.writerows([headers, *combined_data])

print("Data saved to", output_file)








Data saved to output_data.csv


In [None]:
# Rows for the CSV: a single header cell followed by one latent value per row.
# A dedicated name is used so that `data` -- the normalized DataFrame the
# earlier cells read from -- is not overwritten, which would break re-running them.
latent_rows = [['Latent Space Output for each column:']]
latent_rows.extend([item[0]] for item in latent_space_op_array)

# Write data to CSV file
csv_filename = 'latent_space_output.csv'
with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(latent_rows)


# AVGFreq