In [None]:
import numpy as np
import pandas as pd
import csv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, Dense
from tensorflow.keras.layers import Reshape
from sklearn.preprocessing import MinMaxScaler

## Peaks Normalization and Log Transformation

In [None]:
# Read Peaks.csv into a list of float rows. The header line and the first
# column of every row are dropped; empty cells are recorded as NaN.
data = []
# newline='' is the mode the csv module documents for files handed to
# csv.reader, so newlines embedded in quoted fields are parsed correctly.
with open('Peaks.csv', newline='') as f:
    reader = csv.reader(f)
    next(reader) # skip header
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; keeping them would add
            # an all-NaN row that survives the NaN replacement below.
            continue
        data.append([np.nan if x == '' else float(x) for x in row[1:]])

# Replace NaN with 0 (np.nan_to_num also maps +/-inf to large finite values)
data = [np.nan_to_num(x) for x in data]

data = pd.DataFrame(data)
#Log transform
def log_transform(x):
    """Return the natural log of a strictly positive scalar; pass anything else through.

    Values for which ``x > 0`` is false (zero, negatives, NaN) are returned
    unchanged.

    NOTE(review): inputs in (0, 1) produce negative logs, which share the
    output range with untouched non-positive inputs, so the mapping is not
    one-to-one.
    """
    if x > 0:
        return np.log(x)
    return x

# Element-wise log transform. DataFrame.applymap was deprecated in pandas 2.1
# in favour of DataFrame.map, so use the new name when the installed pandas
# has it and fall back to applymap otherwise.
_elementwise_apply = data.map if hasattr(data, 'map') else data.applymap
data = _elementwise_apply(log_transform)

# Normalize the data: each column is min-max scaled independently.
# Note the scaler is fitted on every row, before the truncation below.
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)

data = pd.DataFrame(normalized_data)

# Keep at most the first MAX_TIMESTEPS rows. `timesteps` is derived from what
# is actually left so that a file with fewer rows still reshapes cleanly into
# (1, timesteps, 1) later on; with 61 or more rows it is 61, as before.
MAX_TIMESTEPS = 61
data = data.iloc[:MAX_TIMESTEPS]
timesteps = len(data)
n_features = data.shape[1]

In [None]:
# Show the normalized, truncated frame as this cell's output.
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.305171,0.448618,0.072639,0.194722,0.661639,0.414819,0.947186,0.814523,0.997609,0.039982,0.721134
1,0.339734,0.661865,0.000000,0.575849,0.684430,0.713411,0.893928,0.768934,0.989606,0.224364,0.655751
2,0.230532,0.621966,0.030552,0.367378,0.727950,0.480899,0.935992,0.831538,0.995185,0.171555,0.802674
3,0.217291,0.597749,0.059065,0.777804,0.436883,0.857212,0.899955,0.852383,0.988975,0.078854,0.878586
4,0.463248,0.688638,0.134710,0.510233,0.768982,0.521754,0.899955,0.884757,0.985140,0.206982,0.655751
...,...,...,...,...,...,...,...,...,...,...,...
56,0.328400,0.405981,0.738811,0.693750,0.426247,0.888355,0.885593,0.814523,0.926655,0.831462,0.404602
57,0.268834,0.329753,0.655198,0.547574,0.186847,0.476283,0.897966,0.737999,0.913562,0.831462,0.404602
58,0.453739,0.215985,0.760741,0.462987,0.467874,0.385492,0.893928,0.754000,0.885173,0.907706,0.432459
59,0.592406,0.298099,0.697499,0.856445,0.487816,0.548328,0.889805,0.789618,0.937150,0.842612,0.512032


In [None]:
# ---- Encoder: a (timesteps, 1) sequence is squeezed down to one scalar ----
inputs = Input(shape=(timesteps, 1))
x = LSTM(61)(inputs)
for hidden_units in (32, 16):
    # No activation argument is passed to any Dense layer in this model.
    x = Dense(hidden_units)(x)
encoded = Dense(1)(x)

# ---- Decoder: the scalar is expanded back to a (timesteps, 1) sequence ----
decoded = Dense(timesteps)(encoded)
decoded = Reshape((timesteps, 1))(decoded)
decoded = LSTM(61, return_sequences=True)(decoded)
for hidden_units in (32, 16, 1):
    # Dense on a 3-D tensor is applied along the last axis, so the final
    # width of 1 yields one value per timestep.
    decoded = Dense(hidden_units)(decoded)

# Two models over the same layers: encoder only, and the full autoencoder.
latent_space_model = Model(inputs=inputs, outputs=encoded)
autoencoder = Model(inputs=inputs, outputs=decoded)
autoencoder.compile(loss='mae', optimizer='adam')


In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
autoencoder.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 61, 1)]           0         
                                                                 
 lstm (LSTM)                 (None, 61)                15372     
                                                                 
 dense (Dense)               (None, 32)                1984      
                                                                 
 dense_1 (Dense)             (None, 16)                528       
                                                                 
 dense_2 (Dense)             (None, 1)                 17        
                                                                 
 dense_3 (Dense)             (None, 61)                122       
                                                                 
 reshape (Reshape)           (None, 61, 1)             0   

In [None]:
# Fit the single shared autoencoder on each column in turn, reconstructing
# the column from itself.
# NOTE(review): when a column holds exactly `timesteps` values the reshape
# below produces one sample, so batch_size=8 has no effect, and each fit()
# continues from the weights left by the previous column - confirm this
# sequential scheme is intended.
for col_pos in range(n_features):
    print(f'Training on column: {col_pos+1}/{n_features}')
    column_values = data.iloc[:, col_pos].values
    x_train = column_values.reshape(-1, timesteps, 1)

    autoencoder.fit(x=x_train, y=x_train, batch_size=8, epochs=25, verbose=0)

# Save model weights
autoencoder.save_weights('autoencoder_weights.h5')

Training on column: 1/11
Training on column: 2/11
Training on column: 3/11
Training on column: 4/11
Training on column: 5/11
Training on column: 6/11
Training on column: 7/11
Training on column: 8/11
Training on column: 9/11
Training on column: 10/11
Training on column: 11/11


In [None]:
def predict_sequence_and_latent_space(input_sequence):
    """Run one sequence through the autoencoder and the encoder-only model.

    The sequence is converted to an array and reshaped to a single-sample
    batch of shape (1, len(input_sequence), 1); that shape is printed.

    Returns:
        A 2-tuple: the autoencoder output flattened to 1-D, and element
        [0][0] of the encoder output.
    """
    batch = np.array(input_sequence).reshape(1, -1, 1)
    print(batch.shape)
    reconstruction = autoencoder.predict(batch)
    latent = latent_space_model.predict(batch)
    return reconstruction.reshape(-1), latent[0][0]

def denormalize(normalized_value, min_val, max_val):
    """Undo min-max scaling: map a scaled value back onto [min_val, max_val].

    0 maps to ``min_val`` and 1 maps to ``max_val``; values outside [0, 1]
    are extrapolated linearly.
    """
    value_range = max_val - min_val
    return normalized_value * value_range + min_val

# Denormalize the input sequence using the min/max the scaler recorded for
# the selected column.
column_to_predict = 1
input_sequence = data.iloc[:, column_to_predict].values.tolist()
min_input = scaler.data_min_[column_to_predict]
max_input = scaler.data_max_[column_to_predict]
denormalized_input_sequence = [denormalize(val, min_input, max_input) for val in input_sequence]

predictions, latent_space_output = predict_sequence_and_latent_space(input_sequence)

# The autoencoder is trained to reproduce its (normalized) input, so the
# reconstruction is on the same scale as the input column and must be
# denormalized with that column's min/max - not column 0's - for the
# "Original" and "Predicted" sequences to be comparable.
denormalized_predictions = [denormalize(val, min_input, max_input) for val in predictions]

# Denormalize the latent space output.
# NOTE(review): the latent value is still rescaled with column 0's range, as
# before; it is an unconstrained Dense(1) output, so confirm which range (if
# any) is the intended one.
min_output = scaler.data_min_[0]
max_output = scaler.data_max_[0]
denormalized_latent_output = denormalize(latent_space_output, min_output, max_output)

print("Original Sequence:")
print(denormalized_input_sequence)

print("Predicted Sequence:")
print(denormalized_predictions)

print("Latent Space Output:")
print(denormalized_latent_output)

(1, 61, 1)
Original Sequence:
[4.174387269895637, 4.574710978503383, 4.499809670330265, 4.454347296253507, 4.624972813284271, 4.663439094112068, 4.276666119016055, 4.564348191467836, 4.189654742026425, 4.499809670330265, 4.406719247264253, 4.418840607796599, 4.983606621708336, 5.209486152841421, 4.852030263919617, 4.574710978503383, 4.465908118654584, 4.736198448394496, 4.718498871295095, 3.891820298110627, 4.248495242049359, 4.219507705176107, 4.02535169073515, 4.382026634673881, 3.951243718581428, 3.4657359027997265, 3.555348061489414, 3.7135720667043084, 3.6109179126442243, 3.6109179126442243, 3.9120230054281464, 3.4965075614664807, 3.637586159726386, 4.007333185232471, 4.02535169073515, 4.356708826689592, 3.8066624897703196, 3.637586159726386, 4.04305126783455, 3.6635616461296467, 3.9120230054281464, 3.401197381662156, 4.07753744390572, 3.931825632724326, 4.1588830833596715, 4.532599493153256, 4.174387269895637, 3.931825632724326, 4.418840607796599, 4.1588830833596715, 3.3322045101

In [None]:
def inverse_log_transform(x):
    """Exponentiate a strictly positive value; pass anything else through.

    Values for which ``x > 0`` is false are returned unchanged. ``x`` may be
    a scalar or a one-element array (the truth test requires a single value).

    NOTE(review): a log value <= 0 (an original value in (0, 1]) is not
    exponentiated, so this only undoes ``log_transform`` for originals
    greater than 1.
    """
    if x > 0:
        return np.exp(x)
    return x

# Take the denormalized latent value back out of log space and print it.
# inverse_log_transform only exponentiates values > 0; anything else is
# printed as-is.
latent_space_op = inverse_log_transform(denormalized_latent_output)
print(latent_space_op)

37.32201959796914


In [None]:
# pandas is already imported as `pd` in the notebook's import cell, so it is
# not re-imported here.

# One single-column frame per sequence (both are still in log space: only
# the min-max scaling has been undone at this point).
original_df = pd.DataFrame(denormalized_input_sequence, columns=['Original Sequence'])
predictions_df = pd.DataFrame(denormalized_predictions, columns=['Predicted Sequence'])

# Attach the predictions as a second column. Assign the column (a Series)
# rather than the whole DataFrame so the intent is explicit; rows are matched
# on the default integer index.
original_df['Predicted Sequence'] = predictions_df['Predicted Sequence']

# Save the DataFrame to a CSV file
original_df.to_csv('predicted_sequences.csv', index=False)

print("Original Sequence and Predicted Sequence saved to 'predicted_sequences.csv'")


Original Sequence and Predicted Sequence saved to 'predicted_sequences.csv'


In [None]:
# Requires the preprocessing cell to have run (it defines data, scaler and n_features).
# Take the column count from the data instead of hard-coding 11, so the loop
# below stays correct if Peaks.csv gains or loses columns.
num_columns = n_features  # Total number of columns in your data

def predict_and_get_latent_output(input_sequence, column_idx):
    """Return the flattened reconstruction and flattened latent output for one sequence.

    Args:
        input_sequence: sequence of values; reshaped to (1, len, 1) before
            prediction.
        column_idx: accepted but not used by this function.

    Returns:
        A 2-tuple of 1-D arrays: autoencoder output and encoder output.
    """
    batch = np.array(input_sequence).reshape(1, -1, 1)
    reconstruction = autoencoder.predict(batch)
    latent = latent_space_model.predict(batch)
    return reconstruction.reshape(-1), latent.reshape(-1)

# Create an array to store the latent_space_op values for each column
latent_space_op_array = []

# Defined up front so the prints after the loop still work when there are no
# columns; inside the loop they are rebuilt for every column.
inverse_log_transformed_predictions = []
inverse_log_transformed_input_sequence = []

# Loop over each column
for column_idx in range(num_columns):
    # Denormalize the input sequence with this column's own min/max
    input_sequence = data.iloc[:, column_idx].values.tolist()
    min_input = scaler.data_min_[column_idx]
    max_input = scaler.data_max_[column_idx]
    denormalized_input_sequence = [denormalize(val, min_input, max_input) for val in input_sequence]

    predictions, latent_space_output = predict_and_get_latent_output(input_sequence, column_idx)

    # The reconstruction is on the same normalized scale as the column it was
    # produced from, so it is denormalized with that column's min/max rather
    # than column 0's; otherwise every column except the first is rescaled
    # with the wrong range.
    denormalized_predictions = [denormalize(val, min_input, max_input) for val in predictions]

    # Denormalize the latent space output for this column.
    # NOTE(review): still uses column 0's range, as the original did
    # ("assuming the latent space output is in the first column") - confirm.
    min_output = scaler.data_min_[0]
    max_output = scaler.data_max_[0]
    denormalized_latent_output = denormalize(latent_space_output, min_output, max_output)

    # Apply inverse log transform to every prediction / input value
    inverse_log_transformed_predictions = [
        inverse_log_transform(value) for value in denormalized_predictions
    ]
    inverse_log_transformed_input_sequence = [
        inverse_log_transform(value) for value in denormalized_input_sequence
    ]

    # Apply the inverse log transform to the latent value for this column
    latent_space_op = inverse_log_transform(denormalized_latent_output)
    latent_space_op_array.append(latent_space_op)

# The two sequences printed here are the ones left over from the last column
# processed; latent_space_op_array holds one entry per column.
print("Inverse Log Transformed Predictions:")
print(inverse_log_transformed_predictions)

print("Inverse Log Transformed Input Sequence:")
print(inverse_log_transformed_input_sequence)
print("Latent Space Output for each column:")

print(latent_space_op_array)


Inverse Log Transformed Predictions:
[79.4998217188047, 81.49194334044647, 82.60863864005087, 83.18985155047619, 83.45040845756407, 83.52765154301544, 83.50624026680748, 83.44979759764895, 83.3802800715244, 83.30838814043602, 83.24332376575633, 83.18814369461167, 83.15076863189954, 83.12979669122063, 83.12704792638627, 83.12442514391935, 83.11347329865649, 83.11014467124366, 83.11743146190261, 83.12005402370146, 83.11808884295554, 83.1070958773961, 83.10143213067165, 83.10577430196818, 83.11082996598962, 83.10769723613525, 83.0930840522916, 83.07181911980624, 83.04397741908919, 83.02130702581138, 83.00008141282669, 82.97046961294406, 82.9457116202402, 82.9212052055596, 82.89694317542205, 82.87917319762123, 82.86137914165315, 82.84416047647748, 82.83953226338252, 82.84642588587556, 82.8536477274741, 82.86017999317806, 82.8704569796427, 82.85499317872377, 82.83995046478249, 82.83172622476125, 82.83062507494635, 82.83346858032387, 82.83589400028363, 82.82964241563674, 82.81865275794152, 8

In [None]:
# Show the collected per-column latent values (one appended entry per column).
latent_space_op_array

[array([38.903385], dtype=float32),
 array([37.322018], dtype=float32),
 array([41.89672], dtype=float32),
 array([40.90016], dtype=float32),
 array([39.57935], dtype=float32),
 array([38.858833], dtype=float32),
 array([44.161167], dtype=float32),
 array([42.85986], dtype=float32),
 array([44.46457], dtype=float32),
 array([42.664997], dtype=float32),
 array([38.976498], dtype=float32)]

In [None]:
# Column titles for the CSV, in the same order as the stacked columns below
headers = ['denormalized_predictions', 'denormalized_input_sequence']

# Place the two sequences side by side, one column each. Both names hold
# whatever the most recently executed cell assigned to them.
combined_data = np.column_stack([denormalized_predictions, denormalized_input_sequence])

# Write the header row followed by the data rows
output_file = 'output_data.csv'
with open(output_file, 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerows([headers, *combined_data])

print(f"Data saved to {output_file}")








Data saved to output_data.csv


In [None]:
# Rows for the CSV: a title row, then one single-cell row per column holding
# the first element of that column's latent output.
# A dedicated name is used instead of `data` so the normalized DataFrame the
# earlier cells read through `data.iloc` is not overwritten by this list.
latent_rows = [['Latent Space Output for each column:']]
latent_rows.extend([item[0]] for item in latent_space_op_array)

# Write data to CSV file
csv_filename = 'latent_space_output.csv'
with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(latent_rows)


# AVGFreq