In [None]:
import numpy as np
import pandas as pd
import csv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, Dense
from tensorflow.keras.layers import Reshape
from sklearn.preprocessing import MinMaxScaler

## Peaks Normalization and Log Transformation

In [None]:
# Read Peaks.csv into a numeric table. The header line and the first field of
# every record are discarded; an empty field is treated as a missing value.
with open('Peaks.csv') as peaks_file:
    records = csv.reader(peaks_file)
    next(records)  # header line carries no numeric data
    parsed_rows = [
        [float(cell) if cell != '' else np.nan for cell in record[1:]]
        for record in records
    ]

# Missing entries (NaN) are replaced by 0 before the DataFrame is built.
data = pd.DataFrame([np.nan_to_num(values) for values in parsed_rows])
# Log transform
def log_transform(x):
    """Return the natural logarithm of ``x`` when ``x`` is strictly positive.

    Any input that is not greater than zero (zero, negatives, NaN) is handed
    back unchanged, so the function never evaluates ``log`` outside its domain.
    Note that both ``0`` and ``1`` therefore map to ``0``.
    """
    if x > 0:
        return np.log(x)
    return x

# Apply the log transform to every cell. DataFrame.applymap is deprecated since
# pandas 2.1 in favour of DataFrame.map, so use whichever this pandas provides.
if hasattr(pd.DataFrame, 'map'):
    data = data.map(log_transform)
else:
    data = data.applymap(log_transform)

# Normalize the data: each column is min-max scaled using statistics computed
# over ALL rows (the scaler is fitted before the row window is taken below).
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)

data = pd.DataFrame(normalized_data)

# Keep one window of `timesteps` consecutive rows, starting at row 60.
timesteps = 16
window_start = 60
data = data.iloc[window_start:window_start + timesteps]
if len(data) != timesteps:
    # Fail here with a clear message rather than later inside a reshape call.
    raise ValueError(
        f"Expected {timesteps} rows starting at row {window_start}, "
        f"but only {len(data)} are available"
    )
n_features = data.shape[1]

In [None]:
# Display the selected window of log-transformed, min-max-scaled values
# (one row per time step, one column per feature).
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
60,0.361874,0.43197,0.752648,0.63812,0.535374,0.319709,0.883452,0.796109,0.902544,0.762631,0.404602
61,0.535047,0.440359,0.678494,0.595651,0.370571,0.591503,0.901923,0.761591,0.92023,0.842612,0.186937
62,0.434329,0.405981,0.661984,0.449778,0.230367,0.330038,0.867776,0.782933,0.92209,0.505159,0.220795
63,0.339734,0.517696,0.706694,0.514439,0.347033,0.570086,0.86543,0.768934,0.903574,0.665251,0.404602
64,0.500057,0.43197,0.791661,0.4586,0.347033,0.380543,0.899955,0.814523,0.889679,0.371793,0.316532
65,0.203797,0.472653,0.755363,0.467358,0.322696,0.385492,0.850735,0.768934,0.915493,0.356158,0.285536
66,0.31688,0.414786,0.738811,0.583808,0.156295,0.51278,0.913335,0.746143,0.93715,0.519222,0.220795
67,0.293265,0.387922,0.709715,0.178248,0.107882,0.17206,0.974694,0.842203,0.98188,0.560583,0.186937
68,0.37269,0.488094,0.671966,0.595651,0.201649,0.616769,0.997612,0.857304,0.979894,0.613895,0.040015
69,0.350891,0.405981,0.671966,0.840474,0.230367,1.0,0.956297,0.82034,0.975171,0.546931,0.285536


In [None]:
# Encoder: a sequence of `timesteps` scalars is summarised by an LSTM and then
# narrowed through dense layers (no activation argument is passed) down to a
# single latent value.
inputs = Input(shape=(timesteps, 1))
x = LSTM(16)(inputs)
for units in (10, 5):
    x = Dense(units)(x)
encoded = Dense(1)(x)

# Decoder: expand the latent value back to `timesteps` values, reshape them
# into a sequence, and refine every time step with an LSTM plus dense layers.
decoded = Dense(timesteps)(encoded)
decoded = Reshape((timesteps, 1))(decoded)
decoded = LSTM(16, return_sequences=True)(decoded)
for units in (10, 5, 1):
    decoded = Dense(units)(decoded)

# Two models share the same layers: one exposes the latent value, the other
# the full reconstruction. Only the autoencoder is compiled for training.
latent_space_model = Model(inputs=inputs, outputs=encoded)
autoencoder = Model(inputs=inputs, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mae')


In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the
# full encoder + decoder model.
autoencoder.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 16, 1)]           0         
                                                                 
 lstm (LSTM)                 (None, 16)                1152      
                                                                 
 dense (Dense)               (None, 10)                170       
                                                                 
 dense_1 (Dense)             (None, 5)                 55        
                                                                 
 dense_2 (Dense)             (None, 1)                 6         
                                                                 
 dense_3 (Dense)             (None, 16)                32        
                                                                 
 reshape (Reshape)           (None, 16, 1)             0   

In [None]:
# Fit the one shared autoencoder on each feature column in turn, using the
# column as both input and reconstruction target.
# NOTE(review): weights carry over from one column to the next, so the final
# model has seen the last column most recently; confirm this sequential
# schedule is intended rather than one fit over all columns together.
for col_idx in range(n_features):
    print(f'Training on column: {col_idx+1}/{n_features}')
    column_values = data.iloc[:, col_idx].values
    x_train = np.reshape(column_values, (-1, timesteps, 1))
    autoencoder.fit(x=x_train, y=x_train, batch_size=8, epochs=25, verbose=0)

# Persist the trained weights to disk.
autoencoder.save_weights('autoencoder_weights.h5')

Training on column: 1/11
Training on column: 2/11
Training on column: 3/11
Training on column: 4/11
Training on column: 5/11
Training on column: 6/11
Training on column: 7/11
Training on column: 8/11
Training on column: 9/11
Training on column: 10/11
Training on column: 11/11


In [None]:
def predict_sequence_and_latent_space(input_sequence):
    """Run one sequence through the autoencoder and the latent-space model.

    Parameters
    ----------
    input_sequence : sequence of numbers
        Values of a single series; they are arranged as one batch entry of
        shape ``(1, len, 1)`` before prediction.

    Returns
    -------
    tuple
        ``(reconstruction, latent)`` where ``reconstruction`` is the
        autoencoder output flattened to 1-D and ``latent`` is the element
        ``[0][0]`` of the latent-space model output.

    The shape of the prepared batch is printed as a side effect.
    """
    batch = np.array(input_sequence).reshape(1, -1, 1)
    print(batch.shape)
    reconstruction = autoencoder.predict(batch)
    latent = latent_space_model.predict(batch)
    return reconstruction.reshape(-1), latent[0][0]

def denormalize(normalized_value, min_val, max_val):
    """Undo min-max scaling: map ``normalized_value`` from the unit range back
    onto the interval that starts at ``min_val`` and ends at ``max_val``."""
    span = max_val - min_val
    rescaled = normalized_value * span
    return rescaled + min_val

# Select one column and recover its pre-scaling (log-space) values using the
# min/max that the scaler recorded for that same column.
column_to_predict = 1
input_sequence = data.iloc[:, column_to_predict].values.tolist()
min_input = scaler.data_min_[column_to_predict]
max_input = scaler.data_max_[column_to_predict]
denormalized_input_sequence = [denormalize(val, min_input, max_input) for val in input_sequence]

# The autoencoder reconstructs the column it is given, so its output lives on
# that column's scale and must be de-normalized with the SAME column's min/max
# (using column 0's statistics here would shift the reconstruction onto the
# wrong scale and make it incomparable with the original sequence).
min_output = min_input
max_output = max_input
predictions, latent_space_output = predict_sequence_and_latent_space(input_sequence)
denormalized_predictions = [denormalize(val, min_output, max_output) for val in predictions]

# NOTE(review): the latent value is the raw output of the bottleneck layer and
# has no inherent scale; it is still rescaled with column 0's min/max as
# before. Confirm which range, if any, is meaningful for it.
denormalized_latent_output = denormalize(
    latent_space_output, scaler.data_min_[0], scaler.data_max_[0]
)

print("Original Sequence:")
print(denormalized_input_sequence)

print("Predicted Sequence:")
print(denormalized_predictions)

print("Latent Space Output:")
print(denormalized_latent_output)

(1, 16, 1)
Original Sequence:
[4.143134726391533, 4.1588830833596715, 4.0943445622221, 4.30406509320417, 4.143134726391533, 4.219507705176107, 4.110873864173311, 4.06044301054642, 4.248495242049359, 4.0943445622221, 4.343805421853684, 4.564348191467836, 4.262679877041316, 4.174387269895637, 4.442651256490317, 4.6443908991413725]
Predicted Sequence:
[4.067246145031271, 4.072170274909831, 4.0590074574937, 4.047840408663438, 4.032232732660296, 4.013467859293155, 4.005637601758027, 3.996771366159844, 3.9985987312539506, 3.995336423691559, 3.993021979999493, 3.9955034178610704, 3.9938017138286415, 3.9921452763855547, 4.000178518394266, 3.9993883934428434]
Latent Space Output:
3.529381841444751


In [None]:
def inverse_log_transform(x):
    """Return ``exp(x)`` when ``x`` is strictly positive, otherwise ``x`` itself.

    Because non-positive inputs are passed through untouched, a log-space
    value of ``0`` comes back as ``0`` (not ``1``) and negative log-space
    values are not exponentiated.
    """
    if x > 0:
        return np.exp(x)
    return x

# Undo the log transform on the de-normalized latent value and print the
# result (values that are not strictly positive are left unchanged by
# inverse_log_transform).
latent_space_op = inverse_log_transform(denormalized_latent_output)
print(latent_space_op)

34.10288011061631


In [None]:
import pandas as pd

# Wrap each de-normalized sequence in its own single-column DataFrame.
predictions_df = pd.DataFrame({'Predicted Sequence': denormalized_predictions})
original_df = pd.DataFrame({'Original Sequence': denormalized_input_sequence})

# Place the reconstruction next to the original values, row for row.
original_df['Predicted Sequence'] = predictions_df['Predicted Sequence']

# Write both columns to disk without the row index.
original_df.to_csv('predicted_sequences.csv', index=False)

print("Original Sequence and Predicted Sequence saved to 'predicted_sequences.csv'")


Original Sequence and Predicted Sequence saved to 'predicted_sequences.csv'


In [None]:
# Requires `data` (the scaled window) and `scaler` from the earlier cells.
# The column count is read from the data instead of being hard-coded, so the
# per-column loop stays correct if the input file gains or loses columns.
num_columns = data.shape[1]

def predict_and_get_latent_output(input_sequence, column_idx):
    """Return the flattened reconstruction and latent output for one sequence.

    Parameters
    ----------
    input_sequence : sequence of numbers
        Values of a single series; arranged as a batch of shape ``(1, len, 1)``.
    column_idx : int
        Accepted for the caller's convenience; the body does not use it.

    Returns
    -------
    tuple of numpy.ndarray
        ``(reconstruction, latent)``, both flattened to 1-D.
    """
    batch = np.array(input_sequence).reshape(1, -1, 1)
    reconstruction = autoencoder.predict(batch)
    latent = latent_space_model.predict(batch)
    return reconstruction.reshape(-1), latent.reshape(-1)

# Collects the inverse-log-transformed latent value of every column.
latent_space_op_array = []

# Loop over each column
for column_idx in range(num_columns):
    # Recover the column's pre-scaling (log-space) values with its own min/max.
    input_sequence = data.iloc[:, column_idx].values.tolist()
    min_input = scaler.data_min_[column_idx]
    max_input = scaler.data_max_[column_idx]
    denormalized_input_sequence = [denormalize(val, min_input, max_input) for val in input_sequence]

    predictions, latent_space_output = predict_and_get_latent_output(input_sequence, column_idx)

    # The reconstruction is on the scale of the column that was fed in, so it
    # must be de-normalized with that column's min/max, not column 0's.
    min_output = min_input
    max_output = max_input
    denormalized_predictions = [denormalize(val, min_output, max_output) for val in predictions]

    # NOTE(review): the latent value is the raw bottleneck output and has no
    # inherent scale; it is still rescaled with column 0's min/max as before.
    # Confirm which range, if any, is meaningful for it.
    denormalized_latent_output = denormalize(
        latent_space_output, scaler.data_min_[0], scaler.data_max_[0]
    )

    # Undo the log transform on the reconstruction and on the original values.
    inverse_log_transformed_predictions = [
        inverse_log_transform(prediction) for prediction in denormalized_predictions
    ]
    inverse_log_transformed_input_sequence = [
        inverse_log_transform(input_val) for input_val in denormalized_input_sequence
    ]

    # Undo the log transform on the latent value and keep it for this column.
    latent_space_op = inverse_log_transform(denormalized_latent_output)
    latent_space_op_array.append(latent_space_op)

# The two sequences printed here belong to the LAST column processed, because
# the lists are rebuilt on every iteration; the latent list covers all columns.
print("Inverse Log Transformed Predictions:")
print(inverse_log_transformed_predictions)

print("Inverse Log Transformed Input Sequence:")
print(inverse_log_transformed_input_sequence)
print("Latent Space Output for each column:")

print(latent_space_op_array)


Inverse Log Transformed Predictions:
[57.86259097095039, 58.220105028745245, 57.93467960421875, 57.25786075367679, 56.204385251112285, 55.359235204142756, 54.58840153018006, 54.07147849224096, 53.72107658516163, 53.606045667523816, 53.7847244792223, 53.96802526122838, 53.81139911875888, 53.93977982036865, 54.29506063700909, 54.486622775830774]
Inverse Log Transformed Input Sequence:
[38.99999999999998, 32.0, 33.0, 38.99999999999998, 36.0, 34.99999999999999, 33.0, 32.0, 27.999999999999996, 34.99999999999999, 36.0, 34.99999999999999, 38.99999999999998, 32.0, 39.99999999999998, 34.00000000000001]
Latent Space Output for each column:
[array([37.84621], dtype=float32), array([34.10288], dtype=float32), array([39.063194], dtype=float32), array([34.982643], dtype=float32), array([38.441074], dtype=float32), array([33.870266], dtype=float32), array([26.833815], dtype=float32), array([28.60954], dtype=float32), array([26.631702], dtype=float32), array([35.573463], dtype=float32), array([40.9981

In [None]:
# Display the collected latent values, one single-element array per column.
latent_space_op_array

[array([37.84621], dtype=float32),
 array([34.10288], dtype=float32),
 array([39.063194], dtype=float32),
 array([34.982643], dtype=float32),
 array([38.441074], dtype=float32),
 array([33.870266], dtype=float32),
 array([26.833815], dtype=float32),
 array([28.60954], dtype=float32),
 array([26.631702], dtype=float32),
 array([35.573463], dtype=float32),
 array([40.99818], dtype=float32)]

In [None]:
# inverse_log_transformed_predictions = []
# inverse_log_transformed_input_sequence = []

# # Loop over the denormalized predictions and input sequences
# for prediction, input_val in zip(denormalized_predictions, denormalized_input_sequence):
#     # Apply inverse log transform to the values and store them in the arrays
#     inverse_log_transformed_predictions.append(inverse_log_transform(prediction))
#     inverse_log_transformed_input_sequence.append(inverse_log_transform(input_val))

# Column titles for the exported table.
headers = ['denormalized_predictions', 'denormalized_input_sequence']

# Put the two sequences side by side: one (prediction, input) pair per row.
# NOTE(review): both names hold whatever the most recently executed cell
# assigned to them; confirm that is the column you intend to export.
combined_data = np.column_stack((denormalized_predictions, denormalized_input_sequence))

# Write the header row followed by every data row.
output_file = 'output_data.csv'
with open(output_file, mode='w', newline='') as csv_out:
    out_writer = csv.writer(csv_out)
    out_writer.writerows([headers, *combined_data])

print("Data saved to", output_file)








Data saved to output_data.csv


In [None]:
# Build the CSV rows under a dedicated name so the `data` DataFrame used by
# the earlier cells is not overwritten (rebinding it to a list would make
# those cells fail when re-run).
latent_rows = [['Latent Space Output for each column:']]
# Each entry of latent_space_op_array is indexed with [0] to obtain the value
# written on its own row.
latent_rows.extend([item[0]] for item in latent_space_op_array)

# Write data to CSV file
csv_filename = 'latent_space_output.csv'
with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(latent_rows)


# AVGFreq