In [None]:
import numpy as np
import pandas as pd
import csv
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, RepeatVector, Dense
from tensorflow.keras.layers import Reshape
from sklearn.preprocessing import MinMaxScaler

## Peaks Normalization and Log Transformation

In [None]:
# Load the peak table. The first CSV row is a header and the first field of
# every row is skipped (row[1:]); empty cells are parsed as NaN.
data = []
with open('Peaks.csv') as f:
    reader = csv.reader(f)
    next(reader)  # skip header
    for row in reader:
        data.append([np.nan if x == '' else float(x) for x in row[1:]])

# Replace NaN with 0
data = [np.nan_to_num(x) for x in data]

data = pd.DataFrame(data)


# Log transform
def log_transform(x):
    """Return ``np.log(x)`` for strictly positive ``x``; return ``x`` unchanged otherwise.

    Non-positive values are passed through untouched, so the zeros that
    replaced missing cells stay at 0 instead of becoming ``-inf``.
    """
    return np.log(x) if x > 0 else x


# Element-wise transform applied one column at a time. Series.map is used
# because DataFrame.applymap is deprecated in recent pandas releases.
data = data.apply(lambda column: column.map(log_transform))

# Normalize the data (the scaler is fit on the full table, before slicing)
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(data)

data = pd.DataFrame(normalized_data)

# Keep one window of consecutive rows; the sequence length fed to the model is
# derived from the same bounds so the two cannot drift apart.
WINDOW_START = 76
WINDOW_END = 91
data = data.iloc[WINDOW_START:WINDOW_END]
timesteps = WINDOW_END - WINDOW_START
n_features = data.shape[1]

In [None]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
76,0.361874,0.578835,0.33421,0.611266,0.216151,0.51278,0.934339,0.866834,0.93715,0.613895,0.376021
77,0.361874,0.603907,0.311898,0.675486,0.216151,0.705613,0.940868,0.927725,0.975171,0.587493,0.0
78,0.268834,0.545738,0.236991,0.41384,0.107882,0.471652,0.911479,0.802417,0.910625,0.4621,0.152005
79,0.393838,0.524846,0.718654,0.367378,0.358899,0.293542,0.978705,0.927725,0.947854,0.702567,0.253642
80,0.393838,0.464761,0.730288,0.4586,0.0,0.256045,0.939256,0.82034,0.92393,0.519222,0.220795
81,0.31688,0.405981,0.655198,0.881573,0.457692,0.961627,0.860653,0.866834,0.92209,0.897065,0.078625
82,0.101374,0.359631,0.62325,0.567837,0.124385,0.689883,0.891877,0.866834,0.929338,0.831462,0.404602
83,0.256296,0.359631,0.658605,0.634319,0.334968,0.625088,0.88771,0.836934,0.915493,0.587493,0.040015
84,0.256296,0.275901,0.596639,0.83079,0.186847,0.881495,0.915174,0.86212,0.924843,0.652587,0.346679
85,0.203797,0.148466,0.546772,0.52697,0.404492,0.530669,0.939256,0.802417,0.91453,0.690239,0.432459


In [None]:
# Encoder: takes a sequence of `timesteps` steps with one feature per step and
# narrows it down to a single-unit output.
inputs = Input(shape=(timesteps, 1))
x = LSTM(15)(inputs)  # return_sequences is not set here
x = Dense(10)(x)
x = Dense(5)(x)
encoded = Dense(1)(x)  # bottleneck; exposed below through latent_space_model

# Earlier decoder variant, left commented out (not executed):
# decoded = Dense(16)(encoded)
# decoded = Dense(32)(decoded)
# decoded = Dense(61)(decoded)
# decoded = Reshape((61, 1))(decoded)
# decoded = LSTM(1, return_sequences=True)(decoded)
# Decoder: expand the bottleneck to `timesteps` values, reshape them into a
# (timesteps, 1) sequence, then pass it through an LSTM and Dense layers.
decoded = Dense(timesteps)(encoded)
decoded = Reshape((timesteps, 1))(decoded)
decoded = LSTM(15, return_sequences=True)(decoded)
decoded = Dense(10)(decoded)
# decoded = Dense(5)(decoded)
decoded = Dense(1)(decoded)

# Two models over the same input tensor: `latent_space_model` stops at the
# bottleneck, `autoencoder` runs through the decoder and is the one compiled
# (Adam optimizer, mean-absolute-error loss).
latent_space_model = Model(inputs, encoded)
autoencoder = Model(inputs, decoded)
autoencoder.compile(optimizer='adam', loss='mae')


In [None]:
autoencoder.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 15, 1)]           0         
                                                                 
 lstm_4 (LSTM)               (None, 15)                1020      
                                                                 
 dense_13 (Dense)            (None, 10)                160       
                                                                 
 dense_14 (Dense)            (None, 5)                 55        
                                                                 
 dense_15 (Dense)            (None, 1)                 6         
                                                                 
 dense_16 (Dense)            (None, 15)                30        
                                                                 
 reshape_2 (Reshape)         (None, 15, 1)             0   

In [None]:
# Fit the same autoencoder on one column of `data` at a time. Each column is
# reshaped to (-1, timesteps, 1) and used as both input and target.
# NOTE(review): the columns are fitted sequentially on one shared model, so
# later columns are trained after earlier ones — confirm this is intended
# rather than fitting all columns as a single batch.
for i in range(n_features):
    print(f'Training on column: {i+1}/{n_features}')
    column_values = data.iloc[:, i].values
    x_train = column_values.reshape(-1, timesteps, 1)

    autoencoder.fit(
        x=x_train,
        y=x_train,
        batch_size=16,
        epochs=50,
        verbose=0,
    )

# Save model weights
autoencoder.save_weights('autoencoder_weights.h5')

Training on column: 1/11
Training on column: 2/11
Training on column: 3/11
Training on column: 4/11
Training on column: 5/11
Training on column: 6/11
Training on column: 7/11
Training on column: 8/11
Training on column: 9/11
Training on column: 10/11
Training on column: 11/11


In [None]:
def predict_sequence_and_latent_space(input_sequence):
    """Run one sequence through the autoencoder and the encoder-only model.

    Args:
        input_sequence: array-like of values; it is converted to a NumPy array
            and reshaped to a single batch of shape (1, -1, 1).

    Returns:
        A tuple ``(reconstruction, latent)`` where ``reconstruction`` is the
        autoencoder output flattened to 1-D and ``latent`` is element
        ``[0][0]`` of the ``latent_space_model`` output.

    Side effects:
        Prints the shape of the batch that is fed to the models.
    """
    batch = np.array(input_sequence).reshape(1, -1, 1)
    print(batch.shape)
    reconstruction = autoencoder.predict(batch)
    latent = latent_space_model.predict(batch)
    return reconstruction.reshape(-1), latent[0][0]

def denormalize(normalized_value, min_val, max_val):
    """Map a min-max scaled value back onto the ``[min_val, max_val]`` range.

    Args:
        normalized_value: value in scaled units (0 maps to ``min_val``,
            1 maps to ``max_val``).
        min_val: lower bound of the original range.
        max_val: upper bound of the original range.

    Returns:
        ``normalized_value * (max_val - min_val) + min_val``.
    """
    value_range = max_val - min_val
    scaled = normalized_value * value_range
    return scaled + min_val

# Column whose normalized sequence is fed through the autoencoder.
column_to_predict = 1
input_sequence = data.iloc[:, column_to_predict].values.tolist()

# MinMaxScaler keeps one min/max per column. The autoencoder reconstructs the
# very column it is given, so the input and its reconstruction must both be
# mapped back with this column's own range (previously the reconstruction was
# rescaled with column 0's range, which put it on the wrong scale).
min_input = scaler.data_min_[column_to_predict]
max_input = scaler.data_max_[column_to_predict]
denormalized_input_sequence = [denormalize(val, min_input, max_input) for val in input_sequence]

predictions, latent_space_output = predict_sequence_and_latent_space(input_sequence)
denormalized_predictions = [denormalize(val, min_input, max_input) for val in predictions]

# NOTE(review): the latent value is the raw output of the Dense(1) bottleneck,
# not a min-max scaled column value. Rescaling it with column 0's range is kept
# exactly as in the original analysis — confirm the intended interpretation.
min_output = scaler.data_min_[0]
max_output = scaler.data_max_[0]
denormalized_latent_output = denormalize(latent_space_output, min_output, max_output)

print("Original Sequence:")
print(denormalized_input_sequence)

print("Predicted Sequence:")
print(denormalized_predictions)

print("Latent Space Output:")
print(denormalized_latent_output)

(1, 15, 1)
Original Sequence:
[4.418840607796599, 4.465908118654584, 4.356708826689592, 4.31748811353631, 4.204692619390966, 4.0943445622221, 4.007333185232471, 4.007333185232471, 3.8501476017100584, 3.6109179126442243, 3.8501476017100584, 3.737669618283369, 3.637586159726386, 3.5835189384561104, 3.5835189384561104]
Predicted Sequence:
[4.171060566270356, 4.20707379812303, 4.172514566141031, 4.140307135408102, 4.127910147910545, 4.155426470733657, 4.165295092042116, 4.169873706587714, 4.158325215224398, 4.138536605966599, 4.158354074778569, 4.174035119677449, 4.199591853530707, 4.21907108500224, 4.22431094513453]
Latent Space Output:
4.084666588074151


In [None]:
import pandas as pd

# One single-column frame per sequence; both share the default integer index.
predictions_df = pd.DataFrame(denormalized_predictions, columns=['Predicted Sequence'])
original_df = pd.DataFrame(denormalized_input_sequence, columns=['Original Sequence'])

# Place the predicted values next to the original ones, aligned on the index.
original_df = original_df.join(predictions_df)

# Persist both columns without the index column.
original_df.to_csv('predicted_sequences.csv', index=False)

print("Original Sequence and Predicted Sequence saved to 'predicted_sequences.csv'")


Original Sequence and Predicted Sequence saved to 'predicted_sequences.csv'


In [None]:
def inverse_log_transform(x):
    """Return ``np.exp(x)`` for strictly positive ``x``; return ``x`` unchanged otherwise.

    NOTE(review): this mirrors the positivity guard of ``log_transform`` but is
    not an exact inverse of it: originals in (0, 1] are logged to values <= 0,
    which this function passes through instead of exponentiating.
    """
    if x > 0:
        return np.exp(x)
    return x

# Pass the denormalized latent value through inverse_log_transform (np.exp for
# values > 0, unchanged otherwise) and print the result.
latent_space_op = inverse_log_transform(denormalized_latent_output)
print(latent_space_op)

59.42212240382215


In [None]:
# Assuming you have the data and scaler already defined before this point.
# The column count is read from the frame itself so it cannot fall out of sync
# with the loaded table.
num_columns = data.shape[1]  # Total number of columns in your data

def predict_and_get_latent_output(input_sequence, column_idx):
    """Run one sequence through the autoencoder and the encoder-only model.

    Args:
        input_sequence: array-like of values; reshaped to a single batch of
            shape (1, -1, 1) before prediction.
        column_idx: accepted for the caller's convenience; not used inside
            this function.

    Returns:
        A tuple ``(reconstruction, latent)``; both model outputs are flattened
        to 1-D arrays.
    """
    batch = np.array(input_sequence).reshape(1, -1, 1)
    reconstruction = autoencoder.predict(batch)
    latent = latent_space_model.predict(batch)
    return reconstruction.reshape(-1), latent.reshape(-1)

# Create an array to store the latent_space_op values for each column
latent_space_op_array = []

# Loop over each column
for column_idx in range(num_columns):
    # Denormalize the input sequence with this column's own min/max
    input_sequence = data.iloc[:, column_idx].values.tolist()
    min_input = scaler.data_min_[column_idx]
    max_input = scaler.data_max_[column_idx]
    denormalized_input_sequence = [denormalize(val, min_input, max_input) for val in input_sequence]

    # The autoencoder reconstructs the column it was given, so the predictions
    # live on that column's scale and are rescaled with the same min/max
    # (previously column 0's range was used for every column).
    predictions, latent_space_output = predict_and_get_latent_output(input_sequence, column_idx)
    denormalized_predictions = [denormalize(val, min_input, max_input) for val in predictions]

    # NOTE(review): the latent value is the raw Dense(1) bottleneck output, not
    # a scaled column value; rescaling it with column 0's range is kept as in
    # the original analysis — confirm the intended interpretation.
    min_output = scaler.data_min_[0]
    max_output = scaler.data_max_[0]
    denormalized_latent_output = denormalize(latent_space_output, min_output, max_output)

    # Undo the log transform on the reconstruction and on the input
    inverse_log_transformed_predictions = [
        inverse_log_transform(prediction) for prediction in denormalized_predictions
    ]
    inverse_log_transformed_input_sequence = [
        inverse_log_transform(input_val) for input_val in denormalized_input_sequence
    ]

    # Apply the inverse log transform to the latent value for this column
    latent_space_op = inverse_log_transform(denormalized_latent_output)
    latent_space_op_array.append(latent_space_op)

# The two sequence printouts below show the values of the last column processed;
# latent_space_op_array holds one entry per column.
print("Inverse Log Transformed Predictions:")
print(inverse_log_transformed_predictions)

print("Inverse Log Transformed Input Sequence:")
print(inverse_log_transformed_input_sequence)
print("Latent Space Output for each column:")

print(latent_space_op_array)


Inverse Log Transformed Predictions:
[64.77177582228767, 67.24793265353343, 64.74193409348952, 62.4321476247355, 61.632240264505896, 63.55360003811586, 64.31676382422822, 64.61944232451759, 63.798093780913604, 62.416374990237664, 63.75090337816975, 64.7703370886244, 66.60804676680534, 68.00151165066389, 68.42469898884255]
Inverse Log Transformed Input Sequence:
[37.99999999999999, 27.0, 31.0, 34.00000000000001, 33.0, 29.000000000000004, 38.99999999999998, 27.999999999999996, 36.99999999999999, 39.99999999999998, 38.99999999999998, 46.99999999999999, 42.00000000000001, 41.00000000000001, 39.99999999999998]
Latent Space Output for each column:
[array([59.684406], dtype=float32), array([59.42213], dtype=float32), array([62.88981], dtype=float32), array([62.999424], dtype=float32), array([61.287373], dtype=float32), array([63.039715], dtype=float32), array([65.03747], dtype=float32), array([65.03371], dtype=float32), array([65.32792], dtype=float32), array([63.944664], dtype=float32), arra

In [None]:
latent_space_op_array

[array([59.684406], dtype=float32),
 array([59.42213], dtype=float32),
 array([62.88981], dtype=float32),
 array([62.999424], dtype=float32),
 array([61.287373], dtype=float32),
 array([63.039715], dtype=float32),
 array([65.03747], dtype=float32),
 array([65.03371], dtype=float32),
 array([65.32792], dtype=float32),
 array([63.944664], dtype=float32),
 array([62.20658], dtype=float32)]

In [None]:
# Column titles for the CSV, in the same order as the stacked columns below.
headers = ['denormalized_predictions', 'denormalized_input_sequence']

# Pair each prediction with the matching input value: one row per sequence
# element, predictions in the first column and inputs in the second.
combined_data = np.column_stack((denormalized_predictions, denormalized_input_sequence))

# Write the header row followed by the paired values.
output_file = 'output_data.csv'
with open(output_file, mode='w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(headers)
    writer.writerows(combined_data)

print("Data saved to", output_file)








Data saved to output_data.csv


In [None]:
# Build the CSV rows under a dedicated name: rebinding `data` here would replace
# the normalized DataFrame that the cells above index with `data.iloc`, so
# re-running any of them afterwards would fail.
latent_rows = [['Latent Space Output for each column:']]
latent_rows.extend([item[0]] for item in latent_space_op_array)

# Write data to CSV file
csv_filename = 'latent_space_output.csv'
with open(csv_filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(latent_rows)


# AVGFreq