In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
import scipy
import glob
import sklearn 
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras import layers, models, optimizers
from tensorflow.keras.layers import Input, Activation, Dense, LeakyReLU
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
from tensorflow.keras.optimizers import Adam, SGD
from keras_tuner import BayesianOptimization, HyperParameters

In [None]:
# Data import
# File lists for the filtered wild-type (wt) and D132H mutant LCC data; the
# three files of each list are the ones printed below as window sizes 3, 12, 20.
wt_filtered = ['wt_filtered_lcc_3_50.lccdata', 'wt_filtered_2_lcc_12_50.lccdata', 'wt_filtered_lcc_20_50.lccdata']
D132H_filtered = ['D132H_filtered_lcc_3_50.lccdata', 'D132H_filtered_2_lcc_12_50.lccdata', 'D132H_filtered_lcc_20_50.lccdata']

# Names of the variables bound below, in the same order as the file lists
# (kept so any code that still reads these lists keeps working).
wt_f_var_names = ['wt_3f', 'wt_12f', 'wt_20f']
D132H_f_var_names = ['D132H_3f', 'D132H_12f', 'D132H_20f']


def _read_lcc(path):
    """Read one tab-separated .lccdata file and drop its 'Unnamed: 0' column."""
    return pd.read_csv(path, sep='\t').drop(columns='Unnamed: 0')


# Bind every frame to an explicit name instead of injecting it through
# globals(): the variables are then visible to readers and linters, and a typo
# in a name list can no longer silently create a wrong variable.
# filtered wt LCC data import
wt_3f, wt_12f, wt_20f = [_read_lcc(path) for path in wt_filtered]

# filtered mutant LCC data import
D132H_3f, D132H_12f, D132H_20f = [_read_lcc(path) for path in D132H_filtered]

# Visualization of dataset
for window, frame in zip((3, 12, 20), (wt_3f, wt_12f, wt_20f)):
    print(f'WT for window size = {window}')
    display(frame)

print('\n')
print('---------------------------------')
for window, frame in zip((3, 12, 20), (D132H_3f, D132H_12f, D132H_20f)):
    print(f'D132H for window size = {window}')
    display(frame)

In [None]:
# Concatenate the per-window frames column-wise (order: 12, 20, 3) for the
# wild type and for the mutant, then relabel the columns 0..9.

wt_f = pd.concat((wt_12f, wt_20f, wt_3f), axis='columns')
D132H_f = pd.concat((D132H_12f, D132H_20f, D132H_3f), axis='columns')

# The same integer labels are applied to both frames; assigning a list of the
# wrong length raises, so this also checks that each frame has 10 columns.
colnames = list(range(10))
wt_f.columns = colnames
D132H_f.columns = colnames

# Visualization of dataset
print('Filtered wt data')
display(wt_f)

print('\n')
print('---------------------------------')
print('Filtered D132H data')
display(D132H_f)

In [None]:
# Data pre processing

def preprocessing(wt, mutant, scale=100, test_size=0.2, random_state=None):
    """Label, stack, scale and split the wild-type and mutant feature tables.

    Parameters
    ----------
    wt : pandas.DataFrame
        Wild-type samples; every row receives label 0.
    mutant : pandas.DataFrame
        Mutant samples; every row receives label 1.
    scale : float, default 100
        Divisor applied to every feature value. NOTE (from the original
        author): changing it from 100 to 56.1035 was reported to generate
        errors downstream.
    test_size : float, default 0.2
        Fraction of the stacked rows held out for validation.
    random_state : int or None, default None
        Seed forwarded to ``train_test_split``. ``None`` keeps the original
        unseeded (non-reproducible) shuffle; pass an int for a repeatable split.

    Returns
    -------
    X_train, X_valid : pandas.DataFrame
        Scaled feature rows of the two partitions.
    y_train, y_valid : numpy.ndarray
        Matching 0/1 labels.
    """
    wt_label = np.zeros(len(wt))  # Set wt labels to 0
    mutant_label = np.ones(len(mutant))  # Set mutant labels to 1

    # Stack the two frames and their label arrays in the same order.
    # reset_index(drop=True) discards the old index directly, so this no longer
    # depends on reset_index() producing a column literally called 'index'
    # (which fails when the incoming index is named). As before, each frame is
    # renumbered from 0 on its own, so index values repeat across the classes.
    X_train_full = pd.concat([wt.reset_index(drop=True), mutant.reset_index(drop=True)])
    y_train_full = np.concatenate((wt_label, mutant_label))

    # Normalise the training data
    X_train_full = X_train_full.div(scale)

    # Separate training and validation sets (stratified on the label) and
    # print the relevant shapes
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train_full,
        y_train_full,
        stratify=y_train_full,
        test_size=test_size,
        random_state=random_state,
    )

    print(X_train.shape)
    print(X_valid.shape)
    print(y_train.shape)
    print(y_valid.shape)

    return X_train, X_valid, y_train, y_valid

In [None]:
# Split the filtered wild-type / D132H data into training and validation sets.
(X_train_f, X_valid_f,
 y_train_f, y_valid_f) = preprocessing(wt=wt_f, mutant=D132H_f)

In [None]:
# Get autoencoder model
def get_ae(train_data, LeReLU_alpha=0.01):
    """Build (but do not compile) a dense autoencoder with a 2-unit latent layer.

    Layer widths are 256 -> 64 -> 2 -> 64 -> 256 -> n_features, where
    n_features is ``train_data.shape[1]``. Every Dense layer, including the
    output layer, uses a LeakyReLU activation with slope ``LeReLU_alpha``.

    Parameters
    ----------
    train_data : array-like with a 2-D ``shape``
        Only ``train_data.shape[1]`` is read, to size the input and output.
    LeReLU_alpha : float, default 0.01
        Negative-side slope passed to every LeakyReLU.

    Returns
    -------
    Model
        Uncompiled Keras model mapping 'ae_input' to 'ae_output'; the
        bottleneck layer is named 'ae_latent'.
    """
    n_features = train_data.shape[1]

    # Input layer. `(n)` is just a parenthesised int, not a tuple, so the shape
    # is passed as an explicit 1-tuple rather than a bare integer.
    input_layer = Input(shape=(n_features,), name='ae_input')

    # Encoder
    encoder = Dense(256, activation=LeakyReLU(alpha=LeReLU_alpha), name='e1')(input_layer)
    encoder = Dense(64, activation=LeakyReLU(alpha=LeReLU_alpha), name='e2')(encoder)

    # 2-D latent representation
    encoded = Dense(2, activation=LeakyReLU(alpha=LeReLU_alpha), name='ae_latent')(encoder)

    # Decoder mirrors the encoder
    decoder = Dense(64, activation=LeakyReLU(alpha=LeReLU_alpha), name='d1')(encoded)
    decoder = Dense(256, activation=LeakyReLU(alpha=LeReLU_alpha), name='d2')(decoder)

    output_layer = Dense(n_features, activation=LeakyReLU(alpha=LeReLU_alpha), name='ae_output')(decoder)

    model = Model(input_layer, output_layer)

    return model

In [None]:
# Build the autoencoder, sized from the filtered training data
autoencoder = get_ae(train_data=X_train_f)

In [None]:
# Print the layer-by-layer summary of the autoencoder model
autoencoder.summary()

In [None]:
# Compile the model: mean-squared-error loss, Adam optimizer (learning rate 0.005)
autoencoder.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss=tf.keras.losses.MeanSquaredError(),
)

In [None]:
## Template to copy and paste
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import numpy as np
import os 

# New folder name and common prefix of the per-set output files
folder_name = "AE_UT_MO_Trial_#"
name = "#_LSP_MO_UT"

N_EPOCH_SETS = 59       # number of epoch sets (outer loop iterations)
EPOCHS_PER_SET = 1000   # epochs trained in each set
batch_size = 32         # batch size used when encoding the validation data

# Create the trial folder and its 'models' sub-folder once, up front.
# exist_ok=True replaces the separate os.path.exists() checks.
model_folder = os.path.join(folder_name, 'models')
os.makedirs(model_folder, exist_ok=True)

# Sub-model that maps the autoencoder input to its latent layer. It reuses the
# autoencoder's own layers, so it is built once here and still reflects the
# weights after every additional round of training below.
dr_model = tf.keras.models.Model(inputs=autoencoder.get_layer('ae_input').input,
                                 outputs=autoencoder.get_layer('ae_latent').output)

# Each iteration continues training the SAME autoencoder for another
# EPOCHS_PER_SET epochs and then snapshots history, latent projection and model.
for counts in tqdm(range(N_EPOCH_SETS)):  # this determines the number of epoch sets
    # Open a file for this epoch set to write outputs
    output_path = os.path.join(folder_name, f"#_LSP_MO_UT_Predictions_{counts}.txt")
    with open(output_path, "w") as log_file:
        # Autoencoder training (input is also the reconstruction target)
        history = autoencoder.fit(X_train_f, X_train_f, batch_size=256,
                                  epochs=EPOCHS_PER_SET,
                                  validation_data=(X_valid_f, X_valid_f), verbose=1)

        # Convert history object to dataframe, pickle it and plot the curves
        training_history = pd.DataFrame(history.history)
        file_name_0 = os.path.join(folder_name, name + "_Training_History" + str(counts))
        training_history.to_pickle(file_name_0)

        # Use a dedicated figure and close it afterwards: plt.clf() only clears
        # a figure, so the original left one more figure open per iteration and
        # drew the history on whichever figure happened to be current.
        history_fig, history_ax = plt.subplots()
        history_ax.plot(training_history)
        file_name_1 = os.path.join(folder_name, name + str(counts) + "_#1.png")
        history_fig.savefig(file_name_1, dpi=300)
        plt.close(history_fig)

        # Write the latent-model summary to the file
        dr_model.summary(print_fn=lambda line: log_file.write(line + '\n'))

        # Encode the validation data; predict() does the batching itself, which
        # replaces the manual slicing loop and its repeated predict() calls.
        latent = dr_model.predict(np.array(X_valid_f), batch_size=batch_size, verbose=0)
        for row_number, op in enumerate(latent):
            log_file.write(f"Prediction {row_number}: {op}\n")

        # Construct the data frame: latent coordinates plus the class label
        df = pd.DataFrame({
            'x': latent[:, 0].tolist(),
            'y': latent[:, 1].tolist(),
            'z': ["trajectory-" + str(label) for label in y_valid_f],
        })

        # Scatter plot of the latent space, coloured by label
        scatter_fig, scatter_ax = plt.subplots(figsize=(8, 6))
        sns.scatterplot(x='x', y='y', hue='z', data=df, s=10, ax=scatter_ax)
        file_name_2 = os.path.join(folder_name, name + str(counts) + "_#2.png")
        scatter_fig.savefig(file_name_2, dpi=300)
        plt.close(scatter_fig)

        file_name_3 = os.path.join(folder_name, '#_LSP_MO_UT_Predictions' + str(counts))
        df.to_pickle(file_name_3)

        # Save the model in a subfolder within Trial folder
        # NOTE(review): the path has no file extension; newer Keras releases may
        # require a '.keras' or '.h5' suffix -- confirm against the installed version.
        file_name = os.path.join(model_folder, 'saved_model_#_LSP_MO_UT' + str(counts))
        autoencoder.save(file_name)

In [None]:
# check
#print(X_train_f.index.to_list() ==  y_train_f.index.to_list())
#print(X_valid_f.index.to_list() ==  y_valid_f.index.to_list())

# Save index list: the row labels of the shuffled training / validation frames
shuffled_index_train = X_train_f.index.to_list()
shuffled_index_valid = X_valid_f.index.to_list()

# Write each index list in NumPy's binary format. The `with` blocks guarantee
# the files are flushed and closed; the previous `file.close` (without
# parentheses) only referenced the method and never closed either handle.
with open("shufftrain", "wb") as file:
    np.save(file, shuffled_index_train)

with open("shuffval", "wb") as file:
    np.save(file, shuffled_index_valid)