In [1]:
import pandas as pd
import numpy as np
import pickle
import sklearn
import tensorflow as tf
import torch
import torch.nn as nn
import torch.utils 
from torch.utils.data import Dataset,DataLoader
import torch.utils.data
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import regularizers
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import mnist


2024-07-15 21:20:34.059226: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-15 21:20:34.076653: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-15 21:20:34.081984: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-15 21:20:34.096167: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Definition of the Autoencoder model as a subclass of the TensorFlow Model class

class SimpleAutoencoder(Model):
    """Single-hidden-layer dense autoencoder.

    The encoder flattens each sample and projects it onto
    ``latent_dimensions`` ReLU units. The decoder maps that code back to
    ``prod(data_shape)`` sigmoid units and reshapes the result to
    ``data_shape``. Because the output activation is a sigmoid, the
    reconstructions lie in (0, 1).

    Args:
        latent_dimensions: Number of units in the bottleneck layer.
        data_shape: Shape of a single sample, without the batch axis.
    """

    def __init__(self, latent_dimensions, data_shape):
        super().__init__()
        self.latent_dimensions = latent_dimensions
        self.data_shape = data_shape

        # Keras layers expect plain Python integers for `units` and for the
        # Reshape target. The previous code passed the tf.Tensor returned by
        # tf.math.reduce_prod, which is not a valid `units` value.
        sample_shape = tuple(int(dim) for dim in data_shape)
        flat_size = int(np.prod(sample_shape))

        # Encoder: flatten, then project onto the latent code.
        self.encoder = tf.keras.Sequential([
            layers.Flatten(),
            layers.Dense(latent_dimensions, activation='relu', dtype='float32'),
        ])

        # Decoder: expand the latent code back to the original sample size.
        self.decoder = tf.keras.Sequential([
            layers.Dense(flat_size, activation='sigmoid', dtype='float32'),
            layers.Reshape(sample_shape),
        ])

    def call(self, input_data):
        """Encode ``input_data`` and return its reconstruction."""
        encoded_data = self.encoder(input_data)
        decoded_data = self.decoder(encoded_data)
        return decoded_data


In [2]:
class AutoEncoder(tf.keras.Model):
    """Symmetric dense autoencoder: 64 -> 32 -> 16 -> 32 -> 64 -> output_dim.

    Args:
        output_dim: Width of the final linear layer. It has to equal the
            number of features of a flattened input sample for the
            reconstruction to be comparable with the input. Defaults to 784,
            the value that used to be hard-coded.
    """

    def __init__(self, output_dim=784):
        super().__init__()
        self.flatten_layer = tf.keras.layers.Flatten()

        # Encoder
        self.dense1 = tf.keras.layers.Dense(64, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(32, activation=tf.nn.relu)

        # Latent code; its activations are also returned by call().
        self.bottleneck = tf.keras.layers.Dense(16, activation=tf.nn.relu)

        # Decoder
        self.dense4 = tf.keras.layers.Dense(32, activation=tf.nn.relu)
        self.dense5 = tf.keras.layers.Dense(64, activation=tf.nn.relu)

        # No activation: the reconstruction is an unbounded linear output.
        self.dense_final = tf.keras.layers.Dense(output_dim)

    def call(self, inp):
        """Run the forward pass.

        Returns:
            A tuple ``(reconstruction, flattened_input, bottleneck_activations)``.
        """
        x_reshaped = self.flatten_layer(inp)
        x = self.dense1(x_reshaped)
        x = self.dense2(x)
        x = self.bottleneck(x)
        x_hid = x
        x = self.dense4(x)
        x = self.dense5(x)
        x = self.dense_final(x)
        return x, x_reshaped, x_hid

# define loss function and gradient
def loss(x, x_bar, h, model, Lambda =100):
    """Reconstruction loss plus a contractive penalty on the bottleneck.

    Args:
        x: Flattened input batch.
        x_bar: Reconstruction of ``x`` produced by ``model``.
        h: Bottleneck activations for the batch (N_batch x N_hidden).
        model: Model exposing a ``bottleneck`` layer whose first weight is
            the kernel used in the penalty.
        Lambda: Weight of the contractive term.

    Returns:
        The scalar reconstruction term broadcast-added to the per-sample
        contractive term, i.e. one loss value per batch row.
    """
    # Mean squared error over the batch, rescaled by 28 * 28 (= 784).
    reconstruction_loss = tf.reduce_mean(
        tf.keras.losses.mse(x, x_bar)
    )
    reconstruction_loss *= 28 * 28

    # Use the layer's own kernel. Wrapping it in a fresh tf.Variable (as was
    # done before) copies the values into a new, unrelated variable on every
    # call, so the penalty contributed no gradient to the bottleneck weights.
    W = tf.transpose(model.bottleneck.weights[0])

    # NOTE(review): h * (1 - h) is the derivative of a sigmoid activation,
    # while AutoEncoder.bottleneck in this notebook uses ReLU -- confirm which
    # activation is intended before relying on this penalty.
    dh = h * (1 - h)  # N_batch x N_hidden
    contractive = Lambda * tf.reduce_sum(
        tf.linalg.matmul(dh**2, tf.square(W)),
        axis=1,
    )
    total_loss = reconstruction_loss + contractive
    return total_loss

def grad(model, inputs):
    """Evaluate the loss on one batch and differentiate it.

    Args:
        model: Model whose call returns
            ``(reconstruction, flattened_inputs, hidden_activations)``.
        inputs: Batch fed to ``model``.

    Returns:
        ``(loss_value, gradients, flattened_inputs, reconstruction)`` where
        ``gradients`` is aligned with ``model.trainable_variables``.
    """
    with tf.GradientTape() as tape:
        outputs = model(inputs)
        reconstruction, inputs_reshaped, hidden = outputs
        loss_value = loss(inputs_reshaped, reconstruction, hidden, model)

    gradients = tape.gradient(loss_value, model.trainable_variables)
    return loss_value, gradients, inputs_reshaped, reconstruction


In [3]:
dataset_path = "dataset_amyloid.pickle"

# Report the runtime environment. get_device_name(0) raises when no CUDA
# device is present, so only query it when CUDA is actually available.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name(0))
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = 'cpu'
print(pd.__version__)
print(sklearn.__version__)

# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# load pickles that come from a trusted source.
with open(dataset_path, 'rb') as f:
    X_train, y_train, X_test, y_test = pickle.load(f)

# NOTE(review): dividing by 255 assumes the raw feature values span 0-255
# (image-style data) -- confirm this holds for this dataset.
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

# train the model
# model = AutoEncoder()
# optimizer = tf.optimizers.Adam(learning_rate=0.001)
# global_step = tf.Variable(0)
# num_epochs = 10
# batch_size = 32
# for epoch in range(num_epochs):
#     print("Epoch: ", epoch)
#     for x in range(0, len(X_train), batch_size):
#         x_inp = X_train[x : x + batch_size]
#         loss_value, grads, inputs_reshaped, reconstruction = grad(model, x_inp)
#         optimizer.apply_gradients(zip(grads, model.trainable_variables),
#                               global_step)
        
#     print("Step: {}, Loss: {}".format(global_step.numpy(),tf.reduce_sum(loss_value)))


# # Define the optimizer
# optimizer = tf.optimizers.Adam()

# # Define the loss function
# loss_fn = tf.keras.losses.MeanSquaredError()

# # Function to calculate gradients
# def grad(model, inputs):
#     with tf.GradientTape() as tape:
#         reconstruction = model(inputs)
#         # Ensure reconstruction is a tensor, not a tuple
#         if isinstance(reconstruction, tuple):
#             reconstruction = reconstruction[0]
#         print(f"Inputs shape: {inputs.shape}")
#         print(f"Reconstruction shape: {reconstruction.shape}")
#         loss_value = loss_fn(inputs, reconstruction)
#     grads = tape.gradient(loss_value, model.trainable_variables)
#     return loss_value, grads, inputs, reconstruction

# global_step = tf.Variable(0)
# num_epochs = 200
# batch_size = 128

# # Ensure X_train is a numpy array with correct dtype
# X_train = np.array(X_train, dtype=np.float32)

# # Check the shape of the data
# print(f"X_train shape: {X_train.shape}")

# for epoch in range(num_epochs):
#     print("Epoch: ", epoch)
#     for x in range(0, len(X_train), batch_size):
#         x_inp = X_train[x : x + batch_size]
#         loss_value, grads, inputs_reshaped, reconstruction = grad(model, x_inp)
#         optimizer.apply_gradients(zip(grads, model.trainable_variables))
        
#     print("Step: {}, Loss: {}".format(global_step.numpy(), tf.reduce_sum(loss_value)))

#--------------------
# Shape of one sample (everything except the batch axis).
input_data_shape = X_test.shape[1:]
latent_dimensions = 64

simple_autoencoder = SimpleAutoencoder(latent_dimensions, input_data_shape)
simple_autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError())

# An autoencoder is trained to reproduce its own input, so the targets are
# the inputs themselves. The class labels (y_train / y_test) were previously
# passed here, which made the MSE compare reconstructions with labels.
simple_autoencoder.fit(
    X_train, X_train,
    epochs=1,
    shuffle=True,
    validation_data=(X_test, X_test),
)


True
NVIDIA A100-SXM4-80GB
2.2.2
1.3.2


NameError: name 'SimpleAutoencoder' is not defined

In [4]:
# Shapes of the four arrays, in the order train features, train labels,
# test features, test labels.
for array in (X_train, y_train, X_test, y_test):
    print(array.shape)

# Peek at the training labels (all but the first entry), their container
# type and their dtype before casting.
print(y_train[1:])
print(type(y_train))
print(y_train.dtype)

# Cast both label arrays to float32.
y_train, y_test = (labels.astype('float32') for labels in (y_train, y_test))

(155, 10193)
(155,)
(35, 10193)
(35,)
[1 1 1 0 1 1 0 0 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 1 1 0 1 1 1 0 0 0 0 0 0 1 1
 0 0 0 0 1 1 0 1 0 1 1 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0 1 0 1 1 1 0 0 1 1 0
 1 0 0 1 0 0 0 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 1 0 1 1 0 1 0 1 1 1 0 0 1 0 1 0 1 0 1 0 0 1 1 0 0 1 0 1 0 0
 1 1 0 0 1 1]
<class 'numpy.ndarray'>
int64
