In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
df1 = pd.read_csv('class_labels.csv')
df2 = pd.read_csv('go_features.csv')
df3 = pd.read_csv('gtex_features.csv')
df4 = pd.read_csv('kegg_features.csv')
df5 = pd.read_csv('pathdipall_features.csv')
df6 = pd.read_csv('ppi_features.csv')

merged_df = pd.merge(df1, df2, on='entrezId')
merged_df = pd.merge(merged_df, df3, on='entrezId')
merged_df = pd.merge(merged_df, df4, on='entrezId')
merged_df = pd.merge(merged_df, df5, on='entrezId')
merged_df = pd.merge(merged_df, df6, on='entrezId')

merged_df.fillna(0, inplace=True)
merged_df.to_csv('merged_dataset.csv', index=False)

dataset = pd.read_csv('merged_dataset.csv')
X = dataset.iloc[:, [0] + list(range(2, dataset.shape[1]))].values
Y = dataset.iloc[:, 1].values

# Feature extraction using PCA
num_components = 16  # Adjust the number of PCA components as needed
pca = PCA(n_components=num_components)
X_transformed = pca.fit_transform(X)

# Reshape the transformed data into images (assuming each row has the same number of components)
num_rows, num_components = X_transformed.shape
num_pixels = int(np.sqrt(num_components))  # Assuming the data can be represented as square images
X_images = X_transformed.reshape(-1, num_pixels, num_pixels, 1)  # Reshape to (num_rows, num_pixels, num_pixels, 1)

# Convert labels to one-hot encoding
num_classes = len(np.unique(Y))
Y_onehot = tf.keras.utils.to_categorical(Y, num_classes)

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X_images, Y_onehot, test_size=0.25, random_state=0)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(num_pixels, num_pixels, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
batch_size = 32
epochs = 20  # Set the number of epochs here
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

# Evaluate the model on the test set
Y_pred_onehot = model.predict(X_test)
Y_pred_labels = np.argmax(Y_pred_onehot, axis=1)  # Convert one-hot predictions to class labels
Y_test_labels = np.argmax(Y_test, axis=1)  # Convert one-hot ground truth to class labels



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [2]:
accuracy = accuracy_score(Y_test_labels, Y_pred_labels)
print("Accuracy: {:.2f}%".format(accuracy * 100))

Accuracy: 98.71%


In [10]:
#Testing with new data
#entrezId = 5728 means no alzheimer lets see if get the desired output

merged_new_data = pd.read_csv('Test.csv')
specified_entrezId = 5728
selected_row = merged_new_data.query("entrezId == @specified_entrezId")
# Extract features using PCA (using the same PCA object from the training step)
X_new = selected_row.iloc[:, [0] + list(range(2, selected_row.shape[1]))].values
X_transformed_new = pca.transform(X_new)

# Reshape the transformed data into images
X_images_new = X_transformed_new.reshape(-1, num_pixels, num_pixels, 1)

# Assuming the model is already trained and loaded, as shown in your code
# Make predictions on the new data
Y_pred_onehot_new = model.predict(X_images_new)
Y_pred_labels_new = np.argmax(Y_pred_onehot_new, axis=1)  # Convert one-hot predictions to class labels

# Assuming '1' is the class label for Alzheimer's and '0' is the class label for non-Alzheimer's
# You can use the following line to get a boolean array indicating whether each sample is predicted to have Alzheimer's or not
predicted_has_alzheimer = Y_pred_labels_new == 1

# Print the prediction for each sample in the new data
print(predicted_has_alzheimer)


[False]
