In [1]:
!pip install numpy scikit-learn tensorflow



In [2]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense

In [3]:
# Load and preprocess the Breast Cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target
X = StandardScaler().fit_transform(X)  

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
# Set the target dimension for dimensionality reduction
k = 10  

# Define a deeper autoencoder model
input_dim = X_train.shape[1]
encoding_dim = k

input_layer = Input(shape=(input_dim,))
encoder = Dense(128, activation="relu")(input_layer)
encoder = Dense(64, activation="relu")(encoder)
encoder = Dense(32, activation="relu")(encoder)
encoder = Dense(16, activation="relu")(encoder)
encoder = Dense(encoding_dim, activation="relu")(encoder)  # Bottleneck layer

decoder = Dense(16, activation="relu")(encoder)
decoder = Dense(32, activation="relu")(decoder)
decoder = Dense(64, activation="relu")(decoder)
decoder = Dense(128, activation="relu")(decoder)
decoder = Dense(input_dim, activation="sigmoid")(decoder)

autoencoder = Model(inputs=input_layer, outputs=decoder)
encoder_model = Model(inputs=input_layer, outputs=encoder)

In [5]:
# Compile and train the autoencoder
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(X_train, X_train, epochs=100, batch_size=16, shuffle=True, validation_data=(X_test, X_test))

Epoch 1/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - loss: 1.1972 - val_loss: 0.7991
Epoch 2/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7794 - val_loss: 0.7628
Epoch 3/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.7612 - val_loss: 0.6887
Epoch 4/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7779 - val_loss: 0.6741
Epoch 5/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7621 - val_loss: 0.6642
Epoch 6/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7420 - val_loss: 0.6519
Epoch 7/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.7049 - val_loss: 0.6384
Epoch 8/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.6686 - val_loss: 0.6235
Epoch 9/100
[1m29/29[0m [32m━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7e20921607f0>

In [9]:
# Print model summary
autoencoder.summary()

In [6]:
# Generate k-dimensional features using the encoder
X_train_autoencoded = encoder_model.predict(X_train)
X_test_autoencoded = encoder_model.predict(X_test)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [7]:
# Apply PCA for dimensionality reduction to k dimensions
pca = PCA(n_components=k)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [8]:
# Using autoencoded features
clf_autoencoder = RandomForestClassifier(random_state=42)
clf_autoencoder.fit(X_train_autoencoded, y_train)
y_pred_autoencoder = clf_autoencoder.predict(X_test_autoencoded)
accuracy_autoencoder = accuracy_score(y_test, y_pred_autoencoder)

# Using PCA features
clf_pca = RandomForestClassifier(random_state=42)
clf_pca.fit(X_train_pca, y_train)
y_pred_pca = clf_pca.predict(X_test_pca)
accuracy_pca = accuracy_score(y_test, y_pred_pca)

print(f"Accuracy with Deeper Autoencoder (k={k}): {accuracy_autoencoder:.2f}")
print(f"Accuracy with PCA (k={k}): {accuracy_pca:.2f}")

Accuracy with Deeper Autoencoder (k=10): 0.94
Accuracy with PCA (k=10): 0.96
