In [10]:
import os, zipfile
import pandas as pd
import numpy as np
from google.colab import drive
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

drive.mount('/content/drive', force_remount=True)

zip_path = "/content/drive/MyDrive/FDL 5th Sem Practical Codes/PISTACHIO DATASET.zip"
extract_dir = "/content/pistachio_data"
os.makedirs(extract_dir, exist_ok=True)
with zipfile.ZipFile(zip_path, 'r') as z:
    z.extractall(extract_dir)

dataset_path = os.path.join(extract_dir, "Pistachio_Dataset", "Pistachio_28_Features_Dataset",
                            "Pistachio_28_Features_Dataset.xlsx")
df = pd.read_excel(dataset_path)

X = df.drop(columns=["Class"])
y = LabelEncoder().fit_transform(df["Class"])

X_scaled = StandardScaler().fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=SEED, stratify=y)

input_dim = X_train.shape[1]
encoding_dim = 14
inp = Input(shape=(input_dim,))
enc = Dense(encoding_dim, activation='relu')(inp)
dec = Dense(input_dim, activation='sigmoid')(enc)
autoencoder = Model(inp, dec)
autoencoder.compile(optimizer=Adam(0.001), loss='mse')
autoencoder.fit(X_train, X_train, epochs=20, batch_size=32, validation_data=(X_test, X_test), verbose=1)

encoder = Model(inp, enc)
Z_train = encoder.predict(X_train)
Z_test = encoder.predict(X_test)

clf_raw = LogisticRegression(max_iter=200, random_state=SEED).fit(X_train, y_train)
clf_latent = LogisticRegression(max_iter=200, random_state=SEED).fit(Z_train, y_train)

acc_raw = accuracy_score(y_test, clf_raw.predict(X_test))
acc_latent = accuracy_score(y_test, clf_latent.predict(Z_test))

print(f"Test Reconstruction Loss: {autoencoder.evaluate(X_test, X_test):.4f}")
print(f"Accuracy (Raw features): {acc_raw:.4f}")
print(f"Accuracy (Latent features): {acc_latent:.4f}")
print("Sample latent vectors (first 3 rows):")
print(Z_test[:3])


Mounted at /content/drive
Epoch 1/20
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 1.2114 - val_loss: 1.1640
Epoch 2/20
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0836 - val_loss: 1.0293
Epoch 3/20
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9554 - val_loss: 0.9196
Epoch 4/20
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.8590 - val_loss: 0.8555
Epoch 5/20
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8000 - val_loss: 0.8149
Epoch 6/20
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.7610 - val_loss: 0.7861
Epoch 7/20
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.7329 - val_loss: 0.7642
Epoch 8/20
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.7114 - val_loss: 0.7478
Epoch 9/20
[1m54/54[0m [32m