In [1]:
# ============================================================
# Folded Autoencoder + Scaling + Random Forest Classification
# (Based on Bhui et al., 2020 ICCCNT)
# ============================================================

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# ------------------------------
# Step 1: Load dataset
# ------------------------------
file_path = "MLL_4.xlsx"  # replace with your dataset
data = pd.read_excel(file_path)

# Separate features and class
X = data.drop(columns=['class']).values
y = data['class'].values
input_dim = X.shape[1]
print(f"Input features: {input_dim}, Samples: {X.shape[0]}")


Input features: 12533, Samples: 72


In [3]:
data

Unnamed: 0,31307,31308_at,31309_r_at,31310_at,31311_at,31312_at,31313_at,31314_at,31315_at,31316_at,...,101_at,102_at,103_at,104_at,105_at,106_at,107_at,108_g_at,109_at,class
0,-135.7,-100.1,-94.6,-230,0.6,-50.4,-36.3,139.5,31.6,-32.2,...,-225.2,242.5,101.7,473.1,-59.9,217.9,275.6,-461.6,1115.5,0
1,-80.0,-23.0,-6.0,-145,491.0,290.0,-235.0,41.0,4602.0,-37.0,...,-175.0,143.0,96.0,301.0,-50.0,242.0,222.0,-330.0,2481.0,0
2,-91.0,-130.0,-27.0,-51,236.0,-163.0,-304.0,-35.0,498.0,-56.0,...,-308.0,184.0,-32.0,350.0,-11.0,837.0,174.0,-99.0,376.0,0
3,-144.0,-124.0,-26.0,-139,-88.0,34.0,-411.0,118.0,-239.0,-104.0,...,731.0,106.0,-330.0,-36.0,-190.0,999.0,255.0,-353.0,1603.0,0
4,-89.0,-25.0,-64.0,-112,452.0,183.0,107.0,233.0,38.0,-35.0,...,182.0,426.0,155.0,607.0,50.0,249.0,1635.0,-780.0,1103.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,-324.0,-168.0,-49.0,312,1059.0,-24.0,-404.0,12.0,101.0,-55.0,...,-422.0,528.0,220.0,643.0,187.0,407.0,-564.0,-1736.0,346.0,2
68,-148.0,-104.0,29.0,72,465.0,162.0,-895.0,33.0,1736.0,38.0,...,128.0,94.0,66.0,556.0,63.0,200.0,120.0,-757.0,825.0,2
69,-230.0,-66.0,-69.0,377,686.0,-44.0,-123.0,7.0,310.0,-119.0,...,-230.0,257.0,71.0,581.0,64.0,35.0,829.0,-2015.0,385.0,2
70,-359.0,-52.0,-147.0,120,564.0,-52.0,-584.0,64.0,2528.0,-90.0,...,-236.0,88.0,94.0,143.0,232.0,434.0,-87.0,-2038.0,1228.0,2


In [4]:
# ------------------------------
# Step 2: Define Folded Autoencoder
# ------------------------------
class FoldedAutoencoder(nn.Module):
    def __init__(self, input_dim):
        super(FoldedAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, input_dim // 2),
            nn.ReLU(),
            nn.Linear(input_dim // 2, input_dim // 4),
            nn.ReLU(),
            nn.Linear(input_dim // 4, input_dim // 8),
            nn.ReLU(),
            nn.Linear(input_dim // 8, input_dim // 16),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(input_dim // 16, input_dim // 8),
            nn.ReLU(),
            nn.Linear(input_dim // 8, input_dim // 4),
            nn.ReLU(),
            nn.Linear(input_dim // 4, input_dim // 2),
            nn.ReLU(),
            nn.Linear(input_dim // 2, input_dim)
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded

In [5]:
# ------------------------------
# Step 3: Train Folded Autoencoder
# ------------------------------
model = FoldedAutoencoder(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 100
batch_size = 8

X_tensor = torch.tensor(X, dtype=torch.float32)

for epoch in range(epochs):
    permutation = torch.randperm(X_tensor.size(0))
    epoch_loss = 0
    for i in range(0, X_tensor.size(0), batch_size):
        indices = permutation[i:i+batch_size]
        batch_x = X_tensor[indices]
        optimizer.zero_grad()
        encoded, decoded = model(batch_x)
        loss = criterion(decoded, batch_x)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

Epoch [10/100], Loss: 21523692.5000
Epoch [20/100], Loss: 21308431.1250
Epoch [30/100], Loss: 21308410.1250
Epoch [40/100], Loss: 21788415.1250
Epoch [50/100], Loss: 21371335.3750
Epoch [60/100], Loss: 19014696.5000
Epoch [70/100], Loss: 16416326.0000
Epoch [80/100], Loss: 15897603.3750
Epoch [90/100], Loss: 15432320.7500
Epoch [100/100], Loss: 15296960.5000


In [6]:
# ------------------------------
# Step 4: Extract encoded features
# ------------------------------
with torch.no_grad():
    encoded_features, reconstructed = model(X_tensor)

X_encoded = encoded_features.numpy()
print(f"\nEncoded feature dimension: {X_encoded.shape[1]}")

# Save for reference
encoded_df = pd.DataFrame(X_encoded)
encoded_df['class'] = y
encoded_df.to_excel("encoded_features_FA.xlsx", index=False)
print("âœ… Encoded features saved to 'encoded_features_FA.xlsx'")


Encoded feature dimension: 783
âœ… Encoded features saved to 'encoded_features_FA.xlsx'


In [7]:
# ------------------------------
# Step 5: Scaling between (-1, 1)
# ------------------------------
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X_encoded)

In [8]:
# ------------------------------
# Step 6: Randomize the dataset
# ------------------------------
np.random.seed(42)
shuffle_idx = np.random.permutation(len(X_scaled))
X_scaled = X_scaled[shuffle_idx]
y = y[shuffle_idx]

In [9]:
# ------------------------------
# Step 7: Train-Test Split (80:20)
# ------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

print(f"Train samples: {len(X_train)}, Test samples: {len(X_test)}")

Train samples: 57, Test samples: 15


In [10]:
# ------------------------------
# Step 8: Train Random Forest Classifier
# ------------------------------
rf = RandomForestClassifier(n_estimators=200, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [11]:
# ------------------------------
# Step 9: Evaluate the model
# ------------------------------
accuracy = accuracy_score(y_test, y_pred)
print(f"\nðŸŽ¯ Random Forest Accuracy: {accuracy*100:.2f}%")

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


ðŸŽ¯ Random Forest Accuracy: 80.00%

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.80      0.80         5
           1       0.67      0.50      0.57         4
           2       0.86      1.00      0.92         6

    accuracy                           0.80        15
   macro avg       0.77      0.77      0.76        15
weighted avg       0.79      0.80      0.79        15


Confusion Matrix:
[[4 1 0]
 [1 2 1]
 [0 0 6]]
