## 🎯 Goal:
- Use an autoencoder to compress image features, then use those compressed vectors as input to SVC, RF, or other ML models.

In [10]:
import pandas as pd

train_df = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
X = train_df.drop('label', axis=1) / 255.0
y = train_df['label']

# zero mask
zero_mask = (X == 0).sum(axis=0) / len(X) > 0.95

# Apply zero_mask to remove sparse pixels
X = X.loc[:, ~zero_mask]

In [11]:
from sklearn.model_selection import train_test_split

# split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, stratify=y, random_state=42)

In [12]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import regularizers

input_dim = X_train.shape[1]
encoding_dim = 64  # or 32, you can tune this

input_img = Input(shape=(input_dim,))
encoded = Dense(128, activation='relu')(input_img)
encoded = Dense(encoding_dim, activation='relu')(encoded)

decoded = Dense(128, activation='relu')(encoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)

autoencoder = Model(input_img, decoded)
encoder = Model(input_img, encoded)

autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

In [13]:
autoencoder.fit(X_train, X_train,
                epochs=100,
                batch_size=512,
                shuffle=True,
                validation_data=(X_val, X_val))

Epoch 1/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.1577 - val_loss: 0.0772
Epoch 2/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0680 - val_loss: 0.0475
Epoch 3/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0440 - val_loss: 0.0352
Epoch 4/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0336 - val_loss: 0.0292
Epoch 5/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0284 - val_loss: 0.0262
Epoch 6/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0255 - val_loss: 0.0237
Epoch 7/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0232 - val_loss: 0.0223
Epoch 8/100
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0216 - val_loss: 0.0206
Epoch 9/100
[1m74/74[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x789ab0a67cd0>

In [14]:
X_train_enc = encoder.predict(X_train)
X_val_enc = encoder.predict(X_val)

[1m1182/1182[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
[1m132/132[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [20]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

svc = SVC(C=10, kernel='rbf', gamma='scale', probability=True)
svc.fit(X_train_enc, y_train)
y_pred = svc.predict(X_val_enc)

print("📊 Accuracy:", accuracy_score(y_val, y_pred))
print("📄 Report:\n", classification_report(y_val, y_pred))

📊 Accuracy: 0.9754761904761905
📄 Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       413
           1       0.99      0.99      0.99       468
           2       0.97      0.98      0.97       418
           3       0.97      0.96      0.97       435
           4       0.99      0.99      0.99       407
           5       0.95      0.97      0.96       380
           6       0.98      0.99      0.98       414
           7       0.98      0.98      0.98       440
           8       0.97      0.95      0.96       406
           9       0.96      0.96      0.96       419

    accuracy                           0.98      4200
   macro avg       0.98      0.98      0.98      4200
weighted avg       0.98      0.98      0.98      4200



In [16]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

rf = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42)
rf.fit(X_train_enc, y_train)

y_pred_rf = rf.predict(X_val_enc)
print("🌳 RF Accuracy:", accuracy_score(y_val, y_pred_rf))
print(classification_report(y_val, y_pred_rf))

🌳 RF Accuracy: 0.9276190476190476
              precision    recall  f1-score   support

           0       0.97      0.96      0.96       413
           1       0.99      0.99      0.99       468
           2       0.93      0.93      0.93       418
           3       0.87      0.89      0.88       435
           4       0.94      0.93      0.93       407
           5       0.89      0.88      0.89       380
           6       0.92      0.97      0.94       414
           7       0.93      0.95      0.94       440
           8       0.92      0.85      0.89       406
           9       0.91      0.92      0.92       419

    accuracy                           0.93      4200
   macro avg       0.93      0.93      0.93      4200
weighted avg       0.93      0.93      0.93      4200



In [17]:
from catboost import CatBoostClassifier

cat = CatBoostClassifier(
    iterations=300,
    learning_rate=0.1,
    depth=6,
    verbose=0,
    random_state=42
)
cat.fit(X_train_enc, y_train)

y_pred_cat = cat.predict(X_val_enc)
print("🐱 CatBoost Accuracy:", accuracy_score(y_val, y_pred_cat))
print(classification_report(y_val, y_pred_cat))

🐱 CatBoost Accuracy: 0.9269047619047619
              precision    recall  f1-score   support

           0       0.97      0.96      0.96       413
           1       0.97      0.98      0.98       468
           2       0.93      0.92      0.92       418
           3       0.91      0.91      0.91       435
           4       0.95      0.93      0.94       407
           5       0.88      0.91      0.89       380
           6       0.93      0.97      0.95       414
           7       0.93      0.93      0.93       440
           8       0.92      0.85      0.88       406
           9       0.89      0.91      0.90       419

    accuracy                           0.93      4200
   macro avg       0.93      0.93      0.93      4200
weighted avg       0.93      0.93      0.93      4200



In [21]:
from sklearn.ensemble import VotingClassifier

voting_clf = VotingClassifier(
    estimators=[
        ('svc',svc),
        ('rf', rf),
        ('cat', cat)
    ],
    voting='soft'  # use predicted probabilities
)
voting_clf.fit(X_train_enc, y_train)

y_pred_vote = voting_clf.predict(X_val_enc)
print("🗳️ Voting Accuracy:", accuracy_score(y_val, y_pred_vote))
print(classification_report(y_val, y_pred_vote))

🗳️ Voting Accuracy: 0.9726190476190476
              precision    recall  f1-score   support

           0       0.99      0.98      0.98       413
           1       0.99      1.00      0.99       468
           2       0.97      0.97      0.97       418
           3       0.97      0.95      0.96       435
           4       0.99      0.99      0.99       407
           5       0.95      0.96      0.95       380
           6       0.97      0.99      0.98       414
           7       0.98      0.98      0.98       440
           8       0.97      0.95      0.96       406
           9       0.96      0.96      0.96       419

    accuracy                           0.97      4200
   macro avg       0.97      0.97      0.97      4200
weighted avg       0.97      0.97      0.97      4200



In [22]:
!pip install xgboost



In [25]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

mlp = MLPClassifier(hidden_layer_sizes=(128, 64),
                    activation='relu',
                    solver='adam',
                    learning_rate='adaptive',
                    alpha=1e-3,
                    max_iter=300,
                    random_state=42)

mlp.fit(X_train_enc, y_train)

y_pred_mlp = mlp.predict(X_val_enc)
acc = accuracy_score(y_val, y_pred_mlp)

print("🧠 MLPClassifier Accuracy:", acc)
print("📄 Classification Report:")
print(classification_report(y_val, y_pred_mlp))

🧠 MLPClassifier Accuracy: 0.9704761904761905
📄 Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.96      0.98       413
           1       0.99      0.99      0.99       468
           2       0.97      0.97      0.97       418
           3       0.98      0.95      0.97       435
           4       0.96      1.00      0.98       407
           5       0.94      0.96      0.95       380
           6       0.95      1.00      0.97       414
           7       0.98      0.97      0.98       440
           8       0.98      0.94      0.96       406
           9       0.96      0.95      0.95       419

    accuracy                           0.97      4200
   macro avg       0.97      0.97      0.97      4200
weighted avg       0.97      0.97      0.97      4200



In [26]:
from sklearn.model_selection import RandomizedSearchCV

param_dist = {
    'hidden_layer_sizes': [(64,), (128,), (128, 64), (256, 128)],
    'activation': ['relu', 'tanh'],
    'alpha': [1e-4, 1e-3, 1e-2],
    'learning_rate': ['constant', 'adaptive']
}

mlp_base = MLPClassifier(max_iter=300, random_state=42)

search = RandomizedSearchCV(mlp_base, param_dist, n_iter=5, cv=3,
                            scoring='accuracy', verbose=2, n_jobs=-1)
search.fit(X_train_enc, y_train)

Fitting 3 folds for each of 5 candidates, totalling 15 fits


[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(256, 128), learning_rate=adaptive; total time= 2.0min
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(256, 128), learning_rate=adaptive; total time= 3.4min
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(128,), learning_rate=adaptive; total time= 3.4min
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64,), learning_rate=adaptive; total time=  58.7s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(256, 128), learning_rate=adaptive; total time= 2.1min
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(128,), learning_rate=adaptive; total time= 4.4min
[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(256, 128), learning_rate=adaptive; total time= 1.8min
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(64,), learning_rate=adaptive; total time= 1.1min
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(256, 128), learning_rate=adaptive; total time= 1.7min

In [27]:
search.best_params_

{'learning_rate': 'adaptive',
 'hidden_layer_sizes': (256, 128),
 'alpha': 0.0001,
 'activation': 'relu'}

In [28]:
print(f"✅ Best CV Accuracy: {search.best_score_:.4f}")

✅ Best CV Accuracy: 0.9733
