In [1]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input
import optuna
from datetime import datetime
import time


In [2]:
notebook_start_time = datetime.now()
print(f"Notebook started at: {notebook_start_time}")

# Load dataset
df = pd.read_csv('complete_decimal_dataset.csv')

# Encode target variable
label_encoder = LabelEncoder()
df['specific_class_encoded'] = label_encoder.fit_transform(df['specific_class'])

# Prepare features and target
X = df.drop(columns=['label', 'category', 'specific_class', 'specific_class_encoded'])
y = df['specific_class_encoded']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split for evaluation
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, stratify=y, random_state=42)


Notebook started at: 2025-01-21 18:29:07.393839


In [3]:

def create_dnn_model(input_dim, layers, units, dropout_rate, learning_rate):
    model = Sequential()
    model.add(Input(shape=(input_dim,)))
    model.add(Dense(units, input_dim=input_dim, activation='relu'))
    model.add(Dropout(dropout_rate))
    
    for _ in range(layers - 1):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(dropout_rate))
    
    model.add(Dense(len(np.unique(y)), activation='softmax'))  # Output layer for multi-class classification
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


In [4]:

def optimize_dnn(trial):
    # Hyperparameters to optimize
    layers = trial.suggest_int('layers', 1, 5)
    units = trial.suggest_int('units', 16, 128, step=16)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_int('batch_size', 16, 128, step=16)
    epochs = trial.suggest_int('epochs', 10, 50)
    
    # Create the model
    model = create_dnn_model(X_train.shape[1], layers, units, dropout_rate, learning_rate)
    
    #Stratified K-Fold Cross-Validation
    #Convert to NumPy arrays
    X_train_np = np.array(X_train)  
    y_train_np = np.array(y_train)  
    
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = []
    for train_idx, val_idx in skf.split(X_train_np, y_train_np):
        X_ktrain, X_kval = X_train_np[train_idx], X_train_np[val_idx]
        y_ktrain, y_kval = y_train_np[train_idx], y_train_np[val_idx]
    
        # Train the model 
        model.fit(X_ktrain, y_ktrain, epochs=epochs, batch_size=batch_size, verbose=0)
    
        # Evaluate on validation data
        _, accuracy = model.evaluate(X_kval, y_kval, verbose=0)
        scores.append(accuracy)



    

    return np.mean(scores)

# Run the Optuna study
study_dnn = optuna.create_study(direction="maximize")
study_dnn.optimize(optimize_dnn, n_trials=10, n_jobs=-1)

# Best hyperparameters
print("Best hyperparameters:", study_dnn.best_params)


[I 2025-01-21 18:29:09,666] A new study created in memory with name: no-name-97984fac-83be-4de2-b560-ea34eb5ec88d
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
[I 2025-01-21 19:30:26,500] Trial 7 finished with value: 0.9999920129776001 and parameters: {'layers': 3, 'units': 48, 'dropout_rate': 0.21452488962601535, 'learning_rate': 0.00012574537075716414, 'batch_size': 96, 'epochs': 10}. Best is trial 7 with value: 0.9999920129776001.
[I 2025-01-21 19:57:01,174] Trial 1 finished with value: 0.9808321595191956 and parameters: {'layers': 3, 'units': 16, 'dropout_rate': 0.3945862731213424, 'learning_rate': 0.0034230410862882417, 'batch_size': 112, 'epochs': 18}. Best is trial 7 with value: 0.9999920129776001.
[I 2025-01-21 20:22:05,453] Trial 4 finished with value: 0.9999920129776001 and parameters: {'layers': 4, 'units': 128, 'dropout_rate': 0.41467009471348093, 'learning_rate': 0.0015291829865857602, 'batch_size': 128, 'epochs': 24}. Best is trial 7 with value: 

Best hyperparameters: {'layers': 4, 'units': 128, 'dropout_rate': 0.2774088047541644, 'learning_rate': 0.0009501230980605523, 'batch_size': 112, 'epochs': 45}


In [5]:

# Extract best parameters
best_params = study_dnn.best_params

# Train the final model with the best parameters
final_model = create_dnn_model(X_train.shape[1],
                               layers=best_params['layers'],
                               units=best_params['units'],
                               dropout_rate=best_params['dropout_rate'],
                               learning_rate=best_params['learning_rate'])

start_time = time.time()
# Train on full training set
final_model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=1)

end_time = time.time()

training_duration = end_time - start_time
print(f"Model training time: {training_duration:.2f} seconds")

# Evaluate on the test set
y_pred = np.argmax(final_model.predict(X_test), axis=1)
report = classification_report(y_test, y_pred)
print("Classification Report on Test Data:\n", report)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/45
[1m10059/10059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2ms/step - accuracy: 0.9873 - loss: 0.0410
Epoch 2/45
[1m10059/10059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9999 - loss: 5.1067e-04
Epoch 3/45
[1m10059/10059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9999 - loss: 6.2611e-04
Epoch 4/45
[1m10059/10059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9999 - loss: 6.3112e-04
Epoch 5/45
[1m10059/10059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9999 - loss: 4.4573e-04
Epoch 6/45
[1m10059/10059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9999 - loss: 3.4542e-04
Epoch 7/45
[1m10059/10059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy: 0.9999 - loss: 5.6483e-04
Epoch 8/45
[1m10059/10059[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step - accuracy

In [6]:
# Log the end time
notebook_end_time = datetime.now()
print(f"Notebook ended at: {notebook_end_time}")

# Calculate the total duration
notebook_duration = notebook_end_time - notebook_start_time
print(f"Total notebook runtime: {notebook_duration}")

Notebook ended at: 2025-01-21 22:51:11.003978
Total notebook runtime: 4:22:03.610139
