Import libaries needed

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping

In [5]:
# Load and preprocess your dataset containing GLCM features
data = pd.read_csv('final_balanced.csv')  # Assuming your data is in a CSV file
X = data.drop(columns=['label'])  # Features (GLCM properties)
y = data['label']  # Target labels

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split data into train, validation, and test sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) #Train: 80, Test: 20
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42) #Train:80, Val:20


# Standardize features using StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)
x_val = scaler.transform(x_val)

In [6]:
# Define the parameter grid for hyperparameter tuning
param_grid = {
    'hidden_layer_sizes': [(64, 32), (128, 64), (256, 128)],# Different layer configurations
    'activation': ['relu', 'tanh'],  # Activation functions
    'solver': ['adam', 'sgd'], # Optimization solver
    'batch_size': [32, 64], # Batch size
    'max_iter': [50, 100, 150] ,# Number of epochs (max_iter)
}

# Create a neural network classifier
mlp = MLPClassifier() # Base MLPClassifier

# Use KFold for cross-validation within GridSearchCV
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Initialize GridSearchCV with MLPClassifier and parameter grid
grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, cv=kfold, scoring='accuracy')

# Perform grid search with cross-validation
grid_search.fit(x_train, y_train)



In [7]:
# Retrieve the best model and evaluate on the test set
best_model = grid_search.best_estimator_
y_pred = best_model.predict(x_val)

test_accuracy = accuracy_score(y_val, y_pred)
print("Best Model Test Accuracy:", test_accuracy)

conf_matrix = confusion_matrix(y_val, y_pred)
print("Confusion matrix: ")
print(conf_matrix)

class_report = classification_report(y_val, y_pred)
print("Classification Report: ")
print(class_report)

# Best parameters found by GridSearchCV
print("Best Parameters:", grid_search.best_params_)

Best Model Test Accuracy: 0.7908333333333334
Confusion matrix: 
[[339  78  21]
 [ 56 301  14]
 [ 49  33 309]]
Classification Report: 
              precision    recall  f1-score   support

           0       0.76      0.77      0.77       438
           1       0.73      0.81      0.77       371
           2       0.90      0.79      0.84       391

    accuracy                           0.79      1200
   macro avg       0.80      0.79      0.79      1200
weighted avg       0.80      0.79      0.79      1200

Best Parameters: {'activation': 'tanh', 'batch_size': 64, 'hidden_layer_sizes': (64, 32), 'max_iter': 150, 'solver': 'adam'}


In [9]:
# Retrieve and print the accuracy of every parameter combination
results_df = pd.DataFrame(grid_search.cv_results_)
params_cols = ['param_' + param for param in param_grid.keys()]
acc_cols = ['mean_test_score'] + [col for col in results_df.columns if 'split' in col and 'test_score' in col]

# Display accuracy for each parameter combination
print("Accuracy for Each Parameter Combination:")
print(results_df[params_cols + acc_cols])
results_df.to_csv("tuning.csv", index=False)

Accuracy for Each Parameter Combination:
   param_hidden_layer_sizes param_activation param_solver param_batch_size  \
0                  (64, 32)             relu         adam               32   
1                  (64, 32)             relu          sgd               32   
2                  (64, 32)             relu         adam               32   
3                  (64, 32)             relu          sgd               32   
4                  (64, 32)             relu         adam               32   
..                      ...              ...          ...              ...   
67               (256, 128)             tanh          sgd               64   
68               (256, 128)             tanh         adam               64   
69               (256, 128)             tanh          sgd               64   
70               (256, 128)             tanh         adam               64   
71               (256, 128)             tanh          sgd               64   

   param_max_iter  mea

In [10]:
def create_model(x_shape, y_shape):
  model = Sequential()

  # Add Dense layers
  model.add(Dense(64, activation='tanh', input_shape=(x_shape,)))

  model.add(Dense(32, activation='tanh'))

  # Output layer with softmax activation for classification
  model.add(Dense(y_shape, activation='softmax'))

  # Compile the model
  model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  return model

In [11]:
X_train_full = np.concatenate((x_train, x_val), axis=0)
y_train_full = np.concatenate((y_train, y_val), axis=0)

final_model = create_model(x_train.shape[1], len(np.unique(y_train)))

# Train the model on the full training dataset
early_stopping = EarlyStopping(monitor='accuracy', patience=20, restore_best_weights=True)
final_model.fit(X_train_full, y_train_full, epochs=150, batch_size=64, callbacks=[early_stopping])

test_loss, test_accuracy = final_model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_accuracy}")

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [12]:
# Evaluate model performance on the test set
y_pred = np.argmax(final_model.predict(x_test), axis=1)
print(y_pred)
test_accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Test Accuracy:", test_accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", class_report)


[0 2 0 ... 1 1 1]
Test Accuracy: 0.7826666666666666
Confusion Matrix:
 [[348 139  40]
 [ 35 437  20]
 [ 51  41 389]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.66      0.72       527
           1       0.71      0.89      0.79       492
           2       0.87      0.81      0.84       481

    accuracy                           0.78      1500
   macro avg       0.79      0.79      0.78      1500
weighted avg       0.79      0.78      0.78      1500



In [13]:
k = 5  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Initialize lists to store evaluation results across folds
test_accuracies = []
confusion_matrices = []
classification_reports = []

fold_index = 0
for train_idx, test_idx in kf.split(X):
    fold_index += 1
    print(f"Training and evaluating fold {fold_index}/{k}")

    x_train, x_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    # Standardize features
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # Evaluate the model on the test set
    test_loss, test_accuracy = final_model.evaluate(x_test, y_test)
    print(f"Test Accuracy (Fold {fold_index}): {test_accuracy}")

    y_pred = np.argmax(final_model.predict(x_test), axis=1)

    # Calculate and store confusion matrix and classification report
    conf_matrix = confusion_matrix(y_test, y_pred)
    classification_report_str = classification_report(y_test, y_pred)

    # Store evaluation results
    test_accuracies.append(test_accuracy)
    confusion_matrices.append(conf_matrix)
    classification_reports.append(classification_report_str)

# Calculate and print average test accuracy across all folds
average_test_accuracy = np.mean(test_accuracies)
print(f"Average Test Accuracy: {average_test_accuracy}")

# Print average confusion matrix across all folds
average_conf_matrix = np.mean(confusion_matrices, axis=0)
print("Average Confusion Matrix:")
print(average_conf_matrix)

# Combine classification reports from all folds
print("Average Classification Report:")
combined_classification_report = "\n\n".join(classification_reports)
print(combined_classification_report)

Training and evaluating fold 1/5
Test Accuracy (Fold 1): 0.7833333611488342
Training and evaluating fold 2/5
Test Accuracy (Fold 2): 0.812666654586792
Training and evaluating fold 3/5
Test Accuracy (Fold 3): 0.8180000185966492
Training and evaluating fold 4/5
Test Accuracy (Fold 4): 0.8073333501815796
Training and evaluating fold 5/5
Test Accuracy (Fold 5): 0.8100000023841858
Average Test Accuracy: 0.8062666773796081
Average Confusion Matrix:
[[352.6 123.4  24. ]
 [ 37.4 440.6  22. ]
 [ 42.6  41.2 416.2]]
Average Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.66      0.72       527
           1       0.71      0.89      0.79       492
           2       0.87      0.81      0.84       481

    accuracy                           0.78      1500
   macro avg       0.79      0.79      0.78      1500
weighted avg       0.79      0.78      0.78      1500


              precision    recall  f1-score   support

           0       0.

In [14]:
final_model.save("PPDMD5.h5")

  saving_api.save_model(


In [15]:
import joblib
joblib.dump(scaler, "scaler.joblib")

['scaler.joblib']