## Evaluating the saved models on the original (non-augmented) data

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, classification_report

# Input/output CSV locations for each cathepsin dataset, keyed by enzyme letter.
# Every entry follows the same naming pattern, so the mapping is generated
# from the list of keys instead of being spelled out by hand.
datasets = {
    enzyme: {
        "input": f"./Training_data/input_cathepsin_{enzyme}.csv",
        "output": f"./Training_data/output_cathepsin_{enzyme}.csv",
    }
    for enzyme in ("B", "S", "D", "K")
}

# Preprocessing for evaluation: no augmentation is applied here
def preprocess_data(input_path, output_path):
    """Load one dataset and turn it into model-ready arrays.

    Both CSV files are read without a header and with their first row
    skipped. The features are min-max scaled per column, remaining NaNs are
    replaced by the column mean, and a trailing axis of length 1 is appended.
    The first column of the label file is one-hot encoded as float32.

    Args:
        input_path: Path to the CSV file holding the feature rows.
        output_path: Path to the CSV file whose first column holds the labels.

    Returns:
        A ``(features, labels)`` tuple: ``features`` is a 3-D array shaped
        ``(rows, columns, 1)`` and ``labels`` is a 2-D one-hot float32 array.

    NOTE(review): the scaler and imputer are fitted on the file passed in, on
    every call; whether that matches the statistics used at training time
    cannot be told from here -- confirm.
    NOTE(review): the one-hot width follows the distinct labels present in
    the file, so it presumably shrinks if a class is absent -- verify against
    the model's output size.
    """
    feature_frame = pd.read_csv(input_path, header=None, skiprows=1)
    label_frame = pd.read_csv(output_path, header=None, skiprows=1)

    # Scale first, then impute: the imputer fills the NaNs left after scaling
    scaled = MinMaxScaler().fit_transform(feature_frame)
    filled = SimpleImputer(strategy='mean').fit_transform(scaled)

    # Labels live in the first column of the output file
    one_hot = pd.get_dummies(label_frame[0].values, dtype=np.float32).values

    # Add the trailing single-channel axis (per the original comment, for the CNN)
    n_rows, n_columns = filled.shape
    features = filled.reshape((n_rows, n_columns, 1))

    return features, one_hot

# Evaluate every saved model on its original (non-augmented) dataset.
# For each key in `datasets`: preprocess the CSV pair, load the matching
# model from ./Saved_Models, predict, then print accuracy and a
# per-class classification report.
for key, paths in datasets.items():
    print(f"Processing dataset {key} for prediction...")

    # Preprocess the original data
    input_data_original, output_data_original = preprocess_data(paths['input'], paths['output'])

    # Load the trained model
    model = load_model(f'./Saved_Models/model_{key}.h5')

    # Make predictions
    predictions = model.predict(input_data_original)

    # The one-hot labels are built only from the label values present in the
    # file, so a column index equals the model's class index only when both
    # sides cover the same set of classes. A width mismatch means the argmax
    # indices below may refer to different classes; say so explicitly instead
    # of silently printing misaligned metrics.
    if output_data_original.shape[1] != predictions.shape[1]:
        print(
            f"WARNING: dataset {key} has {output_data_original.shape[1]} label classes "
            f"but the model outputs {predictions.shape[1]}; class indices may be "
            f"misaligned and the metrics below may be unreliable."
        )

    predicted_classes = np.argmax(predictions, axis=1)  # Convert probabilities to class labels
    true_classes = np.argmax(output_data_original, axis=1)  # Actual labels

    # Compute accuracy
    accuracy = accuracy_score(true_classes, predicted_classes)
    print(f"Accuracy on original data for dataset {key}: {accuracy:.4f}")

    # Display classification report
    print(f"Classification Report for dataset {key}:")
    print(classification_report(true_classes, predicted_classes))




Processing dataset B for prediction...
73/73 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Accuracy on original data for dataset B: 0.9771
Classification Report for dataset B:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      1733
           1       0.94      0.95      0.95       319
           2       0.93      0.98      0.95       255
           3       0.67      1.00      0.80         6

    accuracy                           0.98      2313
   macro avg       0.88      0.98      0.92      2313
weighted avg       0.98      0.98      0.98      2313

Processing dataset S for prediction...




[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Accuracy on original data for dataset S: 0.9186
Classification Report for dataset S:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93       980
           1       0.86      0.91      0.88       881
           2       0.96      0.90      0.93      1556
           3       0.87      0.99      0.93       316

    accuracy                           0.92      3733
   macro avg       0.91      0.93      0.92      3733
weighted avg       0.92      0.92      0.92      3733

Processing dataset D for prediction...




[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Accuracy on original data for dataset D: 0.9705
Classification Report for dataset D:
              precision    recall  f1-score   support

           0       0.99      0.97      0.98      3021
           1       0.90      0.97      0.93       658
           2       0.94      0.96      0.95       408
           3       0.71      1.00      0.83        20

    accuracy                           0.97      4107
   macro avg       0.89      0.98      0.92      4107
weighted avg       0.97      0.97      0.97      4107

Processing dataset K for prediction...




96/96 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step
Accuracy on original data for dataset K: 0.9409
Classification Report for dataset K:
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       998
           1       0.85      0.96      0.90       556
           2       0.98      0.90      0.94      1251
           3       0.84      1.00      0.91       258

    accuracy                           0.94      3063
   macro avg       0.91      0.96      0.93      3063
weighted avg       0.95      0.94      0.94      3063

