In [1]:
import joblib
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd

# Load test data for evaluation
engineered_file = "engineered_data.csv"
data = pd.read_csv(engineered_file)

X_test = data.drop(columns=["EntryID", "term", "aspect", "taxonomyID_x_GO_term"], errors="ignore")
y_test = data["taxonomyID"].astype(int)

# Load saved models
model_files = [
    "NaiveBayes.pth",
    "DecisionTree.pth",
    "RandomForest.pth",
    "SGDClassifier_(Approximate_SVM).pth",
    "SVM_Subset.pth"
]

print("\nEvaluating saved models...")
results = []

for model_file in model_files:
    model_name = model_file.replace(".pth", "").replace("_", " ")
    try:
        print(f"\nLoading model: {model_name}")
        model = joblib.load(model_file)

        # Apply PCA for SVM model
        if "SVM_Subset" in model_file:
            pca = PCA(n_components=12, random_state=42)
            X_test_pca = pca.fit_transform(X_test)
            y_pred = model.predict(X_test_pca)
        else:
            y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        report = classification_report(y_test, y_pred)
        print(f"{model_name} Accuracy: {accuracy:.4f}")
        print(f"{model_name} Classification Report:\n{report}")
        results.append((model_name, accuracy))

    except Exception as e:
        print(f"Error evaluating {model_name}: {e}")

# Display results summary
results_df = pd.DataFrame(results, columns=["Model", "Accuracy"])
print("\nSummary of Model Performance:")
print(results_df)



Evaluating saved models...

Loading model: NaiveBayes


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


NaiveBayes Accuracy: 0.0610
NaiveBayes Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.02      0.03   4771062
           1       0.05      0.98      0.09    250129
           2       0.00      0.00      0.00     13504
           3       0.81      0.00      0.00    314440
           4       0.00      0.00      0.00      2227
           5       0.44      0.00      0.01      1397
           6       0.00      0.00      0.00      3760
           7       0.00      0.00      0.00      2166
           8       0.00      0.00      0.00      1275
           9       0.00      0.00      0.00       413
          10       0.00      0.00      0.00       432
          11       0.00      0.00      0.00       525
          12       0.00      0.00      0.00       260
          13       0.00      0.00      0.00        62
          15       0.00      0.00      0.00       393
          16       0.38      0.00      0.00      1226
          17       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


RandomForest Accuracy: 0.9993
RandomForest Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   4771062
           1       1.00      1.00      1.00    250129
           2       1.00      0.99      0.99     13504
           3       1.00      1.00      1.00    314440
           4       1.00      0.83      0.91      2227
           5       0.94      1.00      0.97      1397
           6       1.00      0.97      0.98      3760
           7       0.56      1.00      0.72      2166
           8       0.00      0.00      0.00      1275
           9       0.00      0.00      0.00       413
          10       0.00      0.00      0.00       432
          11       0.46      0.94      0.62       525
          12       0.00      0.00      0.00       260
          13       0.00      0.00      0.00        62
          15       0.00      0.00      0.00       393
          16       0.63      1.00      0.77      1226
          17   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


SGDClassifier (Approximate SVM) Accuracy: 0.0368
SGDClassifier (Approximate SVM) Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.04      0.07   4771062
           1       0.53      0.00      0.00    250129
           2       0.01      0.00      0.00     13504
           3       0.20      0.03      0.06    314440
           4       0.00      0.00      0.00      2227
           5       0.00      0.99      0.00      1397
           6       0.00      0.00      0.00      3760
           7       0.00      0.00      0.00      2166
           8       0.00      0.00      0.00      1275
           9       0.00      0.00      0.00       413
          10       0.00      0.00      0.00       432
          11       0.00      0.00      0.00       525
          12       0.00      0.00      0.00       260
          13       0.00      0.00      0.00        62
          15       0.00      0.00      0.00       393
          16       0.00      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
