# Model Evaluation

## Objectives:
- Perform cross-validation for all trained models.
- Evaluate models on unseen data using accuracy, precision, recall, and F1-score.
- Visualize model performance for comparison.

1. Import Libraries

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from sklearn.model_selection import cross_val_score, cross_validate
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

2. Load Data and Trained Models

In [2]:
print("Loading data...")

import os
import pandas as pd

# Define the file path
file_path = '/workspace/bicycle_thefts_berlin/outputs/datasets/featured/TestSet_Featured.csv'

# Verify and load the file
if os.path.exists(file_path):
    print("File path verified. Loading test data...")
    test_data = pd.read_csv(file_path)
    print("Test data loaded successfully.")
else:
    print(f"Error: File not found at {file_path}")

# Verify the data structure
print("Test data head:")
print(test_data.head())

# Specify the target column
TARGET_COLUMN = 'VERSUCH'  # Ensure this matches the target column in your dataset

# Separate features and target
X_test = test_data.drop(columns=[TARGET_COLUMN])
y_test = test_data[TARGET_COLUMN]

# Confirm data shape
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

print("Loading trained models...")

logistic_model_path = '/workspace/bicycle_thefts_berlin/jupyter_notebooks/Logistic Regression_model.pkl'
random_forest_model_path = '/workspace/bicycle_thefts_berlin/jupyter_notebooks/Random Forest_model.pkl'
svm_model_path = '/workspace/bicycle_thefts_berlin/jupyter_notebooks/SVM_model.pkl'
knn_model_path = '/workspace/bicycle_thefts_berlin/jupyter_notebooks/K-Nearest Neighbors_model.pkl'

# Load the models
if os.path.exists(logistic_model_path):
    logistic_model = joblib.load(logistic_model_path)
    print("Logistic Regression model loaded.")
else:
    print(f"Error: Logistic Regression model not found at {logistic_model_path}")

if os.path.exists(random_forest_model_path):
    random_forest_model = joblib.load(random_forest_model_path)
    print("Random Forest model loaded.")
else:
    print(f"Error: Random Forest model not found at {random_forest_model_path}")

if os.path.exists(svm_model_path):
    svm_model = joblib.load(svm_model_path)
    print("SVM model loaded.")
else:
    print(f"Error: SVM model not found at {svm_model_path}")

if os.path.exists(knn_model_path):
    knn_model = joblib.load(knn_model_path)
    print("K-Nearest Neighbors model loaded.")
else:
    print(f"Error: K-Nearest Neighbors model not found at {knn_model_path}")


Loading data...
File path verified. Loading test data...
Test data loaded successfully.
Test data head:
  ANGELEGT_AM  TATZEIT_ANFANG_STUNDE TATZEIT_ENDE_DATUM  TATZEIT_ENDE_STUNDE  \
0  2022-03-10               0.252928         2022-03-10                   17   
1  2022-03-07              -1.238789         2022-03-06                   14   
2  2022-05-12               0.998786         2022-05-11                    7   
3  2023-07-15               1.185251         2023-07-15                    3   
4  2022-05-16               0.439393         2022-05-16                   17   

  VERSUCH                                    ERFASSUNGSGRUND  \
0       0       Sonstiger schwerer Diebstahl von FahrrÃ¤dern   
1       0       Sonstiger schwerer Diebstahl von FahrrÃ¤dern   
2       0       Sonstiger schwerer Diebstahl von FahrrÃ¤dern   
3       0       Sonstiger schwerer Diebstahl von FahrrÃ¤dern   
4       0  Sonstiger schwerer Diebstahl in/aus Keller/Bod...   

   ART_DES_FAHRRADS_Fahrrad  A

3. Evaluate Models on Test Data

In [5]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from sklearn.exceptions import NotFittedError

print("Evaluating Models on Test Data...")

# Ensure that y_test has consistent data types (convert to numeric)
if y_test.dtype == 'object':
    y_test = pd.to_numeric(y_test, errors='coerce')
    print("Converted y_test to numeric.")

# Handle NaN values in y_test
if y_test.isnull().sum() > 0:
    print(f"Found {y_test.isnull().sum()} NaN values in y_test. Dropping NaN values.")
    valid_indices = y_test.dropna().index
    X_test = X_test.loc[valid_indices]
    y_test = y_test.loc[valid_indices]

# Align features in X_test with the features used during model training
if hasattr(logistic_model, 'feature_names_in_'):
    train_features = logistic_model.feature_names_in_
    X_test_aligned = X_test.reindex(columns=train_features, fill_value=0)
else:
    X_test_aligned = X_test.copy()

# Define models dictionary
models = {
    'Logistic Regression': logistic_model,
    'Random Forest': random_forest_model,
    'Support Vector Machine': svm_model,
    'K-Nearest Neighbors': knn_model
}

# Dictionary to store evaluation results
evaluation_results = {}

# Evaluate each model
for model_name, model in models.items():
    try:
        print(f"Evaluating {model_name}...")

        # Predict on the aligned test set
        y_pred = model.predict(X_test_aligned)

        # Ensure y_pred is numeric if y_test is numeric
        if y_test.dtype in ['int64', 'float64']:
            y_pred = pd.to_numeric(y_pred, errors='coerce')

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')

        # Store results
        evaluation_results[model_name] = {
            'accuracy': accuracy,
            'f1_score': f1
        }
        print(f"{model_name} -> Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")

    except ValueError as ve:
        print(f"ValueError evaluating {model_name}: {ve}")
    except NotFittedError as nfe:
        print(f"NotFittedError evaluating {model_name}: {nfe}")
    except Exception as e:
        print(f"Error evaluating {model_name}: {e}")

# Print evaluation results
print("\nEvaluation Results:")
for model_name, metrics in evaluation_results.items():
    print(f"{model_name}: {metrics}")

# Save evaluation results to a CSV file
results_df = pd.DataFrame(evaluation_results).T
results_dir = 'outputs'

# Create directory if it doesn't exist
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

results_path = os.path.join(results_dir, 'evaluation_results.csv')
results_df.to_csv(results_path, index=True)
print(f"Evaluation results saved to '{results_path}'.")


Evaluating Models on Test Data...
Found 1 NaN values in y_test. Dropping NaN values.
Evaluating Logistic Regression...
Logistic Regression -> Accuracy: 0.9976, F1 Score: 0.9964
Evaluating Random Forest...
Random Forest -> Accuracy: 0.9971, F1 Score: 0.9961
Evaluating Support Vector Machine...
Support Vector Machine -> Accuracy: 0.9976, F1 Score: 0.9964
Evaluating K-Nearest Neighbors...
K-Nearest Neighbors -> Accuracy: 0.9976, F1 Score: 0.9964

Evaluation Results:
Logistic Regression: {'accuracy': 0.9975657818476875, 'f1_score': np.float64(0.9963501559312016)}
Random Forest: {'accuracy': 0.9971021212472471, 'f1_score': np.float64(0.9961182721520468)}
Support Vector Machine: {'accuracy': 0.9975657818476875, 'f1_score': np.float64(0.9963501559312016)}
K-Nearest Neighbors: {'accuracy': 0.9975657818476875, 'f1_score': np.float64(0.9963501559312016)}
Evaluation results saved to 'outputs/evaluation_results.csv'.


4. Evaluation Results Summary

In [6]:
# Summarize Evaluation Results
print("\nFinal Evaluation Results:")
results_df = pd.DataFrame(evaluation_results).T
print(results_df)

# Save Evaluation Results to CSV
results_dir = 'outputs/evaluation'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

results_file = os.path.join(results_dir, 'final_evaluation_results.csv')
results_df.to_csv(results_file, index=True)
print(f"Evaluation results saved to {results_file}.")


Final Evaluation Results:
                        accuracy  f1_score
Logistic Regression     0.997566  0.996350
Random Forest           0.997102  0.996118
Support Vector Machine  0.997566  0.996350
K-Nearest Neighbors     0.997566  0.996350
Evaluation results saved to outputs/evaluation/final_evaluation_results.csv.


5. Model Comparison and Selection
Compare the evaluation results (accuracy and F1-score) to determine the best-performing model.

In [7]:
# Identify the best model based on accuracy and F1-score
best_model_name = results_df['accuracy'].idxmax()
best_model_performance = results_df.loc[best_model_name]

print(f"\nBest Model: {best_model_name}")
print(f"Performance:\n{best_model_performance}")


Best Model: Logistic Regression
Performance:
accuracy    0.997566
f1_score    0.996350
Name: Logistic Regression, dtype: float64


6. Save the Best Model
Save the best-performing model for deployment or further analysis

In [8]:
# Save the best model
best_model = models[best_model_name]
best_model_file = os.path.join(results_dir, f'{best_model_name}_best_model.pkl')

joblib.dump(best_model, best_model_file)
print(f"Best model saved to {best_model_file}.")

Best model saved to outputs/evaluation/Logistic Regression_best_model.pkl.


7. Classification Report
Generate a detailed classification report for the best-performing model.

In [10]:
from sklearn.metrics import classification_report

# Generate predictions using the best model
y_pred_best = best_model.predict(X_test_aligned)

# Create and print the classification report
classification_report_best = classification_report(y_test, y_pred_best)
print(f"\nClassification Report for {best_model_name}:\n")
print(classification_report_best)

# Save the classification report to a text file
classification_report_file = os.path.join(results_dir, f'{best_model_name}_classification_report.txt')
with open(classification_report_file, 'w') as f:
    f.write(f"Classification Report for {best_model_name}:\n")
    f.write(classification_report_best)
print(f"Classification report saved to {classification_report_file}.")


Classification Report for Logistic Regression:

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      8606
         1.0       0.00      0.00      0.00        21

    accuracy                           1.00      8627
   macro avg       0.50      0.50      0.50      8627
weighted avg       1.00      1.00      1.00      8627

Classification report saved to outputs/evaluation/Logistic Regression_classification_report.txt.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
