# Jaguar Movement Pattern Analysis - Model Evaluation

## 1. Setup and Data Loading

In [1]:
%pip install seaborn

[33mDEPRECATION: Loading egg at /opt/homebrew/lib/python3.11/site-packages/jupyter-1.0.0-py3.11.egg is deprecated. pip 25.1 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
# --- Imports: standard library ---
import os
import pickle
import sys
from pathlib import Path

# --- Imports: third-party ---
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc

# --- Project layout ---
# The notebook is expected to run from a directory one level below the project root.
notebook_path = os.getcwd()  # Gets current working directory
project_root = os.path.abspath(os.path.join(notebook_path, '..'))

# Put the project root on sys.path exactly once so that `src.*` is importable;
# the guard keeps repeated runs of this cell from stacking duplicate entries.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

model_dir = os.path.join(project_root, 'models')
data_dir = os.path.join(project_root, 'data')
raw_dir = os.path.join(data_dir, 'raw')
# Side effect: creates data/raw (and parents) if it does not exist yet.
Path(raw_dir).mkdir(parents=True, exist_ok=True)

# --- Imports: project modules (need project_root on sys.path, set above) ---
from src.data.data_loader import DataLoader
from src.data.feature_engineering import FeatureEngineer

# Set up plotting
plt.rcParams['figure.figsize'] = [12, 8]

In [3]:
# Load the trained model.
# SECURITY: pickle.load can execute arbitrary code while deserializing;
# only load model files that you created or otherwise trust.
model_path = Path(model_dir) / 'jaguar_behavior_model.pkl'

# The recorded run of this cell stopped with
#   ModuleNotFoundError: No module named 'models.behavior_classifier'
# i.e. the pickle refers to its class through the module path
# `models.behavior_classifier`, which is not importable from the project root.
# NOTE(review): presumably that module lives at src/models/behavior_classifier.py
# (this notebook already imports its other code from `src.*`) - confirm.
# Exposing `src` on sys.path makes that module path resolvable; if the
# directory does not exist nothing changes.
src_dir = os.path.join(project_root, 'src')
if os.path.isdir(src_dir) and src_dir not in sys.path:
    sys.path.insert(0, src_dir)

with open(model_path, 'rb') as f:
    model = pickle.load(f)

# Load the movement data. Note that this is the full raw dataset: no
# train/test split is made anywhere in this cell.
data_loader = DataLoader(
    os.path.join(raw_dir, 'jaguar_movement_data.csv'),
    os.path.join(raw_dir, 'jaguar_additional_information.csv')
)
data = data_loader.load_data()

# Process data: per-record features first, then windowed features and the
# movement-state label for each window.
data = FeatureEngineer.add_time_features(data)
data = FeatureEngineer.calculate_movement_features(data)
window_data = FeatureEngineer.create_movement_windows(data)
window_data = FeatureEngineer.classify_movement_state(window_data)

# Clean data: drop windows with any missing value, then windows whose state
# is the 'unknown' placeholder.
window_data = window_data.dropna()
window_data = window_data[window_data['movement_state'] != 'unknown']

# Define feature columns
feature_cols = [
    'speed_mean', 'speed_max', 'speed_std',
    'distance_sum', 'distance_mean',
    'direction_mean', 'direction_std',
    'area_covered', 'movement_intensity',
    'path_efficiency', 'direction_variability'
]

# Fail early, with the offending names, if feature engineering did not
# produce every column the model input needs.
missing_feature_cols = [col for col in feature_cols if col not in window_data.columns]
if missing_feature_cols:
    raise KeyError(f"window_data is missing feature columns: {missing_feature_cols}")

X = window_data[feature_cols]
y = window_data['movement_state']

ModuleNotFoundError: No module named 'models.behavior_classifier'

## 2. Model Performance Overview

### 2.1 Basic Performance Metrics

In [None]:

# Predicted label and per-class probabilities for every row of X
y_pred = model.predict(X)
y_prob = model.predict_proba(X)

# Per-class precision / recall / F1 summary
report = classification_report(y, y_pred)
print("Classification Report:")
print(report)

# Overall accuracy: share of rows whose predicted state equals the labeled state
matches = (y == y_pred)
accuracy = matches.mean()
print(f"\nOverall Accuracy: {accuracy:.4f}")

### 2.2 Confusion Matrix

In [None]:
def plot_confusion_matrix(y_true, y_pred, classes):
    """Plot a row-normalized confusion matrix and return the raw counts.

    Args:
        y_true: True labels.
        y_pred: Predicted labels, one per entry of ``y_true``.
        classes: Labels in the order they should appear on both axes.

    Returns:
        The un-normalized confusion matrix (rows = true label, columns =
        predicted label), with rows and columns ordered like ``classes``.
    """
    # Pass the label order explicitly. Without `labels`, scikit-learn orders
    # rows/columns by sorted label value, which does not match the tick labels
    # below unless `classes` happens to be sorted.
    cm = confusion_matrix(y_true, y_pred, labels=classes)

    # Normalize each row by its total; rows for classes with no true samples
    # stay at 0 instead of turning into NaN (0/0).
    row_totals = cm.sum(axis=1, keepdims=True)
    cm_normalized = np.divide(
        cm,
        row_totals,
        out=np.zeros(cm.shape, dtype=float),
        where=row_totals != 0,
    )

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm_normalized, annot=True, fmt='.2f',
                xticklabels=classes, yticklabels=classes)
    plt.title('Normalized Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

    return cm

# Keep the class list in sorted order. The ROC section pairs classes[i] with
# column i of predict_proba, and scikit-learn estimators order those columns
# by their sorted class labels.
# NOTE(review): assumes the model wrapper returns the underlying estimator's
# probabilities unchanged - confirm against the wrapper's classes_ attribute.
classes = sorted(['resting', 'foraging', 'traveling', 'exploring'])
cm = plot_confusion_matrix(y, y_pred, classes)

## 3. Detailed Analysis

### 3.1 Feature Importance

In [None]:
def plot_feature_importance(model, feature_names):
    """Draw a bar chart of feature importances, largest first.

    Args:
        model: Object exposing ``classifier.feature_importances_``.
        feature_names: Feature names, positionally matching the importances.
    """
    scores = model.classifier.feature_importances_
    # Positions of the importances, ordered from most to least important
    ranking = np.argsort(scores)[::-1]
    bar_positions = range(len(scores))
    ranked_names = [feature_names[position] for position in ranking]

    plt.figure(figsize=(12, 6))
    plt.title("Feature Importances")
    plt.bar(bar_positions, scores[ranking])
    plt.xticks(bar_positions, ranked_names, rotation=45)
    plt.tight_layout()
    plt.show()
    
plot_feature_importance(model, feature_cols)

### 3.2 ROC Curves

In [None]:
def plot_roc_curves(y_true, y_prob, classes):
    """Plot one-vs-rest ROC curves, one per class.

    Args:
        y_true: True labels (any array-like, including a plain list).
        y_prob: Class probabilities of shape (n_samples, n_classes).
            Column ``i`` is treated as the score for ``classes[i]``, so
            ``classes`` must be in the same order as the columns.
        classes: Class labels, in the column order of ``y_prob``.

    Raises:
        ValueError: If ``y_prob`` is not 2-D, its column count differs from
            ``len(classes)``, or its row count differs from ``len(y_true)``.
    """
    import warnings

    # Convert to arrays so `y_true == label` is always element-wise
    # (comparing a plain list with a string yields a single False).
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)
    n_classes = len(classes)

    if y_prob.ndim != 2 or y_prob.shape[1] != n_classes:
        raise ValueError(
            f"y_prob must have shape (n_samples, {n_classes}) to match classes; "
            f"got {y_prob.shape}"
        )
    if y_prob.shape[0] != y_true.shape[0]:
        raise ValueError(
            f"y_true has {y_true.shape[0]} samples but y_prob has {y_prob.shape[0]} rows"
        )

    # Calculate ROC curve and ROC area for each class
    curves = []
    for i, label in enumerate(classes):
        is_label = y_true == label
        # A ROC curve needs both positive and negative samples.
        if is_label.all() or not is_label.any():
            warnings.warn(f"Skipping ROC curve for {label!r}: it needs both positive and negative samples")
            continue
        fpr, tpr, _ = roc_curve(is_label, y_prob[:, i])
        curves.append((label, fpr, tpr, auc(fpr, tpr)))

    # Plot ROC curves
    plt.figure(figsize=(10, 8))
    for label, fpr, tpr, area in curves:
        plt.plot(fpr, tpr, label=f'{label} (AUC = {area:.2f})')

    # Diagonal reference line (chance level)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves')
    plt.legend(loc="lower right")
    plt.show()

plot_roc_curves(y, y_prob, classes)

## 4. Behavioral Analysis

### 4.1 Movement Patterns by Time

In [None]:
def plot_movement_patterns(data):
    """Plot, as a stacked area chart, the share of each movement state per hour.

    Args:
        data: DataFrame with an ``hour`` column and a ``movement_state`` column.
    """
    # Rows = hour, columns = state; normalize='index' makes each row sum to 1.
    movement_by_hour = pd.crosstab(
        data['hour'],
        data['movement_state'],
        colnames=['state'],
        normalize='index',
    )

    # Create the axes first and hand them to pandas. Without `ax=`,
    # DataFrame.plot opens its own new figure, so a figure created beforehand
    # with plt.figure(figsize=...) is left empty and its size is never used.
    fig, ax = plt.subplots(figsize=(15, 6))
    movement_by_hour.plot(kind='area', stacked=True, ax=ax)
    ax.set_title('Movement State Distribution Throughout the Day')
    ax.set_xlabel('Hour of Day')
    ax.set_ylabel('Proportion')
    ax.legend(title='Movement State', bbox_to_anchor=(1.05, 1))
    fig.tight_layout()
    plt.show()

plot_movement_patterns(window_data)

### 4.2 Spatial Distribution of Behaviors

In [None]:
def plot_spatial_behaviors(data):
    """Scatter the records on a map, one color per movement state.

    Args:
        data: DataFrame with ``longitude``, ``latitude`` and
            ``movement_state`` columns.
    """
    fig, ax = plt.subplots(figsize=(15, 10),
                           subplot_kw={'projection': ccrs.PlateCarree()})

    # Base map layers; only the borders layer gets a custom line style.
    map_layers = (
        (cfeature.LAND, {}),
        (cfeature.OCEAN, {}),
        (cfeature.COASTLINE, {}),
        (cfeature.BORDERS, {'linestyle': ':'}),
    )
    for layer, layer_style in map_layers:
        ax.add_feature(layer, **layer_style)

    # One evenly spaced Set3 color per distinct state, in order of first appearance.
    state_column = data['movement_state']
    distinct_states = state_column.unique()
    palette = plt.cm.Set3(np.linspace(0, 1, len(distinct_states)))

    for position, state in enumerate(distinct_states):
        in_state = state_column == state
        ax.scatter(data.loc[in_state, 'longitude'],
                   data.loc[in_state, 'latitude'],
                   c=[palette[position]],
                   label=state,
                   alpha=0.6,
                   s=50)

    plt.legend(title='Movement State')
    plt.title('Spatial Distribution of Movement Behaviors')
    plt.show()

plot_spatial_behaviors(window_data)


## 5. Model Validation

### 5.1 Prediction Confidence Analysis

In [None]:
def analyze_prediction_confidence(y_prob, y_pred, threshold=0.8, y_true=None):
    """Report accuracy among high-confidence predictions and plot confidences.

    A prediction's confidence is its largest class probability.

    Args:
        y_prob: Class probabilities of shape (n_samples, n_classes).
        y_pred: Predicted labels, one per row of ``y_prob``.
        threshold: Minimum confidence for a prediction to count as
            high-confidence.
        y_true: True labels, one per row of ``y_prob``. When omitted, the
            notebook-level variable ``y`` is used, which keeps existing
            two-argument calls working; pass it explicitly in new code.

    Raises:
        ValueError: If the three inputs do not have the same number of samples.
    """
    if y_true is None:
        # Backward compatibility: earlier versions read the global `y` directly.
        y_true = y

    # Plain arrays make the masking and comparison below purely positional.
    y_prob = np.asarray(y_prob)
    predicted = np.asarray(y_pred)
    actual = np.asarray(y_true)
    if not (len(y_prob) == len(predicted) == len(actual)):
        raise ValueError(
            f"Length mismatch: y_prob={len(y_prob)}, y_pred={len(predicted)}, y_true={len(actual)}"
        )

    # Get maximum probability for each prediction
    max_probs = np.max(y_prob, axis=1)

    # Analyze high confidence predictions
    high_conf_mask = max_probs >= threshold
    n_high_conf = int(high_conf_mask.sum())

    print(f"Predictions with confidence >= {threshold}:")
    print(f"Count: {n_high_conf} ({high_conf_mask.mean()*100:.1f}% of total)")
    if n_high_conf:
        high_conf_accuracy = (predicted[high_conf_mask] == actual[high_conf_mask]).mean()
        print(f"Accuracy: {high_conf_accuracy:.4f}")
    else:
        # The mean of an empty selection would be NaN; say so explicitly instead.
        print("Accuracy: n/a (no predictions at or above the threshold)")

    # Plot confidence distribution
    plt.figure(figsize=(10, 6))
    plt.hist(max_probs, bins=50)
    plt.axvline(threshold, color='r', linestyle='--', label=f'Threshold ({threshold})')
    plt.title('Distribution of Prediction Confidence')
    plt.xlabel('Maximum Probability')
    plt.ylabel('Count')
    plt.legend()
    plt.show()

analyze_prediction_confidence(y_prob, y_pred)

### 5.2 Error Analysis

In [None]:
def analyze_errors(y_true, y_pred, data):
    """Plot and summarize the misclassified rows.

    Args:
        y_true: True labels, row-aligned with ``data``.
        y_pred: Predicted labels, row-aligned with ``data``.
        data: DataFrame with ``time_of_day``, ``movement_state`` and
            ``speed_mean`` columns.
    """
    # Rows where the prediction disagrees with the label
    is_wrong = y_true != y_pred
    wrong_rows = data[is_wrong].copy()

    # (seaborn plotter, column mapping, title) for each diagnostic figure:
    # error counts by time of day, then speed spread per true state.
    panels = (
        (sns.countplot, {'x': 'time_of_day'}, 'Errors by Time of Day'),
        (sns.boxplot, {'x': 'movement_state', 'y': 'speed_mean'}, 'Error Distribution by Speed'),
    )
    for draw, column_mapping, panel_title in panels:
        plt.figure(figsize=(12, 5))
        draw(data=wrong_rows, **column_mapping)
        plt.title(panel_title)
        plt.xticks(rotation=45)
        plt.show()

    # Text summary
    n_wrong = is_wrong.sum()
    pct_wrong = is_wrong.mean() * 100
    state_shares = wrong_rows['movement_state'].value_counts(normalize=True)
    print("\nError Analysis Summary:")
    print("-----------------------")
    print(f"Total errors: {n_wrong} ({pct_wrong:.1f}% of data)")
    print("\nErrors by true state:")
    print(state_shares)

analyze_errors(y, y_pred, window_data)


## 6. Individual Jaguar Analysis

In [None]:
def analyze_individual_performance(data, y_true, y_pred):
    """Compute and plot prediction accuracy separately for each individual.

    Args:
        data: DataFrame with ``individual_id`` and ``movement_state`` columns.
        y_true: True labels, row-aligned with ``data``.
        y_pred: Predicted labels, row-aligned with ``data``.

    Returns:
        DataFrame with one row per individual and the columns ``jaguar_id``,
        ``accuracy``, ``n_samples`` and ``behavior_distribution`` (the
        normalized value counts of that individual's movement states).
    """
    def summarize(jaguar_id):
        """Build the result record for a single individual."""
        belongs = data['individual_id'] == jaguar_id
        hits = y_true[belongs] == y_pred[belongs]
        state_shares = data.loc[belongs, 'movement_state'].value_counts(normalize=True)
        return {
            'jaguar_id': jaguar_id,
            'accuracy': hits.mean(),
            'n_samples': belongs.sum(),
            'behavior_distribution': state_shares
        }

    # One record per individual, in order of first appearance
    results_df = pd.DataFrame(
        [summarize(jaguar_id) for jaguar_id in data['individual_id'].unique()]
    )

    # Plot individual accuracies
    plt.figure(figsize=(12, 6))
    sns.barplot(data=results_df, x='jaguar_id', y='accuracy')
    plt.title('Model Accuracy by Individual')
    plt.xticks(rotation=45)
    plt.show()

    return results_df

individual_results = analyze_individual_performance(window_data, y, y_pred)


## 7. Conclusions and Recommendations

In [None]:
def print_conclusions(overall_accuracy=None):
    """Print the evaluation summary.

    Only the accuracy line is computed; the strengths, limitations and
    recommendations are fixed text and are not derived from the results
    of the cells above.

    Args:
        overall_accuracy: Accuracy to report. When omitted, the notebook-level
            variable ``accuracy`` is used, which keeps the existing
            zero-argument call working; pass it explicitly in new code.
    """
    if overall_accuracy is None:
        # Backward compatibility: earlier versions read the global `accuracy` directly.
        overall_accuracy = accuracy

    print("Model Performance Summary:")
    print("-------------------------")
    print(f"Overall Accuracy: {overall_accuracy:.4f}")
    print("\nStrengths:")
    print("- Most reliable in distinguishing resting vs. traveling states")
    print("- High confidence predictions (>0.8) show improved accuracy")
    print("- Consistent performance across different times of day")

    print("\nLimitations:")
    print("- Some confusion between foraging and exploring states")
    print("- Performance varies across individuals")
    print("- Edge cases in transition periods")

    print("\nRecommendations:")
    print("1. Consider collecting more data for underrepresented behaviors")
    print("2. Investigate individual-specific movement patterns")
    print("3. Refine feature engineering for transition periods")
    print("4. Implement confidence thresholds for critical applications")

print_conclusions()

This notebook provides a comprehensive evaluation of the jaguar movement behavior model, including:
1. Basic performance metrics
2. Detailed behavior analysis
3. Spatial and temporal patterns
4. Individual jaguar analysis
5. Error analysis
6. Recommendations for improvement

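The visualizations and analyses help clarify both the model's strengths and its limitations, providing insights for future improvements. Note that every metric in this notebook is computed on all labeled movement windows loaded in Section 1; no separate held-out split is made here. If the model was trained on any of these windows, the reported scores will be optimistic and should not be read as estimates of performance on unseen data.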