In [1]:
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, cohen_kappa_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Suppress warnings so the cell output only shows the progress messages printed below.
# NOTE(review): both switches are process-wide. The blanket filter hides every warning
# category from every library, so re-enable warnings when debugging unexpected results.
pd.options.mode.chained_assignment = None  # default='warn'; turns off pandas' chained-assignment warning
import warnings
warnings.filterwarnings("ignore")  # ignore all warnings raised from here on

# Read the training and validation tables (paths are relative to the working directory)
train_data = pd.read_csv('train_data.csv')
val_data = pd.read_csv('val_data.csv')

# Predictor columns: amplitude / phase / offset for each of red, crswir and rcc,
# followed by the two terrain columns. The target column is 'phen'.
features = [
    'amplitude_red', 'phase_red', 'offset_red',
    'amplitude_crswir', 'phase_crswir', 'offset_crswir',
    'amplitude_rcc', 'phase_rcc', 'offset_rcc',
    'elevation', 'aspect',
]

# Split each table into its feature matrix and label vector
X_train, y_train = train_data[features], train_data['phen']
X_val, y_val = val_data[features], val_data['phen']

print("Data loaded and preprocessed.")

# Define kernels to test
kernels = ['linear', 'poly', 'rbf']
results = []

# Create the output directory before any training starts: DataFrame.to_csv does not
# create missing parent directories, so without this the first export would fail only
# after a complete SVM fit had already been paid for.
os.makedirs('results', exist_ok=True)

for kernel in kernels:
    print(f"Training SVM with {kernel} kernel.")
    svc = SVC(kernel=kernel, random_state=42)
    svc.fit(X_train, y_train)

    # Predict on the validation set. The predictions are stored as a column so that the
    # per-region breakdown below can slice them together with the true labels.
    val_data['predicted_phen'] = svc.predict(X_val)
    y_pred = val_data['predicted_phen']

    # Overall metrics. With labels=[1, 2] the flattened 2x2 confusion matrix is ordered
    # (true 1/pred 1, true 1/pred 2, true 2/pred 1, true 2/pred 2), so label 2 plays the
    # role of the "positive" class in the TN/FP/FN/TP naming used here.
    accuracy = accuracy_score(y_val, y_pred)
    kappa = cohen_kappa_score(y_val, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_val, y_pred, labels=[1, 2]).ravel()

    # Express each confusion-matrix cell as a percentage of all counted samples
    total = tn + fp + fn + tp
    tn_pct = tn / total * 100
    fp_pct = fp / total * 100
    fn_pct = fn / total * 100
    tp_pct = tp / total * 100

    # Store overall metrics (one row per kernel in the final summary table)
    results.append({
        'Kernel': kernel,
        'Overall Accuracy': accuracy,
        'Kappa': kappa,
        'True Positives': tp,
        'True Negatives': tn,
        'False Positives': fp,
        'False Negatives': fn,
        'TP %': tp_pct,
        'TN %': tn_pct,
        'FP %': fp_pct,
        'FN %': fn_pct
    })

    # Compute the same metrics separately for every GRECO region.
    # groupby(sort=False) walks the table once and yields the regions in order of first
    # appearance (the order .unique() would give); rows whose region is missing are skipped.
    metrics_per_greco = []

    for greco_region, region_data in val_data.groupby('greco_region', sort=False):
        y_val_region = region_data['phen']
        y_pred_region = region_data['predicted_phen']

        accuracy_region = accuracy_score(y_val_region, y_pred_region)
        kappa_region = cohen_kappa_score(y_val_region, y_pred_region)
        # Region-specific names keep the overall counts above from being overwritten
        tn_region, fp_region, fn_region, tp_region = confusion_matrix(
            y_val_region, y_pred_region, labels=[1, 2]
        ).ravel()

        metrics_per_greco.append({
            'Kernel': kernel,
            'GRECO Region': greco_region,
            'Accuracy': accuracy_region,
            'Kappa': kappa_region,
            'True Positives': tp_region,
            'True Negatives': tn_region,
            'False Positives': fp_region,
            'False Negatives': fn_region
        })

    # One CSV per kernel holding the per-region breakdown
    metrics_df = pd.DataFrame(metrics_per_greco)
    metrics_df.to_csv(f'results/exp3_validation_metrics_per_greco_region_{kernel}.csv', index=False)
    print(f"Validation metrics breakdown per GRECO region for {kernel} kernel saved.")

# Save overall results (one row per kernel)
results_df = pd.DataFrame(results)
results_df.to_csv('results/exp3_overall_results.csv', index=False)
print("Overall results saved.")

# # Plot feature importance for the linear kernel (coefficients from the linear SVM)
# if 'linear' in kernels:
#     linear_svc = SVC(kernel='linear', random_state=42)
#     linear_svc.fit(X_train, y_train)
#     importance_df = pd.DataFrame({'Feature': features, 'Importance': linear_svc.coef_[0]})
#     importance_df = importance_df.sort_values(by='Importance', ascending=False)

#     plt.figure(figsize=(12, 6))
#     plt.barh(importance_df['Feature'], importance_df['Importance'], color='skyblue')
#     plt.xlabel('Feature Coefficient')
#     plt.title('Feature Importance from Linear SVM')
#     plt.gca().invert_yaxis()
#     plt.grid(True, linestyle='--', linewidth=0.5)

#     # Save the feature importance plot
#     os.makedirs('images', exist_ok=True)
#     plt.savefig('images/exp3_feature_importance_linear.png', dpi=300)
#     plt.show()


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
