## 📋 Section 1: Setup & Configuration

Adjust these parameters to customize your analysis.

In [None]:
# Configuration Parameters
# Every tunable setting of the notebook lives in this cell; later cells read
# these names directly, so re-run the whole notebook after changing a value.

# --- Synthetic data generation ---
N_SATELLITES = 150   # total number of satellites requested from the generator
TIME_STEPS = 80      # timesteps simulated for every satellite
NOISE_LEVEL = 0.08   # noise scale: position noise std is NOISE_LEVEL * altitude, velocity noise std is NOISE_LEVEL
RANDOM_SEED = 42     # seeds NumPy's global RNG and the random_state of the scikit-learn estimators

# Share of satellites per orbit class. LEO and MEO counts are int(N * ratio);
# the generator gives whatever remains to GEO, so GEO_RATIO is passed along
# but does not influence the count.
LEO_RATIO = 0.6
MEO_RATIO = 0.25
GEO_RATIO = 0.15

# Pair separations below these values are labelled HIGH_RISK / CAUTION
# (everything else is SAFE). The plots label these distances in km.
HIGH_RISK_THRESHOLD = 5
CAUTION_THRESHOLD = 10

# DBSCAN runs on standardized features; DBSCAN_EPS is divided by 1000 before
# it is passed as `eps`, so 800 means eps = 0.8.
DBSCAN_EPS = 800
DBSCAN_MIN_SAMPLES = 4

# Number of clusters requested from K-Means.
KMEANS_CLUSTERS = 5

# Decision tree capacity controls.
TREE_MAX_DEPTH = 10
TREE_MIN_SAMPLES_SPLIT = 5

# K-nearest-neighbours settings ('distance' is passed as the `weights` option).
KNN_NEIGHBORS = 8
KNN_WEIGHTS = 'distance'

# Support vector machine settings.
SVM_C = 5.0
SVM_KERNEL = 'rbf'
SVM_GAMMA = 'scale'

print("Parameters configured")

: 

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
import warnings
warnings.filterwarnings('ignore')


from sklearn.cluster import DBSCAN, KMeans
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (accuracy_score, classification_report, 
                             confusion_matrix, silhouette_score,
                             roc_curve, auc, precision_recall_fscore_support)


# Global plot appearance used by every figure in the notebook.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Confirmation shown once the import cell has run (spelling of "libraries" corrected).
print("libraries imported successfully")

## 📊 Section 2: Data Generation

In [None]:
# Seed NumPy's global RNG so the np.random draws made while generating the
# synthetic data are repeatable when the notebook is run top to bottom.
np.random.seed(RANDOM_SEED)

def generate_satellite_data(n_satellites, timesteps, leo_ratio, meo_ratio, geo_ratio, noise,
                            high_risk_threshold=None, caution_threshold=None):
    """Simulate noisy satellite tracks and label every satellite pair by separation.

    Each satellite gets a random altitude and speed drawn from the range of its
    orbit class, plus random start angles. Its position is then sampled at every
    timestep with Gaussian noise (the altitude value is used directly as the
    orbit radius). Values are drawn from NumPy's global RNG, so seed it
    beforehand for repeatable output.

    Parameters
    ----------
    n_satellites : int
        Total number of satellites to simulate.
    timesteps : int
        Number of samples per satellite.
    leo_ratio, meo_ratio : float
        Share of satellites placed in LEO / MEO; counts are ``int(n * ratio)``.
    geo_ratio : float
        Accepted for interface compatibility. GEO receives the satellites left
        over after LEO and MEO, so this value does not affect the count.
    noise : float
        Noise scale: position noise has std ``noise * altitude``, planar
        velocity noise has std ``noise`` and vertical velocity ``noise * 0.5``.
    high_risk_threshold, caution_threshold : float, optional
        Separation limits for the 'HIGH_RISK' and 'CAUTION' labels. When left
        as ``None`` the notebook-level ``HIGH_RISK_THRESHOLD`` /
        ``CAUTION_THRESHOLD`` constants are used.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The per-satellite, per-timestep state table and the pairwise table
        (one row per unordered satellite pair per timestep). Both tables keep
        their columns even when they are empty.
    """
    # Fall back to the notebook configuration when no thresholds are supplied.
    if high_risk_threshold is None:
        high_risk_threshold = HIGH_RISK_THRESHOLD
    if caution_threshold is None:
        caution_threshold = CAUTION_THRESHOLD

    orbits = {
        'LEO': {'altitude': (400, 2000), 'velocity': (7.5, 8.0)},
        'MEO': {'altitude': (2000, 35786), 'velocity': (3.5, 7.5)},
        'GEO': {'altitude': (35786, 42164), 'velocity': (3.0, 3.2)}
    }
    satellite_columns = ['satellite_id', 'orbit_type', 'timestep',
                         'x', 'y', 'z', 'vx', 'vy', 'vz',
                         'altitude', 'velocity', 'inclination']

    data = []
    n_leo = int(n_satellites * leo_ratio)
    n_meo = int(n_satellites * meo_ratio)
    n_geo = n_satellites - n_leo - n_meo  # GEO takes the remainder

    satellite_id = 0

    # NOTE: the order of the np.random calls below is kept exactly as before so
    # that a given seed still produces the same satellites.
    for orbit_type, count in [('LEO', n_leo), ('MEO', n_meo), ('GEO', n_geo)]:
        params = orbits[orbit_type]

        for _ in range(count):
            altitude = np.random.uniform(*params['altitude'])
            velocity = np.random.uniform(*params['velocity'])
            inclination = np.random.uniform(0, 180)

            theta = np.random.uniform(0, 2*np.pi)
            phi = np.random.uniform(0, np.pi)

            for t in range(timesteps):
                # Angular progress along the track at timestep t.
                angle = theta + (velocity / altitude) * t * 0.1

                x = altitude * np.sin(phi) * np.cos(angle) + np.random.normal(0, noise * altitude)
                y = altitude * np.sin(phi) * np.sin(angle) + np.random.normal(0, noise * altitude)
                z = altitude * np.cos(phi) + np.random.normal(0, noise * altitude)

                vx = -velocity * np.sin(angle) + np.random.normal(0, noise)
                vy = velocity * np.cos(angle) + np.random.normal(0, noise)
                vz = np.random.normal(0, noise * 0.5)

                data.append({
                    'satellite_id': f'SAT-{satellite_id:03d}',
                    'orbit_type': orbit_type,
                    'timestep': t,
                    'x': x, 'y': y, 'z': z,
                    'vx': vx, 'vy': vy, 'vz': vz,
                    'altitude': altitude,
                    'velocity': velocity,
                    'inclination': inclination
                })

            satellite_id += 1

    # Passing the column list keeps the schema intact even when no rows exist.
    df = pd.DataFrame(data, columns=satellite_columns)

    return df, _pairwise_collisions(df, timesteps, high_risk_threshold, caution_threshold)


def _pairwise_collisions(df, timesteps, high_risk_threshold, caution_threshold):
    """Build the pairwise separation table for every timestep with vectorised NumPy ops.

    Pairs are enumerated in generation order (first satellite before second),
    which matches the previous ID-ordered loop for the zero-padded three-digit
    IDs the generator produces.
    """
    columns = ['timestep', 'sat1', 'sat2', 'distance', 'relative_velocity',
               'sat1_altitude', 'sat2_altitude', 'altitude_diff',
               'sat1_orbit', 'sat2_orbit', 'risk_level']

    frames = []
    for t in range(timesteps):
        snapshot = df[df['timestep'] == t]
        n_sats = len(snapshot)
        if n_sats < 2:
            continue  # no pairs can be formed

        # Index pairs (i, j) with i < j, in row-major order.
        first, second = np.triu_indices(n_sats, k=1)

        ids = snapshot['satellite_id'].to_numpy()
        orbit = snapshot['orbit_type'].to_numpy()
        altitude = snapshot['altitude'].to_numpy(dtype=float)
        position = snapshot[['x', 'y', 'z']].to_numpy(dtype=float)
        velocity = snapshot[['vx', 'vy', 'vz']].to_numpy(dtype=float)

        dpos = position[first] - position[second]
        dvel = velocity[first] - velocity[second]
        distance = np.sqrt(dpos[:, 0]**2 + dpos[:, 1]**2 + dpos[:, 2]**2)
        rel_vel = np.sqrt(dvel[:, 0]**2 + dvel[:, 1]**2 + dvel[:, 2]**2)

        # The HIGH_RISK test is applied first, then CAUTION, otherwise SAFE.
        risk = np.where(distance < high_risk_threshold, 'HIGH_RISK',
                        np.where(distance < caution_threshold, 'CAUTION', 'SAFE'))

        frames.append(pd.DataFrame({
            'timestep': np.full(first.size, t),
            'sat1': ids[first],
            'sat2': ids[second],
            'distance': distance,
            'relative_velocity': rel_vel,
            'sat1_altitude': altitude[first],
            'sat2_altitude': altitude[second],
            'altitude_diff': np.abs(altitude[first] - altitude[second]),
            'sat1_orbit': orbit[first],
            'sat2_orbit': orbit[second],
            'risk_level': risk,
        }, columns=columns))

    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

# Build the two working tables from the configuration cell's settings.
satellites_df, collisions_df = generate_satellite_data(
    n_satellites=N_SATELLITES,
    timesteps=TIME_STEPS,
    leo_ratio=LEO_RATIO,
    meo_ratio=MEO_RATIO,
    geo_ratio=GEO_RATIO,
    noise=NOISE_LEVEL,
)

# Quick size and class-balance check, then preview the pair table.
print(f"Generated {len(satellites_df)} satellite positions and {len(collisions_df)} collision pairs")
print(f"\nRisk distribution:\n{collisions_df['risk_level'].value_counts()}")
collisions_df.head()

## 🔍 Section 3: Exploratory Data Analysis

In [None]:
# Six-panel overview of the generated data: satellite positions, pair
# separations, risk balance, feature relationships and the trend over time.
fig = plt.figure(figsize=(18, 12))

# 3D satellite positions
# Only timestep 0 is drawn, one colour per orbit class.
ax1 = fig.add_subplot(2, 3, 1, projection='3d')
for orbit in ['LEO', 'MEO', 'GEO']:
    orbit_data = satellites_df[(satellites_df['orbit_type'] == orbit) & (satellites_df['timestep'] == 0)]
    ax1.scatter(orbit_data['x'], orbit_data['y'], orbit_data['z'], label=orbit, alpha=0.6, s=50)
ax1.set_xlabel('X (km)')
ax1.set_ylabel('Y (km)')
ax1.set_zlabel('Z (km)')
ax1.set_title('Satellite Distribution')
ax1.legend()

# Distance distribution
# Histogram of all pair separations with the two risk thresholds marked.
ax2 = fig.add_subplot(2, 3, 2)
ax2.hist(collisions_df['distance'], bins=50, color='steelblue', edgecolor='black', alpha=0.7)
ax2.axvline(HIGH_RISK_THRESHOLD, color='red', linestyle='--', label=f'High Risk: {HIGH_RISK_THRESHOLD}km')
ax2.axvline(CAUTION_THRESHOLD, color='orange', linestyle='--', label=f'Caution: {CAUTION_THRESHOLD}km')
ax2.set_xlabel('Distance (km)')
ax2.set_ylabel('Count')
ax2.set_title('Distance Distribution')
ax2.legend()
ax2.grid(alpha=0.3)

# Risk levels
# Bar colours are looked up per label, so they stay correct whatever order
# value_counts() returns the labels in.
ax3 = fig.add_subplot(2, 3, 3)
risk_counts = collisions_df['risk_level'].value_counts()
colors = {'SAFE': 'green', 'CAUTION': 'orange', 'HIGH_RISK': 'red'}
ax3.bar(risk_counts.index, risk_counts.values, color=[colors[x] for x in risk_counts.index], alpha=0.7)
ax3.set_ylabel('Count')
ax3.set_title('Risk Level Distribution')
ax3.grid(axis='y', alpha=0.3)

# Distance vs velocity scatter
# Risk labels are mapped to 0/1/2 and drawn with the reversed red-yellow-green
# colormap, so SAFE sits at the green end.
ax4 = fig.add_subplot(2, 3, 4)
scatter = ax4.scatter(collisions_df['distance'], collisions_df['relative_velocity'],
                     c=collisions_df['risk_level'].map({'SAFE': 0, 'CAUTION': 1, 'HIGH_RISK': 2}),
                     cmap='RdYlGn_r', alpha=0.5, s=10)
ax4.set_xlabel('Distance (km)')
ax4.set_ylabel('Relative Velocity (km/s)')
ax4.set_title('Distance vs Relative Velocity')
plt.colorbar(scatter, ax=ax4, label='Risk')
ax4.grid(alpha=0.3)

# Correlation heatmap
# Correlation between four numeric pair features.
ax5 = fig.add_subplot(2, 3, 5)
corr_features = ['distance', 'relative_velocity', 'sat1_altitude', 'altitude_diff']
corr_matrix = collisions_df[corr_features].corr()
sns.heatmap(corr_matrix, annot=True, fmt='.2f', cmap='coolwarm', center=0, ax=ax5)
ax5.set_title('Feature Correlation')

# Time series
# Mean pair separation per timestep with a +/- one standard deviation band.
ax6 = fig.add_subplot(2, 3, 6)
time_stats = collisions_df.groupby('timestep')['distance'].agg(['mean', 'std'])
ax6.plot(time_stats.index, time_stats['mean'], color='navy', label='Mean Distance')
ax6.fill_between(time_stats.index, time_stats['mean'] - time_stats['std'], time_stats['mean'] + time_stats['std'], alpha=0.3, color='navy')
ax6.axhline(HIGH_RISK_THRESHOLD, color='red', linestyle='--', alpha=0.5)
ax6.axhline(CAUTION_THRESHOLD, color='orange', linestyle='--', alpha=0.5)
ax6.set_xlabel('Timestep')
ax6.set_ylabel('Distance (km)')
ax6.set_title('Average Distance Over Time')
ax6.legend()
ax6.grid(alpha=0.3)

plt.tight_layout()
plt.show()

# Numeric summary of the main pair features.
print("\nStatistical Summary:")
print(collisions_df[['distance', 'relative_velocity', 'altitude_diff']].describe())

## 🎯 Section 4: DBSCAN Clustering

In [None]:
# Cluster the satellite pairs on four features: separation, relative speed
# and the altitude of each satellite in the pair.
X_cluster = collisions_df[['distance', 'relative_velocity', 'sat1_altitude', 'sat2_altitude']].values

# Standardize the features before clustering; X_scaled and scaler_dbscan are
# reused by the K-Means section.
scaler_dbscan = StandardScaler()
X_scaled = scaler_dbscan.fit_transform(X_cluster)

# DBSCAN_EPS is divided by 1000 here, so the configured value is not a
# distance in km: it becomes the eps radius in standardized feature space.
dbscan = DBSCAN(eps=DBSCAN_EPS/1000, min_samples=DBSCAN_MIN_SAMPLES)
clusters = dbscan.fit_predict(X_scaled)

# Keep the assignment on the pair table for the per-cluster summaries.
collisions_df['dbscan_cluster'] = clusters

# DBSCAN labels noise points as -1; they are not counted as a cluster.
n_clusters = len(set(clusters)) - (1 if -1 in clusters else 0)
n_noise = list(clusters).count(-1)

print(f"Found {n_clusters} clusters with {n_noise} noise points ({n_noise/len(clusters)*100:.1f}%)")

# The silhouette score needs at least two clusters; noise points are excluded.
# NOTE(review): the score is computed over every clustered row, which may be
# slow for a large pair table - scikit-learn offers a sample_size argument;
# confirm whether an approximation is acceptable here.
if n_clusters > 1:
    silhouette = silhouette_score(X_scaled[clusters != -1], clusters[clusters != -1])
    print(f"Silhouette Score: {silhouette:.3f}")

# Six-panel summary of the DBSCAN result. `clusters`, `X_scaled` and the
# 'dbscan_cluster' column come from the clustering step above.
fig = plt.figure(figsize=(18, 10))

# 3D clusters
ax1 = fig.add_subplot(2, 3, 1, projection='3d')
scatter = ax1.scatter(collisions_df['distance'], collisions_df['relative_velocity'], collisions_df['altitude_diff'],
                     c=clusters, cmap='viridis', alpha=0.6, s=20)
ax1.set_xlabel('Distance (km)')
ax1.set_ylabel('Relative Velocity (km/s)')
ax1.set_zlabel('Altitude Difference (km)')
ax1.set_title('DBSCAN Clusters (3D)')
plt.colorbar(scatter, ax=ax1, label='Cluster')

# 2D projection
ax2 = fig.add_subplot(2, 3, 2)
scatter2 = ax2.scatter(collisions_df['distance'], collisions_df['relative_velocity'], c=clusters, cmap='viridis', alpha=0.6, s=15)
ax2.set_xlabel('Distance (km)')
ax2.set_ylabel('Relative Velocity (km/s)')
ax2.set_title('Clusters: Distance vs Velocity')
plt.colorbar(scatter2, ax=ax2)
ax2.grid(alpha=0.3)

# Cluster sizes
ax3 = fig.add_subplot(2, 3, 3)
cluster_counts = pd.Series(clusters).value_counts().sort_index()
ax3.bar(cluster_counts.index, cluster_counts.values, color='teal', alpha=0.7, edgecolor='black')
ax3.set_xlabel('Cluster ID (-1 = Noise)')
ax3.set_ylabel('Points')
ax3.set_title('Cluster Sizes')
ax3.grid(axis='y', alpha=0.3)

# Risk per cluster
# crosstab sorts its columns alphabetically (CAUTION, HIGH_RISK, SAFE), so the
# columns are put into severity order and every colour is looked up by label.
# A positional colour list would paint SAFE red and CAUTION green.
ax4 = fig.add_subplot(2, 3, 4)
risk_palette = {'SAFE': 'green', 'CAUTION': 'orange', 'HIGH_RISK': 'red'}
cluster_risk = pd.crosstab(collisions_df['dbscan_cluster'], collisions_df['risk_level'])
cluster_risk = cluster_risk[[level for level in risk_palette if level in cluster_risk.columns]]
cluster_risk.plot(kind='bar', stacked=True, ax=ax4,
                  color=[risk_palette[level] for level in cluster_risk.columns], alpha=0.7)
ax4.set_xlabel('Cluster')
ax4.set_ylabel('Count')
ax4.set_title('Risk Distribution per Cluster')
ax4.legend(title='Risk')
ax4.tick_params(axis='x', rotation=0)  # explicit axes instead of pyplot's "current axes"
ax4.grid(axis='y', alpha=0.3)

# Eps sensitivity
# Re-fit DBSCAN over a range of eps settings and count the resulting clusters.
# The x axis uses the same scale as DBSCAN_EPS (value / 1000 = eps applied to
# the standardized features), so it is not a distance in km.
ax5 = fig.add_subplot(2, 3, 5)
eps_range = np.linspace(100, 2000, 20)
n_clusters_list = []
for eps_val in eps_range:
    db = DBSCAN(eps=eps_val/1000, min_samples=DBSCAN_MIN_SAMPLES)
    labels = db.fit_predict(X_scaled)
    n_clusters_list.append(len(set(labels)) - (1 if -1 in labels else 0))

ax5.plot(eps_range, n_clusters_list, marker='o', color='navy', linewidth=2)
ax5.axvline(DBSCAN_EPS, color='red', linestyle='--', label=f'Current: {DBSCAN_EPS}')
ax5.set_xlabel('Eps setting (value / 1000 = eps in standardized units)')
ax5.set_ylabel('Number of Clusters')
ax5.set_title('Parameter Sensitivity')
ax5.legend()
ax5.grid(alpha=0.3)

# Cluster statistics
ax6 = fig.add_subplot(2, 3, 6)
cluster_stats = collisions_df.groupby('dbscan_cluster')[['distance', 'relative_velocity', 'altitude_diff']].mean()
sns.heatmap(cluster_stats.T, annot=True, fmt='.1f', cmap='YlOrRd', ax=ax6)
ax6.set_xlabel('Cluster')
ax6.set_title('Average Features per Cluster')

plt.tight_layout()
plt.show()

print("\nCluster Statistics:")
print(cluster_stats)

## 🎯 Section 5: K-Means Clustering

In [None]:
# Fit K-Means on X_scaled, the standardized feature matrix built in the
# DBSCAN section, so both clusterings work in the same feature space.
kmeans = KMeans(n_clusters=KMEANS_CLUSTERS, random_state=RANDOM_SEED, n_init=10)
kmeans_clusters = kmeans.fit_predict(X_scaled)

# Keep the assignment on the pair table for the per-cluster breakdown.
collisions_df['kmeans_cluster'] = kmeans_clusters

inertia = kmeans.inertia_  # stored but not read again in this cell
# NOTE(review): the silhouette score is computed over every row, which may be
# slow for a large pair table - scikit-learn offers a sample_size argument;
# confirm whether an approximation is acceptable here.
silhouette_km = silhouette_score(X_scaled, kmeans_clusters)

print(f"‚ú® K-Means Clustering Results")
print(f"Clusters: {KMEANS_CLUSTERS} | Silhouette Score: {silhouette_km:.3f}")

# Single figure: every satellite pair coloured by risk level, with the K-Means
# centroids overlaid in original (unscaled) units.
fig, ax = plt.subplots(figsize=(14, 9), facecolor='white')

# One colour per risk level
risk_colors = {'SAFE': '#2ecc71', 'CAUTION': '#f39c12', 'HIGH_RISK': '#e74c3c'}
colors = [risk_colors[risk] for risk in collisions_df['risk_level']]

# Main scatter plot
scatter = ax.scatter(collisions_df['distance'],
                    collisions_df['relative_velocity'],
                    c=colors,
                    s=80,
                    alpha=0.6,
                    edgecolors='white',
                    linewidth=0.5)

# Add cluster centroids
# The centroids live in standardized space; undo the scaling so they can be
# drawn on the km / km/s axes. Columns 0 and 1 are distance and relative velocity.
# '*' is matplotlib's built-in star marker; a literal star character is not a
# valid marker string and makes scatter() raise.
centroids = scaler_dbscan.inverse_transform(kmeans.cluster_centers_)
ax.scatter(centroids[:, 0], centroids[:, 1],
          c='navy',
          marker='*',
          s=800,
          edgecolors='gold',
          linewidths=3,
          label='Cluster Centers',
          zorder=5)

# Annotate each centroid with its cluster index
for i, (x, y) in enumerate(centroids[:, :2]):
    ax.annotate(f'C{i}',
               xy=(x, y),
               xytext=(0, 0),
               textcoords='offset points',
               ha='center',
               va='center',
               fontsize=11,
               fontweight='bold',
               color='white',
               zorder=6)

# Styling
ax.set_xlabel('Distance (km)', fontsize=13, fontweight='bold')
ax.set_ylabel('Relative Velocity (km/s)', fontsize=13, fontweight='bold')
ax.set_title('🛰️ Satellite Collision Risk Clustering\nK-Means Analysis',
            fontsize=16, fontweight='bold', pad=20)

# Create custom legend (the scatter is coloured per point, so the legend
# entries are built by hand)
from matplotlib.patches import Patch
legend_elements = [
    Patch(facecolor='#2ecc71', label='SAFE', alpha=0.7),
    Patch(facecolor='#f39c12', label='CAUTION', alpha=0.7),
    Patch(facecolor='#e74c3c', label='HIGH RISK', alpha=0.7),
    plt.Line2D([0], [0], marker='*', color='w', markerfacecolor='navy',
              markeredgecolor='gold', markersize=15, label='Centroids', markeredgewidth=2)
]
ax.legend(handles=legend_elements, loc='upper right', fontsize=11, framealpha=0.95)

# Add grid
ax.grid(True, alpha=0.3, linestyle='--', linewidth=0.5)

# Add statistics box
stats_text = f'Clusters: {KMEANS_CLUSTERS}\nSilhouette: {silhouette_km:.3f}\nPoints: {len(collisions_df):,}'
ax.text(0.02, 0.98, stats_text,
       transform=ax.transAxes,
       fontsize=10,
       verticalalignment='top',
       bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

plt.tight_layout()
plt.show()

# Risk composition of every K-Means cluster
print(f"\n📊 Cluster Distribution:")
for i in range(KMEANS_CLUSTERS):
    cluster_data = collisions_df[collisions_df['kmeans_cluster'] == i]
    risk_counts = cluster_data['risk_level'].value_counts()
    print(f"  Cluster {i}: {len(cluster_data)} points - {dict(risk_counts)}")

## 🌳 Section 6: Decision Tree Classifier

In [None]:
# Features and target for the supervised models. The train/test split made
# here is reused by the later classifier sections.
feature_cols = ['distance', 'relative_velocity', 'sat1_altitude', 'sat2_altitude', 'altitude_diff']
X = collisions_df[feature_cols].values
y = collisions_df['risk_level'].values

# Integer-encode the risk labels in order of severity.
label_map = {'SAFE': 0, 'CAUTION': 1, 'HIGH_RISK': 2}
y_encoded = np.array([label_map[label] for label in y])

# A stratified split needs at least two samples of every class that occurs.
# Rare risk classes would otherwise make train_test_split raise, so fall back
# to a plain random split in that case.
_, class_counts = np.unique(y_encoded, return_counts=True)
can_stratify = class_counts.size > 0 and class_counts.min() >= 2
if not can_stratify:
    print("Warning: a risk class has fewer than 2 samples; splitting without stratification")

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=RANDOM_SEED,
    stratify=y_encoded if can_stratify else None)

dt_classifier = DecisionTreeClassifier(max_depth=TREE_MAX_DEPTH, min_samples_split=TREE_MIN_SAMPLES_SPLIT, random_state=RANDOM_SEED)
dt_classifier.fit(X_train, y_train)

y_pred_dt = dt_classifier.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)

print(f"Accuracy: {accuracy_dt:.3f}")
print("\nClassification Report:")
# Passing `labels` keeps the report aligned with the three class names even
# when a class is missing from the test split; zero_division=0 reports such
# classes as 0 instead of warning.
print(classification_report(y_test, y_pred_dt,
                            labels=list(label_map.values()),
                            target_names=list(label_map.keys()),
                            zero_division=0))

# Six-panel diagnostic figure for the decision tree trained above.
# Class ids follow the label encoding used for y_train / y_test.
class_ids = [0, 1, 2]
class_names = ['SAFE', 'CAUTION', 'HIGH_RISK']

fig = plt.figure(figsize=(18, 12))

# Tree structure
# plot_tree indexes class_names by position in classes_, so build the list
# from the classes the tree actually saw during training.
ax1 = fig.add_subplot(2, 3, 1)
plot_tree(dt_classifier, feature_names=feature_cols,
         class_names=[class_names[c] for c in dt_classifier.classes_],
         filled=True, max_depth=3, fontsize=8, ax=ax1)
ax1.set_title('Decision Tree (depth=3)')

# Feature importance (sorted from most to least important)
ax2 = fig.add_subplot(2, 3, 2)
importances = dt_classifier.feature_importances_
indices = np.argsort(importances)[::-1]
ax2.barh(range(len(importances)), importances[indices], color='steelblue', alpha=0.7)
ax2.set_yticks(range(len(importances)))
ax2.set_yticklabels([feature_cols[i] for i in indices])
ax2.set_xlabel('Importance')
ax2.set_title('Feature Importance')
ax2.grid(axis='x', alpha=0.3)

# Confusion matrix
# `labels` forces a 3x3 matrix so the tick labels always line up, even when a
# class does not occur in the test split.
ax3 = fig.add_subplot(2, 3, 3)
cm_dt = confusion_matrix(y_test, y_pred_dt, labels=class_ids)
sns.heatmap(cm_dt, annot=True, fmt='d', cmap='Blues', ax=ax3,
           xticklabels=class_names, yticklabels=class_names)
ax3.set_xlabel('Predicted')
ax3.set_ylabel('Actual')
ax3.set_title('Confusion Matrix')

# Depth vs accuracy
# Train/test accuracy for trees of increasing depth to show over-fitting.
ax4 = fig.add_subplot(2, 3, 4)
depths = range(1, 21)
train_scores = []
test_scores = []
for depth in depths:
    dt_temp = DecisionTreeClassifier(max_depth=depth, random_state=RANDOM_SEED)
    dt_temp.fit(X_train, y_train)
    train_scores.append(dt_temp.score(X_train, y_train))
    test_scores.append(dt_temp.score(X_test, y_test))

ax4.plot(depths, train_scores, 'o-', label='Train', linewidth=2)
ax4.plot(depths, test_scores, 's-', label='Test', linewidth=2)
ax4.axvline(TREE_MAX_DEPTH, color='red', linestyle='--', label=f'Current: {TREE_MAX_DEPTH}')
ax4.set_xlabel('Max Depth')
ax4.set_ylabel('Accuracy')
ax4.set_title('Complexity vs Accuracy')
ax4.legend()
ax4.grid(alpha=0.3)

# Metrics by class
# `labels` guarantees one value per class so the arrays match the three bar
# positions; zero_division=0 scores absent classes as 0.
ax5 = fig.add_subplot(2, 3, 5)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_test, y_pred_dt, labels=class_ids, zero_division=0)
x_pos = np.arange(len(class_ids))
width = 0.25
ax5.bar(x_pos - width, precision, width, label='Precision', alpha=0.8)
ax5.bar(x_pos, recall, width, label='Recall', alpha=0.8)
ax5.bar(x_pos + width, f1, width, label='F1', alpha=0.8)
ax5.set_xticks(x_pos)
ax5.set_xticklabels(class_names)
ax5.set_ylabel('Score')
ax5.set_title('Metrics by Risk Level')
ax5.legend()
ax5.grid(axis='y', alpha=0.3)

# Prediction distribution
# Reindex so missing classes appear as zero rows/columns instead of shifting
# the tick labels.
ax6 = fig.add_subplot(2, 3, 6)
pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred_dt})
pred_counts = pd.crosstab(pred_df['Actual'], pred_df['Predicted'])
pred_counts = pred_counts.reindex(index=class_ids, columns=class_ids, fill_value=0)
sns.heatmap(pred_counts, annot=True, fmt='d', cmap='YlGnBu', ax=ax6,
           xticklabels=class_names, yticklabels=class_names)
ax6.set_xlabel('Predicted')
ax6.set_ylabel('Actual')
ax6.set_title('Prediction Distribution')

plt.tight_layout()
plt.show()

## 🔢 Section 7: KNN Classifier

In [None]:
# ============================================
# 🔢 K-NEAREST NEIGHBORS CLASSIFIER
# ============================================

# Standardize features for KNN (distance-based algorithm).
# The scaler is fitted on the training split only and then applied to the
# test split; the scaled arrays are reused by the SVM section.
scaler_knn = StandardScaler()
X_train_scaled = scaler_knn.fit_transform(X_train)
X_test_scaled = scaler_knn.transform(X_test)

# Train KNN
print(f"🔄 Training KNN (n_neighbors={KNN_NEIGHBORS}, weights='{KNN_WEIGHTS}')...")
knn_classifier = KNeighborsClassifier(
    n_neighbors=KNN_NEIGHBORS,
    weights=KNN_WEIGHTS,
    metric='euclidean'
)
knn_classifier.fit(X_train_scaled, y_train)

# Predictions
y_pred_knn = knn_classifier.predict(X_test_scaled)
accuracy_knn = accuracy_score(y_test, y_pred_knn)

print(f"✅ Training complete!")
print(f"📊 Accuracy: {accuracy_knn:.3f}")
print(f"\n📋 Classification Report:")
# `labels` keeps the report aligned with the three class names even when a
# class is missing from the test split; zero_division=0 reports it as 0.
print(classification_report(y_test, y_pred_knn,
                          labels=[0, 1, 2],
                          target_names=['SAFE', 'CAUTION', 'HIGH_RISK'],
                          zero_division=0))

# Visualizations
# Class ids follow the label encoding used for y_train / y_test.
class_ids = [0, 1, 2]
class_names = ['SAFE', 'CAUTION', 'HIGH_RISK']
# Reversed colormap so SAFE (0) is green and HIGH_RISK (2) is red, matching
# the risk colouring of the exploratory scatter plot.
risk_cmap = 'RdYlGn_r'

fig = plt.figure(figsize=(18, 12))

# 1. K vs Accuracy (finding optimal K)
ax1 = fig.add_subplot(2, 3, 1)
k_values = range(1, 31, 2)
train_accuracies = []
test_accuracies = []

for k in k_values:
    knn_temp = KNeighborsClassifier(n_neighbors=k, weights=KNN_WEIGHTS)
    knn_temp.fit(X_train_scaled, y_train)
    train_accuracies.append(knn_temp.score(X_train_scaled, y_train))
    test_accuracies.append(knn_temp.score(X_test_scaled, y_test))

ax1.plot(k_values, train_accuracies, 'o-', label='Train Accuracy', linewidth=2)
ax1.plot(k_values, test_accuracies, 's-', label='Test Accuracy', linewidth=2)
ax1.axvline(KNN_NEIGHBORS, color='red', linestyle='--',
           label=f'Current K={KNN_NEIGHBORS}')
ax1.set_xlabel('Number of Neighbors (K)')
ax1.set_ylabel('Accuracy')
ax1.set_title('Optimal K Selection', fontsize=12, fontweight='bold')
ax1.legend()
ax1.grid(alpha=0.3)

# 2. Confusion Matrix
# `labels` forces a 3x3 matrix so the tick labels always line up.
ax2 = fig.add_subplot(2, 3, 2)
cm_knn = confusion_matrix(y_test, y_pred_knn, labels=class_ids)
sns.heatmap(cm_knn, annot=True, fmt='d', cmap='Greens', ax=ax2,
           xticklabels=class_names,
           yticklabels=class_names)
ax2.set_xlabel('Predicted')
ax2.set_ylabel('Actual')
ax2.set_title('Confusion Matrix', fontsize=12, fontweight='bold')

# 3. Decision Boundary (2D projection: distance vs relative_velocity)
ax3 = fig.add_subplot(2, 3, 3)
# Create mesh grid over the first two (scaled) features
x_min, x_max = X_test_scaled[:, 0].min() - 1, X_test_scaled[:, 0].max() + 1
y_min, y_max = X_test_scaled[:, 1].min() - 1, X_test_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                     np.linspace(y_min, y_max, 100))

# Predict on mesh (the remaining three features are held at their test-set mean)
mesh_features = np.c_[xx.ravel(), yy.ravel(),
                      np.full(xx.ravel().shape, X_test_scaled[:, 2].mean()),
                      np.full(xx.ravel().shape, X_test_scaled[:, 3].mean()),
                      np.full(xx.ravel().shape, X_test_scaled[:, 4].mean())]
Z = knn_classifier.predict(mesh_features)
Z = Z.reshape(xx.shape)

# Plot decision boundary
# Explicit level edges give one band per class id, and the fixed 0..2 colour
# range keeps region and point colours consistent whichever classes occur.
ax3.contourf(xx, yy, Z, alpha=0.3, cmap=risk_cmap,
             levels=[-0.5, 0.5, 1.5, 2.5], vmin=0, vmax=2)
scatter = ax3.scatter(X_test_scaled[:, 0], X_test_scaled[:, 1],
                     c=y_test, cmap=risk_cmap, vmin=0, vmax=2,
                     edgecolors='black', s=30, alpha=0.7)
ax3.set_xlabel('Distance (scaled)')
ax3.set_ylabel('Relative Velocity (scaled)')
ax3.set_title('KNN Decision Boundary (2D)', fontsize=12, fontweight='bold')
plt.colorbar(scatter, ax=ax3, ticks=[0, 1, 2],
            label='Risk Level')

# 4. Weights Comparison (uniform vs distance)
ax4 = fig.add_subplot(2, 3, 4)
weight_types = ['uniform', 'distance']
accuracies_by_weight = []

for weight in weight_types:
    knn_w = KNeighborsClassifier(n_neighbors=KNN_NEIGHBORS, weights=weight)
    knn_w.fit(X_train_scaled, y_train)
    accuracies_by_weight.append(knn_w.score(X_test_scaled, y_test))

bars = ax4.bar(weight_types, accuracies_by_weight, color=['coral', 'teal'], alpha=0.7)
ax4.set_ylabel('Accuracy')
ax4.set_title('Weight Type Comparison', fontsize=12, fontweight='bold')
ax4.grid(axis='y', alpha=0.3)

# Highlight current weight
if KNN_WEIGHTS == 'uniform':
    bars[0].set_edgecolor('red')
    bars[0].set_linewidth(3)
else:
    bars[1].set_edgecolor('red')
    bars[1].set_linewidth(3)

# 5. Precision, Recall, F1 by Class
# `labels` guarantees one value per class so the arrays match the three bar
# positions; zero_division=0 scores absent classes as 0.
ax5 = fig.add_subplot(2, 3, 5)
precision_knn, recall_knn, f1_knn, _ = precision_recall_fscore_support(
    y_test, y_pred_knn, labels=class_ids, zero_division=0)
x_pos = np.arange(len(class_ids))
width = 0.25
ax5.bar(x_pos - width, precision_knn, width, label='Precision', alpha=0.8, color='navy')
ax5.bar(x_pos, recall_knn, width, label='Recall', alpha=0.8, color='darkgreen')
ax5.bar(x_pos + width, f1_knn, width, label='F1-Score', alpha=0.8, color='darkred')
ax5.set_xticks(x_pos)
ax5.set_xticklabels(class_names)
ax5.set_ylabel('Score')
ax5.set_ylim([0, 1.1])
ax5.set_title('Metrics by Risk Level', fontsize=12, fontweight='bold')
ax5.legend()
ax5.grid(axis='y', alpha=0.3)

# 6. Sample Neighbor Visualization
ax6 = fig.add_subplot(2, 3, 6)
# Pick a random test sample (drawn from NumPy's global RNG)
sample_idx = np.random.randint(0, len(X_test_scaled))
sample = X_test_scaled[sample_idx:sample_idx+1]

# Find K nearest neighbors
distances, indices = knn_classifier.kneighbors(sample, n_neighbors=KNN_NEIGHBORS)

# Plot the sample and its neighbors (using first 2 features).
# The fixed 0..2 colour range keeps neighbour colours tied to the class id
# instead of being rescaled to whichever labels the neighbours happen to have.
ax6.scatter(X_train_scaled[:, 0], X_train_scaled[:, 1],
           c='lightgray', alpha=0.3, s=20, label='Other Training Data')
ax6.scatter(X_train_scaled[indices[0], 0], X_train_scaled[indices[0], 1],
           c=y_train[indices[0]], cmap=risk_cmap, vmin=0, vmax=2, s=100,
           edgecolors='black', linewidths=2, label='K Neighbors')
ax6.scatter(sample[0, 0], sample[0, 1], c='blue', marker='*',
           s=500, edgecolors='black', linewidths=2, label='Test Sample')
ax6.set_xlabel('Distance (scaled)')
ax6.set_ylabel('Relative Velocity (scaled)')
ax6.set_title(f'Sample Prediction: {KNN_NEIGHBORS} Nearest Neighbors',
             fontsize=12, fontweight='bold')
ax6.legend()
ax6.grid(alpha=0.3)

plt.tight_layout()
plt.show()

print(f"\n🎯 Sample Test Case:")
print(f"   Predicted: {class_names[y_pred_knn[sample_idx]]}")
print(f"   Actual: {class_names[y_test[sample_idx]]}")
print(f"   Neighbor distances: {distances[0][:5]}")  # Show first 5

# ============================================
# ⚡ SUPPORT VECTOR MACHINE CLASSIFIER
# ============================================

# Train SVM on the standardized split prepared for KNN.
# NOTE(review): scikit-learn documents SVC fit time as growing at least
# quadratically with the number of samples - confirm the training set size
# is practical before running this on the full pair table.
print(f"🔄 Training SVM (kernel='{SVM_KERNEL}', C={SVM_C}, gamma='{SVM_GAMMA}')...")
svm_classifier = SVC(
    C=SVM_C,
    kernel=SVM_KERNEL,
    gamma=SVM_GAMMA,
    random_state=RANDOM_SEED,
    probability=True  # Enable probability estimates for ROC curve
)
svm_classifier.fit(X_train_scaled, y_train)

# Predictions
y_pred_svm = svm_classifier.predict(X_test_scaled)
accuracy_svm = accuracy_score(y_test, y_pred_svm)

print(f"✅ Training complete!")
print(f"📊 Accuracy: {accuracy_svm:.3f}")
print(f"📊 Number of support vectors: {svm_classifier.n_support_}")
print(f"\n📋 Classification Report:")
# `labels` keeps the report aligned with the three class names even when a
# class is missing from the test split; zero_division=0 reports it as 0.
print(classification_report(y_test, y_pred_svm,
                          labels=[0, 1, 2],
                          target_names=['SAFE', 'CAUTION', 'HIGH_RISK'],
                          zero_division=0))

# Visualizations
# Class ids follow the label encoding used for y_train / y_test.
class_ids = [0, 1, 2]
class_names = ['SAFE', 'CAUTION', 'HIGH_RISK']
# Reversed colormap so SAFE (0) is green and HIGH_RISK (2) is red, matching
# the risk colouring of the exploratory scatter plot.
risk_cmap = 'RdYlGn_r'

fig = plt.figure(figsize=(18, 12))

# 1. Kernel Comparison
ax1 = fig.add_subplot(2, 3, 1)
kernels = ['linear', 'rbf', 'poly']
kernel_accuracies = []

for kernel in kernels:
    svm_temp = SVC(kernel=kernel, C=SVM_C, gamma=SVM_GAMMA, random_state=RANDOM_SEED)
    svm_temp.fit(X_train_scaled, y_train)
    kernel_accuracies.append(svm_temp.score(X_test_scaled, y_test))

bars = ax1.bar(kernels, kernel_accuracies, color=['coral', 'teal', 'purple'], alpha=0.7)
ax1.set_ylabel('Accuracy')
ax1.set_title('Kernel Comparison', fontsize=12, fontweight='bold')
ax1.grid(axis='y', alpha=0.3)

# Highlight current kernel
if SVM_KERNEL in kernels:
    idx = kernels.index(SVM_KERNEL)
    bars[idx].set_edgecolor('red')
    bars[idx].set_linewidth(3)

# 2. Confusion Matrix
# `labels` forces a 3x3 matrix so the tick labels always line up.
ax2 = fig.add_subplot(2, 3, 2)
cm_svm = confusion_matrix(y_test, y_pred_svm, labels=class_ids)
sns.heatmap(cm_svm, annot=True, fmt='d', cmap='Purples', ax=ax2,
           xticklabels=class_names,
           yticklabels=class_names)
ax2.set_xlabel('Predicted')
ax2.set_ylabel('Actual')
ax2.set_title('Confusion Matrix', fontsize=12, fontweight='bold')

# 3. Decision Boundary (2D projection)
# Grid over the first two scaled features; the remaining three are held at
# their test-set mean.
ax3 = fig.add_subplot(2, 3, 3)
x_min, x_max = X_test_scaled[:, 0].min() - 1, X_test_scaled[:, 0].max() + 1
y_min, y_max = X_test_scaled[:, 1].min() - 1, X_test_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                     np.linspace(y_min, y_max, 100))

mesh_features = np.c_[xx.ravel(), yy.ravel(),
                      np.full(xx.ravel().shape, X_test_scaled[:, 2].mean()),
                      np.full(xx.ravel().shape, X_test_scaled[:, 3].mean()),
                      np.full(xx.ravel().shape, X_test_scaled[:, 4].mean())]
Z = svm_classifier.predict(mesh_features)
Z = Z.reshape(xx.shape)

# Explicit level edges give one band per class id, and the fixed 0..2 colour
# range keeps region and point colours consistent whichever classes occur.
ax3.contourf(xx, yy, Z, alpha=0.3, cmap=risk_cmap,
             levels=[-0.5, 0.5, 1.5, 2.5], vmin=0, vmax=2)
scatter = ax3.scatter(X_test_scaled[:, 0], X_test_scaled[:, 1],
                     c=y_test, cmap=risk_cmap, vmin=0, vmax=2,
                     edgecolors='black', s=30, alpha=0.7)
ax3.set_xlabel('Distance (scaled)')
ax3.set_ylabel('Relative Velocity (scaled)')
ax3.set_title('SVM Decision Boundary', fontsize=12, fontweight='bold')
plt.colorbar(scatter, ax=ax3, ticks=[0, 1, 2], label='Risk Level')

# 4. C Parameter Sensitivity
ax4 = fig.add_subplot(2, 3, 4)
c_values = [0.1, 0.5, 1, 5, 10, 50, 100]
c_train_scores = []
c_test_scores = []

for c in c_values:
    svm_c = SVC(C=c, kernel=SVM_KERNEL, gamma=SVM_GAMMA, random_state=RANDOM_SEED)
    svm_c.fit(X_train_scaled, y_train)
    c_train_scores.append(svm_c.score(X_train_scaled, y_train))
    c_test_scores.append(svm_c.score(X_test_scaled, y_test))

ax4.semilogx(c_values, c_train_scores, 'o-', label='Train Accuracy', linewidth=2)
ax4.semilogx(c_values, c_test_scores, 's-', label='Test Accuracy', linewidth=2)
ax4.axvline(SVM_C, color='red', linestyle='--', label=f'Current C={SVM_C}')
ax4.set_xlabel('C (Regularization)')
ax4.set_ylabel('Accuracy')
ax4.set_title('C Parameter Sensitivity', fontsize=12, fontweight='bold')
ax4.legend()
ax4.grid(alpha=0.3)

# 5. ROC Curves (One-vs-Rest)
ax5 = fig.add_subplot(2, 3, 5)
y_score = svm_classifier.predict_proba(X_test_scaled)
# predict_proba columns follow svm_classifier.classes_, which only lists the
# classes seen during training, so look the column up instead of assuming
# column i belongs to class i.
proba_column = {cls: col for col, cls in enumerate(svm_classifier.classes_)}

for i, class_name in enumerate(class_names):
    if i not in proba_column:
        continue  # class never seen in training: no probability column
    # Binarize the labels
    y_test_binary = (y_test == i).astype(int)
    if y_test_binary.min() == y_test_binary.max():
        continue  # ROC is undefined without both positives and negatives
    fpr, tpr, _ = roc_curve(y_test_binary, y_score[:, proba_column[i]])
    roc_auc = auc(fpr, tpr)
    ax5.plot(fpr, tpr, linewidth=2,
            label=f'{class_name} (AUC = {roc_auc:.2f})')

ax5.plot([0, 1], [0, 1], 'k--', linewidth=1, label='Random')
ax5.set_xlabel('False Positive Rate')
ax5.set_ylabel('True Positive Rate')
ax5.set_title('ROC Curves (One-vs-Rest)', fontsize=12, fontweight='bold')
ax5.legend()
ax5.grid(alpha=0.3)

# 6. Precision, Recall, F1 by Class
# `labels` guarantees one value per class so the arrays match the three bar
# positions; zero_division=0 scores absent classes as 0.
ax6 = fig.add_subplot(2, 3, 6)
precision_svm, recall_svm, f1_svm, _ = precision_recall_fscore_support(
    y_test, y_pred_svm, labels=class_ids, zero_division=0)
x_pos = np.arange(len(class_ids))
width = 0.25
ax6.bar(x_pos - width, precision_svm, width, label='Precision', alpha=0.8, color='darkblue')
ax6.bar(x_pos, recall_svm, width, label='Recall', alpha=0.8, color='darkgreen')
ax6.bar(x_pos + width, f1_svm, width, label='F1-Score', alpha=0.8, color='darkred')
ax6.set_xticks(x_pos)
ax6.set_xticklabels(class_names)
ax6.set_ylabel('Score')
ax6.set_ylim([0, 1.1])
ax6.set_title('Metrics by Risk Level', fontsize=12, fontweight='bold')
ax6.legend()
ax6.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

# n_support_ is ordered like classes_, so pair the two instead of indexing by
# class id (which fails when a class was absent from training).
print(f"\n⚡ SVM Support Vectors per Class:")
for cls, n_vectors in zip(svm_classifier.classes_, svm_classifier.n_support_):
    print(f"   {class_names[cls]}: {n_vectors} support vectors")

# ============================================
# 📈 ALGORITHM COMPARISON DASHBOARD
# ============================================

# Class ids follow the label encoding used for y_test and the predictions.
class_ids = [0, 1, 2]
class_names = ['SAFE', 'CAUTION', 'HIGH_RISK']

# Collect all results
algorithms = ['Decision Tree', 'KNN', 'SVM']
predictions = [y_pred_dt, y_pred_knn, y_pred_svm]
accuracies = [accuracy_dt, accuracy_knn, accuracy_svm]

# Calculate comprehensive metrics (support-weighted averages over the classes)
all_metrics = []
for alg, y_pred in zip(algorithms, predictions):
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test, y_pred, average='weighted', zero_division=0)
    all_metrics.append({
        'Algorithm': alg,
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1
    })

metrics_df = pd.DataFrame(all_metrics)

print("📊 ALGORITHM COMPARISON")
print("=" * 70)
print(metrics_df.to_string(index=False))
print("=" * 70)

# Visualizations
fig = plt.figure(figsize=(18, 12))

# 1. Accuracy Comparison
ax1 = fig.add_subplot(2, 3, 1)
bars = ax1.bar(algorithms, accuracies, color=['forestgreen', 'steelblue', 'purple'], alpha=0.7)
ax1.set_ylabel('Accuracy')
ax1.set_ylim([0, 1.1])
ax1.set_title('Accuracy Comparison', fontsize=12, fontweight='bold')
ax1.grid(axis='y', alpha=0.3)

# Add value labels on bars
for bar in bars:
    height = bar.get_height()
    ax1.text(bar.get_x() + bar.get_width()/2., height,
            f'{height:.3f}', ha='center', va='bottom', fontweight='bold')

# 2. Metrics Radar Chart
ax2 = fig.add_subplot(2, 3, 2, projection='polar')
categories = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
N = len(categories)
angles = [n / float(N) * 2 * np.pi for n in range(N)]
angles += angles[:1]  # repeat the first angle to close the polygon

for i, alg in enumerate(algorithms):
    # Select the metric columns by name rather than by position in the row.
    values = metrics_df.loc[i, categories].tolist()
    values += values[:1]
    ax2.plot(angles, values, 'o-', linewidth=2, label=alg)
    ax2.fill(angles, values, alpha=0.15)

ax2.set_xticks(angles[:-1])
ax2.set_xticklabels(categories)
ax2.set_ylim(0, 1)
ax2.set_title('Performance Radar Chart', fontsize=12, fontweight='bold', pad=20)
ax2.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
ax2.grid(True)

# 3. Confusion Matrices Comparison
# `labels` makes every matrix 3x3, which np.hstack and the fixed tick
# positions below rely on.
ax3 = fig.add_subplot(2, 3, 3)
confusion_matrices = [
    confusion_matrix(y_test, y_pred_dt, labels=class_ids),
    confusion_matrix(y_test, y_pred_knn, labels=class_ids),
    confusion_matrix(y_test, y_pred_svm, labels=class_ids)
]

# Combine confusion matrices side by side
combined_cm = np.hstack(confusion_matrices)
sns.heatmap(combined_cm, annot=True, fmt='d', cmap='YlOrRd', ax=ax3, cbar=False)
ax3.set_ylabel('Actual')
ax3.set_title('Confusion Matrices: DT | KNN | SVM', fontsize=12, fontweight='bold')
ax3.set_xticks([1.5, 4.5, 7.5])  # centre of each 3-column matrix
ax3.set_xticklabels(['DT', 'KNN', 'SVM'])
ax3.set_yticks([0.5, 1.5, 2.5])
ax3.set_yticklabels(class_names)

# 4. Per-Class F1 Scores
# `labels` guarantees one F1 value per class so the arrays match the three
# bar positions; zero_division=0 scores absent classes as 0.
ax4 = fig.add_subplot(2, 3, 4)
f1_scores = {
    'Decision Tree': precision_recall_fscore_support(y_test, y_pred_dt, labels=class_ids, zero_division=0)[2],
    'KNN': precision_recall_fscore_support(y_test, y_pred_knn, labels=class_ids, zero_division=0)[2],
    'SVM': precision_recall_fscore_support(y_test, y_pred_svm, labels=class_ids, zero_division=0)[2]
}

x = np.arange(len(class_ids))
width = 0.25
for i, (alg, scores) in enumerate(f1_scores.items()):
    ax4.bar(x + i*width, scores, width, label=alg, alpha=0.8)

ax4.set_xticks(x + width)
ax4.set_xticklabels(class_names)
ax4.set_ylabel('F1-Score')
ax4.set_ylim([0, 1.1])
ax4.set_title('F1-Score by Risk Level', fontsize=12, fontweight='bold')
ax4.legend()
ax4.grid(axis='y', alpha=0.3)

# 5. ROC Curves Comparison (for one class - HIGH_RISK)
ax5 = fig.add_subplot(2, 3, 5)
y_test_binary = (y_test == 2).astype(int)  # HIGH_RISK class

# Decision Tree
dt_proba = DecisionTreeClassifier(max_depth=TREE_MAX_DEPTH, random_state=RANDOM_SEED)
dt_proba.fit(X_train, y_train)
dt_scores = dt_proba.predict_proba(X_test)[:, 2]
fpr_dt, tpr_dt, _ = roc_curve(y_test_binary, dt_scores)
auc_dt = auc(fpr_dt, tpr_dt)

# KNN
knn_scores = knn_classifier.predict_proba(X_test_scaled)[:, 2]
fpr_knn, tpr_knn, _ = roc_curve(y_test_binary, knn_scores)
auc_knn = auc(fpr_knn, tpr_knn)

# SVM
svm_scores = svm_classifier.predict_proba(X_test_scaled)[:, 2]
fpr_svm, tpr_svm, _ = roc_curve(y_test_binary, svm_scores)
auc_svm = auc(fpr_svm, tpr_svm)

ax5.plot(fpr_dt, tpr_dt, linewidth=2, label=f'Decision Tree (AUC={auc_dt:.2f})')
ax5.plot(fpr_knn, tpr_knn, linewidth=2, label=f'KNN (AUC={auc_knn:.2f})')
ax5.plot(fpr_svm, tpr_svm, linewidth=2, label=f'SVM (AUC={auc_svm:.2f})')
ax5.plot([0, 1], [0, 1], 'k--', linewidth=1, label='Random')
ax5.set_xlabel('False Positive Rate')
ax5.set_ylabel('True Positive Rate')
ax5.set_title('ROC Curves (HIGH_RISK Detection)', fontsize=12, fontweight='bold')
ax5.legend()
ax5.grid(alpha=0.3)

# 6. Prediction Agreement Heatmap
ax6 = fig.add_subplot(2, 3, 6)
agreement_matrix = np.zeros((3, 3))
agreement_matrix[0, 1] = np.sum(y_pred_dt == y_pred_knn) / len(y_pred_dt)
agreement_matrix[0, 2] = np.sum(y_pred_dt == y_pred_svm) / len(y_pred_dt)
agreement_matrix[1, 2] = np.sum(y_pred_knn == y_pred_svm) / len(y_pred_knn)
agreement_matrix[1, 0] = agreement_matrix[0, 1]
agreement_matrix[2, 0] = agreement_matrix[0, 2]
agreement_matrix[2, 1] = agreement_matrix[1, 2]
np.fill_diagonal(agreement_matrix, 1.0)

sns.heatmap(agreement_matrix, annot=True, fmt='.2f', cmap='coolwarm', 
           xticklabels=['DT', 'KNN', 'SVM'],
           yticklabels=['DT', 'KNN', 'SVM'],
           ax=ax6, vmin=0, vmax=1)
ax6.set_title('Algorithm Agreement Matrix', fontsize=12, fontweight='bold')

plt.tight_layout()
plt.show()

# Print summary
print("\nüèÜ BEST ALGORITHM:")
best_idx = np.argmax(accuracies)
print(f"   {algorithms[best_idx]} with {accuracies[best_idx]:.3f} accuracy")

print("\nüìä ALGORITHM STRENGTHS:")
print(f"   Decision Tree: Interpretability ‚≠ê‚≠ê‚≠ê‚≠ê‚≠ê | Speed ‚≠ê‚≠ê‚≠ê‚≠ê")
print(f"   KNN: Simplicity ‚≠ê‚≠ê‚≠ê‚≠ê‚≠ê | Non-parametric ‚≠ê‚≠ê‚≠ê‚≠ê")
print(f"   SVM: Margin optimization ‚≠ê‚≠ê‚≠ê‚≠ê‚≠ê | Kernel flexibility ‚≠ê‚≠ê‚≠ê‚≠ê‚≠ê")

In [None]:
# Custom test satellite input
# Each dict describes one satellite pair. Its keys must include every column
# named in `feature_cols`, because the feature matrix is selected by those names.
test_sat1 = {
    'distance': 8.5,
    'relative_velocity': 2.3,
    'sat1_altitude': 600,
    'sat2_altitude': 650,
    'altitude_diff': 50
}

test_sat2 = {
    'distance': 3.2,
    'relative_velocity': 4.1,
    'sat1_altitude': 1200,
    'sat2_altitude': 1180,
    'altitude_diff': 20
}

test_sats = [test_sat1, test_sat2]
case_names = [f'Test Case {n}' for n in range(1, len(test_sats) + 1)]

test_cases = pd.DataFrame(test_sats)
X_custom = test_cases[feature_cols].values

# Scale for KNN and SVM
# NOTE(review): the SVM input is scaled with scaler_knn as well - confirm the
# SVM was trained on features produced by this same scaler.
X_custom_scaled = scaler_knn.transform(X_custom)

# Predictions from all algorithms (the tree takes unscaled features)
dt_pred = dt_classifier.predict(X_custom)
knn_pred = knn_classifier.predict(X_custom_scaled)
svm_pred = svm_classifier.predict(X_custom_scaled)

# Predicted class ids index into this list.
risk_labels = ['SAFE', 'CAUTION', 'HIGH_RISK']
model_names = ['Decision Tree', 'KNN', 'SVM']
colors = {'SAFE': 'green', 'CAUTION': 'orange', 'HIGH_RISK': 'red'}

# One [Decision Tree, KNN, SVM] list of label strings per test case.
# Deliberately not called `predictions`: that name holds the list of test-set
# prediction arrays in the comparison-dashboard cell and should not be rebound
# to a different kind of object here.
case_labels = [
    [risk_labels[int(model_pred[k])] for model_pred in (dt_pred, knn_pred, svm_pred)]
    for k in range(len(test_sats))
]

print("Custom Satellite Collision Predictions:\n")
for k, (case_name, sat, labels) in enumerate(zip(case_names, test_sats, case_labels)):
    # Blank line between cases, but not before the first one.
    print(f"{case_name}:" if k == 0 else f"\n{case_name}:")
    print(f"  Distance: {sat['distance']}km, Velocity: {sat['relative_velocity']}km/s")
    for model_name, label in zip(model_names, labels):
        print(f"  {model_name}: {label}")

# Visualization: one panel per test case, one colour-coded bar per model
fig, axes = plt.subplots(1, len(test_sats), figsize=(7 * len(test_sats), 5))

# np.atleast_1d keeps the loop valid when only a single case (one Axes) is given.
for ax, case_name, labels in zip(np.atleast_1d(axes), case_names, case_labels):
    bar_colors = [colors[label] for label in labels]

    # Bar height carries no meaning; only the colour and the text label do.
    ax.bar(model_names, [1] * len(model_names), color=bar_colors, alpha=0.7, edgecolor='black')
    ax.set_ylabel('Prediction')
    ax.set_title(f'{case_name} Predictions')
    ax.set_ylim([0, 1.5])
    ax.set_yticks([])

    for pos, label in enumerate(labels):
        ax.text(pos, 0.5, label, ha='center', va='center', fontweight='bold', fontsize=12)

plt.tight_layout()
plt.show()

print("\nTo test your own scenarios, modify the test_sat1 and test_sat2 dictionaries above and re-run this cell.")

## 🎮 Section 10: Interactive Playground