In [3]:
import warnings

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset
df = pd.read_csv("velocity_features.csv")

# Drop metadata columns that are not model inputs. errors='ignore' keeps this
# step working when some of these columns are absent from the CSV.
drop_cols = ['station_id', 'channel', 'event_name']
df = df.drop(columns=drop_cols, errors='ignore')

# --- Automatically Generate Integrity Labels ---
# Rule: Compromised (1) if ptp_vel > 1e-4 or energy_vel > 1e-7, otherwise 0.
PTP_VEL_THRESHOLD = 1e-4
ENERGY_VEL_THRESHOLD = 1e-7

# Vectorized column comparisons instead of a row-wise apply: one pass over
# each column rather than a Python-level lambda call per row. Comparisons
# against NaN evaluate to False, so rows with missing values are labelled 0.
df['integrity'] = (
    (df['ptp_vel'] > PTP_VEL_THRESHOLD)
    | (df['energy_vel'] > ENERGY_VEL_THRESHOLD)
).astype(int)

# Split features and target
# NOTE(review): 'integrity' is computed directly from ptp_vel and energy_vel,
# and both columns remain in X. The classifier can therefore recover the
# labelling thresholds exactly, so perfect test scores are expected and do not
# demonstrate predictive skill on independently labelled data.
X = df.drop(columns=['integrity'])
y = df['integrity']

# Save feature column order for consistent prediction later
feature_columns = X.columns.tolist()

# Split into training and testing sets BEFORE scaling, so that the scaler's
# mean/variance are estimated from training rows only and no test-set
# statistics leak into preprocessing.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Standardize the features: fit on the training split, then apply the same
# transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full feature matrix (using the training-fitted scaler),
# kept so any later cell that refers to X_scaled continues to work.
X_scaled = scaler.transform(X)

# Build a 100-tree random forest with a fixed seed and fit it on the training
# split in one step (fit() returns the fitted estimator itself).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Class predictions for the held-out split
y_pred = model.predict(X_test)

# Evaluation: each report is printed as a heading line followed by its body
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# --- Predict on new data example ---
# A single hand-written sample keyed by feature name. Any training feature not
# listed here is filled with 0.0 below (and reported via a warning).
new_sample_input = {
    'mean_vel': 1e-9,
    'std_vel': 2e-6,
    'max_vel': 1.2e-5,
    'min_vel': -1.5e-5,
    'ptp_vel': 2.7e-5,
    'rms_vel': 2e-6,
    'energy_vel': 1.4e-9,
    'dominant_freq_vel': 0.3,
    'spectral_centroid_vel': 1.4,
    'sampling_rate': 100.0  # Add any required feature here
}

# Surface schema mismatches instead of hiding them: a raw 0.0 is an arbitrary
# stand-in for an unscaled feature and may distort the prediction.
missing_cols = [col for col in feature_columns if col not in new_sample_input]
if missing_cols:
    warnings.warn(
        f"New sample is missing training features {missing_cols}; "
        "filling them with 0.0"
    )

# Build a one-row frame with exactly the training columns, in training order.
# reindex adds absent columns (filled with 0.0) and drops keys that were not
# training features.
new_sample = pd.DataFrame([new_sample_input]).reindex(
    columns=feature_columns, fill_value=0.0
)

# Scale with the already-fitted scaler and predict
new_scaled = scaler.transform(new_sample)
prediction = model.predict(new_scaled)

print(f"\nPredicted building integrity: {'Compromised' if prediction[0] == 1 else 'Intact'}")


Confusion Matrix:
[[25  0]
 [ 0 14]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       1.00      1.00      1.00        14

    accuracy                           1.00        39
   macro avg       1.00      1.00      1.00        39
weighted avg       1.00      1.00      1.00        39


Predicted building integrity: Intact
