In [2]:
"""
01_exploration.ipynb

Notebook outline for Landslide Detection â€” Data Exploration and Quick Model Run.
Run this in JupyterLab / VS Code notebooks.

Install deps:
  pip install pandas matplotlib seaborn scikit-learn torch joblib
"""

# %% [markdown]
# # ðŸŒ‹ Landslide Detection: Data Exploration & Quick Training
# This notebook visualizes the GEE-exported dataset and runs a quick Random Forest model.

# %%
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load processed features
df = pd.read_csv('../data/processed/features.csv')
print(f"Dataset shape: {df.shape}")
df.head()

# %% [markdown]
# ## Feature Correlations
# Let's visualize correlation between spectral and terrain features.

# %%
plt.figure(figsize=(10,8))
sns.heatmap(df.corr(), cmap='coolwarm', center=0)
plt.title('Feature Correlation Heatmap')
plt.show()

# %% [markdown]
# ## Class Distribution

# %%
df['label'].value_counts().plot(kind='bar', color=['green','red'])
plt.title('Landslide vs Non-Landslide Samples')
plt.show()

# %% [markdown]
# ## Quick Random Forest Baseline

# %%
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import joblib

X = df.drop(columns=['label']).values
y = df['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

model = RandomForestClassifier(n_estimators=200, random_state=42, class_weight='balanced')
model.fit(X_train, y_train)

preds = model.predict(X_test)
print(classification_report(y_test, preds))

# %% [markdown]
# ## Confusion Matrix

# %%
cm = confusion_matrix(y_test, preds)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# %% [markdown]
# ## Save Model

# %%
joblib.dump(model, '../models/rf_baseline.pkl')
print('âœ… Model saved as models/rf_baseline.pkl')

# %% [markdown]
# Next steps:
# - Visualize feature importances
# - Try CNN training via `train_model.py --model cnn`
# - Deploy results using the web app

FileNotFoundError: [Errno 2] No such file or directory: '../data/processed/features.csv'