In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import joblib
from sklearn.metrics import classification_report

In [21]:
# Train, persist and evaluate a shallow decision tree that flags obstacle rows.
#
# NOTE(review): the `obstacle` label is derived from `timeOfFlight`, and
# `timeOfFlight` is also one of the input features. The tree can therefore
# rebuild the label from that single column (the feature importances printed at
# the end of this cell make that visible), so the classification report measures
# how well the tree recovers the threshold rule, not how well it detects
# independently labelled obstacles. The feature set is intentionally left
# unchanged so that consumers of the saved model keep the same input columns.
# TODO confirm whether an independent ground-truth label is available.

DATA_PATH = "uni7_data.csv"
MODEL_PATH = 'obstacle_dt.joblib'
FEATURE_COLUMNS = ['timeOfFlight', 'hc', 'speed']

# Assuming values below 1000 indicate an obstacle (adjust threshold as needed)
OBSTACLE_THRESHOLD = 1000

# Load data
raw_df = pd.read_csv(DATA_PATH)

# Fail early, with the offending names, if the CSV does not have the expected schema
missing_columns = [col for col in FEATURE_COLUMNS if col not in raw_df.columns]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing required columns: {missing_columns}")

# A missing timeOfFlight compares False against the threshold, which would
# silently label that row as "no obstacle"; drop incomplete rows instead of
# training on labels/features that were never measured.
df = raw_df.dropna(subset=FEATURE_COLUMNS).copy()
n_dropped = len(raw_df) - len(df)
if n_dropped:
    print(f"Dropped {n_dropped} rows with missing feature values")

# Create obstacle label based on timeOfFlight (1 = below threshold, 0 = otherwise)
df['obstacle'] = (df['timeOfFlight'] < OBSTACLE_THRESHOLD).astype(int)

# Check class distribution
print("Class distribution:")
print(df['obstacle'].value_counts())

# Feature selection
X = df[FEATURE_COLUMNS]
y = df['obstacle']

# Split data with stratification so both splits keep the class ratio
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# Create and train model
model = DecisionTreeClassifier(
    max_depth=3,
    random_state=42,
    class_weight='balanced'  # Helps with imbalanced classes
)
model.fit(X_train, y_train)

# Save model
joblib.dump(model, MODEL_PATH)
print(f"\nModel saved to {MODEL_PATH}")

# Evaluate model on the held-out split
y_pred = model.predict(X_test)
print("\nModel Evaluation:")
print(classification_report(y_test, y_pred))

# Feature importance
print("\nFeature Importances:")
for name, importance in zip(X.columns, model.feature_importances_):
    print(f"{name}: {importance:.2f}")

Class distribution:
obstacle
0    228
1      8
Name: count, dtype: int64

Model saved to obstacle_dt.joblib

Model Evaluation:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        46
           1       0.67      1.00      0.80         2

    accuracy                           0.98        48
   macro avg       0.83      0.99      0.89        48
weighted avg       0.99      0.98      0.98        48


Feature Importances:
timeOfFlight: 1.00
hc: 0.00
speed: 0.00
