In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# 1. Read the raw sensor log from disk
df = pd.read_csv('sensor_log.csv')

# 2. Strip stray whitespace around the header names so that later
#    lookups such as df['Temp(C)'] match exactly
df.columns = df.columns.str.strip()

# 3-4. Coerce EVERY column to a numeric dtype (values that cannot be parsed
#      become NaN), then discard any row that contains at least one NaN.
#      Note: a column that is entirely non-numeric would turn into all-NaN
#      and cause every row to be dropped.
df = df.apply(pd.to_numeric, errors='coerce').dropna()

# 5. Create 'Label' column based on custom rules
def classify_environment(row):
    """Label one sensor reading as 'Safe' or 'Unsafe'.

    A reading is 'Safe' only when every one of these holds:

    * 20 <= row['Temp(C)'] <= 40
    * 30 <= row['Humidity(%)'] <= 70
    * row['Pressure(hPa)'] >= 900
    * row['Gas'] < 300
    * row['Light'] >= 50
    * row['Proximity'] <= 3

    The checks run in the order listed and stop at the first failure, so
    later keys are not read once an earlier check has failed. A NaN value
    fails its comparison and therefore yields 'Unsafe'.

    Args:
        row: Mapping-like object (e.g. a pandas Series or a dict) indexed by
            the column names above.

    Returns:
        The string 'Safe' or 'Unsafe'.
    """
    # Each guard is written as "not (condition)" rather than the inverted
    # comparison so that NaN values are rejected as well.
    if not (20 <= row['Temp(C)'] <= 40):
        return 'Unsafe'
    if not (30 <= row['Humidity(%)'] <= 70):
        return 'Unsafe'
    if not (row['Pressure(hPa)'] >= 900):
        return 'Unsafe'
    if not (row['Gas'] < 300):
        return 'Unsafe'
    if not (row['Light'] >= 50):
        return 'Unsafe'
    if not (row['Proximity'] <= 3):
        return 'Unsafe'
    return 'Safe'

# Apply the rule-based classifier to every row to build the target column
df['Label'] = df.apply(classify_environment, axis=1)

# 6. Split into features and target
X = df.drop(columns='Label')
y = df['Label']

# Show the class balance up front. If one class is missing or very rare,
# the accuracy printed below says little about the model.
label_counts = y.value_counts()
print("Label distribution:", label_counts.to_dict())

# 7. Train-test split
# Stratify on the label so both folds keep the Safe/Unsafe ratio; an
# unstratified split of an imbalanced set can leave the test fold with a
# single class. Stratification needs at least two rows per class and a test
# fold at least as large as the number of classes, so fall back to a plain
# random split when the data is too small for that.
can_stratify = label_counts.min() >= 2 and len(y) * 0.2 >= label_counts.size
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if can_stratify else None,
)

# 8. Train Decision Tree
# A fixed random_state makes the fitted tree reproducible between runs.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# 9. Predict and evaluate
# NOTE: 'Label' is computed deterministically from the very columns used as
# features, so these scores measure how well the tree re-learned the
# hand-written rule, not how well it predicts independent ground truth.
y_pred = clf.predict(X_test)

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("✅ Classification Report:")
print(classification_report(y_test, y_pred))

✅ Accuracy: 1.0
✅ Classification Report:
              precision    recall  f1-score   support

      Unsafe       1.00      1.00      1.00         9

    accuracy                           1.00         9
   macro avg       1.00      1.00      1.00         9
weighted avg       1.00      1.00      1.00         9

