In [1]:
import os

import pandas as pd

# Location of the sensor log CSV. The default is the original local download
# path; set the SENSORLOG_CSV environment variable to point at your own copy
# instead of editing this cell.
DATA_PATH = os.environ.get(
    "SENSORLOG_CSV",
    r"C:\Users\Binuda Dewhan\Downloads\sensorlog_3.csv",
)

# Load the raw sensor readings into a DataFrame.
df = pd.read_csv(DATA_PATH)

In [2]:
# Step 1: data cleaning -- drop the excluded day, keep only the model columns,
# and discard unusable gas readings.

# Exclude every reading whose date is '2025-04-22'.
df = df.loc[df['date'].ne('2025-04-22')]

# Keep the three sensor columns used by the model, then drop rows whose gas1
# reading is 4000 or above. 4095 is the dummy (invalid/unused) gas1 value; note
# that the cutoff actually applied here is 4000, not just the exact value 4095.
df_model = (
    df.loc[:, ['temperature_dht11', 'humidity_dht11', 'gas1']]
    .loc[lambda readings: readings['gas1'].lt(4000)]
    .copy()
)

In [3]:
def classify_risk(row):
    """Label one sensor reading as "High", "Safe", or "Warning".

    Args:
        row: Mapping-like object (for example a DataFrame row) that provides
            the keys 'temperature_dht11', 'humidity_dht11' and 'gas1'.

    Returns:
        "High"    if temperature > 11, or gas > 400, or temperature > 8
                  together with humidity > 75.
        "Safe"    if the reading is not High and 2 <= temperature <= 9.5,
                  40 <= humidity <= 70 and gas <= 300.
        "Warning" for every other reading. This covers the intermediate bands
                  (temperature in (9.5, 11], gas in (300, 400], humidity in
                  (70, 75]) as well as any reading outside the Safe ranges,
                  including values that fail every comparison such as NaN.
    """
    temperature, humidity, gas_level = (
        row[key] for key in ('temperature_dht11', 'humidity_dht11', 'gas1')
    )

    # Any single High trigger wins over everything else.
    if temperature > 11 or gas_level > 400 or (temperature > 8 and humidity > 75):
        return "High"

    in_safe_band = (
        2 <= temperature <= 9.5
        and 40 <= humidity <= 70
        and gas_level <= 300
    )

    # Whatever is neither High nor Safe is reported as a Warning: the explicit
    # warning bands and the catch-all edge case share the same label, and no
    # reading inside a warning band can also satisfy the Safe ranges.
    return "Safe" if in_safe_band else "Warning"


In [4]:
# Label every reading by running classify_risk on each row (axis=1) and store
# the result in a "RiskLevel" column.
df_model = df_model.assign(RiskLevel=df_model.apply(classify_risk, axis=1))


In [5]:
# Show how many readings fall into each risk level.
df_model.loc[:, "RiskLevel"].value_counts()


RiskLevel
Safe       3256
High       1051
Name: count, dtype: int64

In [6]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the risk labels, then store the integer codes in a
# "RiskEncoded" column.
le = LabelEncoder().fit(df_model["RiskLevel"])
df_model["RiskEncoded"] = le.transform(df_model["RiskLevel"])

# Optional: show which integer code each label was assigned.
print({label: code for label, code in zip(le.classes_, le.transform(le.classes_))})




In [7]:
from sklearn.model_selection import train_test_split

# Feature matrix: the three sensor reading columns.
X = df_model.loc[:, ['temperature_dht11', 'humidity_dht11', 'gas1']]
# Target vector: the integer-encoded risk level.
y = df_model.loc[:, 'RiskEncoded']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# NOTE(review): RiskLevel is produced by classify_risk from exactly the three
# feature columns in X, so the model is learning a fixed rule set; very high
# accuracy is expected and does not by itself indicate predictive power.

# Step 1: Initialize the model (100 trees, fixed seed for reproducibility)
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Step 2: Train the model
model.fit(X_train, y_train)

# Step 3: Predict on the test set
y_pred = model.predict(X_test)

# Step 4: Evaluate performance
# Pass the full list of encoded class ids explicitly. Without `labels`, the
# metrics only cover classes that appear in y_test / y_pred, so a split that
# misses a class would make `target_names=le.classes_` the wrong length
# (classification_report raises ValueError) and would silently shrink the
# confusion matrix. When every class is present the output is unchanged.
class_ids = le.transform(le.classes_)

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print(
    "\n📊 Classification Report:\n",
    classification_report(y_test, y_pred, labels=class_ids, target_names=le.classes_),
)
print("\n🔍 Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_ids))


✅ Accuracy: 0.9994438264738599

📊 Classification Report:
               precision    recall  f1-score   support

        High       1.00      1.00      1.00       210
        Safe       1.00      1.00      1.00       635
     Warning       1.00      1.00      1.00       953

    accuracy                           1.00      1798
   macro avg       1.00      1.00      1.00      1798
weighted avg       1.00      1.00      1.00      1798


🔍 Confusion Matrix:
 [[210   0   0]
 [  0 635   0]
 [  0   1 952]]


In [9]:
import pickle

# Persist the trained model first, then the label encoder (needed later to
# decode predictions back into risk-level names). Each object is pickled into
# its own file in the current working directory.
for artifact_name, artifact in (
    ("food_risk_model.pkl", model),
    ("label_encoder.pkl", le),
):
    with open(artifact_name, "wb") as handle:
        pickle.dump(artifact, handle)

print("✅ Model and label encoder saved.")


✅ Model and label encoder saved.
