In [None]:
# model_training.ipynb

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

# 1. Load the forestfires.csv dataset
df = pd.read_csv("forestfires.csv")

# Typical columns in forestfires.csv:
# X, Y, month, day, FFMC, DMC, DC, ISI, temp, RH, wind, rain, area
# month and day are strings (e.g. "mar", "sun"), so they must be converted to
# numbers before they can be used as model features.

# 2. Lookup table for 'month' (jan=1, feb=2, etc.)
month_map = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
}

# 3. Lookup table for 'day' (mon=1, tue=2, etc.)
day_map = {
    'mon': 1, 'tue': 2, 'wed': 3, 'thu': 4, 'fri': 5, 'sat': 6, 'sun': 7,
}


def _encode_labels(series, mapping, column_name):
    """Return *series* lower-cased and translated through *mapping*.

    ``Series.map`` replaces every label that is absent from *mapping* with
    NaN without warning, so an unexpected spelling in the CSV would silently
    become a missing feature value.  Such labels are reported here instead.
    Entries that are already missing in the input are left as missing.

    Raises:
        ValueError: if a non-missing value has no entry in *mapping*.
    """
    encoded = series.str.lower().map(mapping)
    # Present in the input but NaN after mapping => label not in the table.
    unknown = series[series.notna() & encoded.isna()]
    if not unknown.empty:
        labels = sorted(unknown.astype(str).unique())
        raise ValueError(
            f"Unrecognised values in column '{column_name}': {labels}"
        )
    return encoded


df["month"] = _encode_labels(df["month"], month_map, "month")
df["day"] = _encode_labels(df["day"], day_map, "day")

# 4. Binary target column: 1 when some area burned (area > 0), otherwise 0
df["fire"] = df["area"].gt(0).astype(int)

# 5. Feature matrix and target vector
# 'area' is left out of the features because the target is derived from it.
feature_cols = [
    "X", "Y", "month", "day",
    "FFMC", "DMC", "DC", "ISI",
    "temp", "RH", "wind", "rain",
]
X = df.loc[:, feature_cols]
y = df.loc[:, "fire"]

# 6. Hold out 20% of the rows for testing. Stratifying on y keeps the
#    fire / no-fire proportions the same in both partitions, and the fixed
#    random_state makes the split reproducible.
split_options = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# 7. Fit a 100-tree random forest on the training partition
#    (fit() returns the estimator itself, so the call can be chained).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# 8. Score the held-out partition
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print("Test Accuracy:", acc)

# 9. Persist the fitted model to disk
model_path = "forest_fire_model.pkl"
joblib.dump(model, model_path)
print(f"Model saved as {model_path}")
