<a href="https://colab.research.google.com/github/Eranga-j/firesense_project/blob/main/model/FireSense_Model_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
# Attach the user's Google Drive to the Colab filesystem.
# NOTE(review): none of the visible cells read from or write to this mount
# point (the dataset comes from the cloned repo) — confirm it is still needed.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!git clone https://github.com/Eranga-j/firesense_project.git


Cloning into 'firesense_project'...
remote: Enumerating objects: 33, done.[K
remote: Counting objects: 100% (33/33), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 33 (delta 4), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (33/33), 429.53 KiB | 1.96 MiB/s, done.
Resolving deltas: 100% (4/4), done.


In [None]:
import pandas as pd

# Dataset shipped inside the cloned firesense_project repository.
DATA_CSV = "/content/firesense_project/data/srilanka_firesense_data.csv"
df = pd.read_csv(DATA_CSV)

# Quick sanity check: overall size and how the target classes are distributed.
risk_counts = df["risk_level"].value_counts()
print(df.shape)
print(risk_counts)

# Preview the first rows (rich display as the cell's last expression).
df.head()


(10000, 40)
risk_level
Low       5818
Medium    3962
High       220
Name: count, dtype: int64


Unnamed: 0,equipment_id,hotel_id,equipment_type,location,exact_location,install_year,manufacturer,condition,hotel_name,city,...,backup_power_available,technician_experience_years,service_quality_score,certification_status,last_audit_score,activated_before,activation_count,last_activation_year,floor_level,near_emergency_exit
0,FE000001,H0060,Fire Alarm,Conference Hall,Floor 1 – Corridor 3,2012,Local,Good,Aitken Palace Galle,Galle,...,Yes,1,4,Valid,87,No,0,,1,No
1,FE000002,H0113,Smoke Detector,Kitchen,Floor 1 – Corridor 2,2011,Local,Fair,Serendib Heights Badulla,Badulla,...,No,9,4,Valid,86,No,0,,1,No
2,FE000003,H0206,Fire Alarm,Conference Hall,Floor 1 – Corridor 1,2019,Honeywell,Good,Jetwing Bay Badulla,Badulla,...,No,9,4,Valid,95,No,0,,1,Yes
3,FE000004,H0183,Sprinkler,Conference Hall,Conference Hall – Zone 1,2016,Viking,Good,Ocean Bay Monaragala,Monaragala,...,No,13,3,Valid,90,No,0,,0,Yes
4,FE000005,H0038,Sprinkler,Conference Hall,Kitchen – Zone 3,2024,Tyco,Fair,Cinnamon Cove Ratnapura,Ratnapura,...,Yes,10,3,Valid,92,No,0,,0,No


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Columns fed to the model. Identifier-like fields (IDs, hotel_name,
# exact_location) are deliberately left out. The order is kept fixed because
# it determines the column order seen by the preprocessing step.
feature_cols = [
    "equipment_type",
    "install_year",
    "equipment_age_years",
    "manufacturer",
    "condition",
    "months_since_service",
    "service_interval_months",
    "fault_count",
    "service_quality_score",
    "humidity_level",
    "temperature_avg_c",
    "coastal_exposure",
    "daily_exposure_hours",
    "power_fluctuation_level",
    "technician_experience_years",
    "certification_status",
    "last_audit_score",
    "activation_count",
    "activated_before",
    "floor_level",
    "near_emergency_exit",
    "backup_power_available",
    "usage_level",
]

# Feature matrix (copied so later edits cannot touch df) and target labels.
X = df[feature_cols].copy()
y = df["risk_level"]

# Route columns by dtype: numeric dtypes go to the numeric branch, everything
# else is treated as categorical.
num_cols = list(X.select_dtypes(include=["number"]).columns)
cat_cols = list(X.select_dtypes(exclude=["number"]).columns)

# 80/20 hold-out split, stratified on the target so each split keeps the
# class proportions of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Numeric branch: fill missing values with the column median.
numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="median"))]
)

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode; categories unseen during fit are ignored at predict time.
categorical_transformer = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

preprocess = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, num_cols),
        ("cat", categorical_transformer, cat_cols),
    ]
)

# class_weight="balanced" re-weights classes by inverse frequency.
model = RandomForestClassifier(
    class_weight="balanced",
    n_estimators=300,
    random_state=42,
)

# Preprocessing and classifier live in one pipeline so both are fitted on the
# training split only and travel together as a single object.
pipeline = Pipeline(steps=[("preprocess", preprocess), ("model", model)])
pipeline.fit(X_train, y_train)

# Evaluate on the held-out split.
pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, pred)

print("Accuracy:", accuracy)
print(classification_report(y_test, pred))
print("Numeric columns:", num_cols)
print("Categorical columns:", cat_cols)


Accuracy: 0.9935
              precision    recall  f1-score   support

        High       1.00      0.89      0.94        44
         Low       1.00      1.00      1.00      1164
      Medium       0.99      1.00      0.99       792

    accuracy                           0.99      2000
   macro avg       0.99      0.96      0.98      2000
weighted avg       0.99      0.99      0.99      2000

Numeric columns: ['install_year', 'equipment_age_years', 'months_since_service', 'service_interval_months', 'fault_count', 'service_quality_score', 'temperature_avg_c', 'daily_exposure_hours', 'technician_experience_years', 'last_audit_score', 'activation_count', 'floor_level']
Categorical columns: ['equipment_type', 'manufacturer', 'condition', 'humidity_level', 'coastal_exposure', 'power_fluctuation_level', 'certification_status', 'activated_before', 'near_emergency_exit', 'backup_power_available', 'usage_level']


In [None]:
import joblib

# Persist the fitted pipeline (preprocessing + classifier) as a single file
# in the current working directory.
# NOTE(review): this is a pickle-based format — it should only be loaded from
# a trusted source and with a compatible scikit-learn version.
MODEL_FILENAME = "firesense_model.pkl"
joblib.dump(pipeline, MODEL_FILENAME)
print(f"Saved {MODEL_FILENAME}")


Saved firesense_model.pkl


In [None]:
# Trigger a browser download of the saved model file from the Colab runtime.
from google.colab import files

model_file = "firesense_model.pkl"
files.download(model_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>