In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the raw dataset from a CSV file located next to the notebook.
data = pd.read_csv(filepath_or_buffer='smoke_detection_iot.csv')

In [3]:
# Disabled: would binarize the 'Fire Alarm' target (1 if the value is > 0, else 0).
#data['Fire Alarm'] = data['Fire Alarm'].apply(lambda x: 1 if x > 0 else 0)

In [4]:
# Names of the columns used as model inputs.
selected_features = [
    'Humidity[%]',
    'TVOC[ppb]',
    'eCO2[ppm]',
    'Pressure[hPa]',
]
# Narrow the frame to the input columns plus the 'Fire Alarm' target column.
data = data.loc[:, [*selected_features, 'Fire Alarm']]

In [5]:
# Separate the input columns from the target column.
X, y = data[selected_features], data['Fire Alarm']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1992,
)

# Show the feature frame as the cell output.
X

Unnamed: 0,Humidity[%],TVOC[ppb],eCO2[ppm],Pressure[hPa]
0,57.36,0,400,939.735
1,56.67,0,400,939.744
2,55.96,0,400,939.738
3,55.28,0,400,939.736
4,54.69,0,400,939.744
...,...,...,...,...
62625,15.79,625,400,936.670
62626,15.87,612,400,936.678
62627,15.84,627,400,936.687
62628,16.04,638,400,936.680


In [6]:
# Standardize the features: the scaler is fitted on the training split only,
# then the same fitted transformation is applied to both splits.
# NOTE(review): the fit/predict cells visible in this notebook are called with
# X_train / X_test, not with these normalized arrays -- confirm whether the
# scaled copies are still needed.
scaler = StandardScaler().fit(X_train)
X_train_normalized = scaler.transform(X_train)
X_test_normalized = scaler.transform(X_test)

In [7]:
# Encode the target labels as integers 0..n_classes-1.
# The encoder is fitted on the training labels only and is then applied to the
# held-out labels too, so that y_test lives in the same label space as the
# predictions of a model trained on the encoded y_train. Without this, the
# metrics would compare encoded predictions against raw labels.
# le.transform raises ValueError if y_test contains a label that never
# appeared in y_train.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [8]:
# Random forest configuration: 20 trees, each limited to depth 10, with a
# fixed seed so that training is repeatable.
clf = RandomForestClassifier(
    random_state=1992,
    max_depth=10,
    n_estimators=20,
    # 'information_gain' in RapidMiner is equivalent to 'entropy' in scikit-learn
    criterion='entropy',
)

In [9]:
# Train the forest on the training split (raw, unscaled feature columns).
clf.fit(X=X_train, y=y_train)

In [10]:
# Predict labels for the held-out rows with the trained forest.
y_pred = clf.predict(X=X_test)

In [11]:
# Score the held-out predictions against the held-out labels.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
precision = precision_score(y_true=y_test, y_pred=y_pred)

In [12]:
# Report both metrics, one per line.
print(
    f'Accuracy: {accuracy}',
    f'Precision: {precision}',
    sep='\n',
)

Accuracy: 0.97280323593592
Precision: 0.9705040721989875


In [13]:
import pickle

In [14]:
# Serialize the trained classifier to disk with pickle.
# Only `clf` is written; nothing else from this notebook is stored in the file.
with open('random_forest_model.pkl', mode='wb') as f:
    pickle.dump(obj=clf, file=f)