In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [2]:
df = pd.read_csv('../datasets/label_data.csv')
df = df.rename(columns={'label': 'target'})
df.head()

Unnamed: 0,temperature,pressure,humidity,target
0,0.382509,0.601189,0.320719,0
1,0.716235,0.601189,0.309202,0
2,0.737459,0.601189,0.197776,0
3,0.970046,0.601189,0.790369,0
4,0.119006,0.601189,0.400452,0


In [3]:
X = df.drop('target', axis=1).values
y = df['target'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

forest = RandomForestClassifier(class_weight='balanced')
forest.fit(X_train, y_train)

y_pred_test = forest.predict(X_test)
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       0.97      1.00      0.98    139930
           1       0.96      0.80      0.88     22631

    accuracy                           0.97    162561
   macro avg       0.97      0.90      0.93    162561
weighted avg       0.97      0.97      0.97    162561


In [4]:
accuracy = accuracy_score(y_test, y_pred_test)
precision = precision_score(y_test, y_pred_test, average='weighted')
recall = recall_score(y_test, y_pred_test, average='weighted')
f1 = f1_score(y_test, y_pred_test, average='weighted')

print("Testing Result:")
print(f"Accuracy: {accuracy:.4f}   Precision: {precision:.4f}   Recall: {recall:.4f}   F1-Score: {f1:.4f}")

Testing Result:
Accuracy: 0.9686   Precision: 0.9684   Recall: 0.9686   F1-Score: 0.9674


In [5]:
forest.feature_importances_

array([0.23712824, 0.4716697 , 0.29120206])

In [6]:
import joblib

model_filename = "random_forest_model.joblib"
joblib.dump(forest, model_filename)

['random_forest_model.joblib']

In [7]:
data = {
    'temperature': [0.705899748, 0.667428042, 0.70225628, 0.712440301, 0.66477176, 0.708440581, 0.698706139, 0.680803613, 0.69283698, 0.701999486],
    'pressure': [0.600658318, 0.596070808, 0.585887767, 0.600658318, 0.596070808, 0.585887767, 0.600658318, 0.596070808, 0.585887767, 0.600658318],
    'humidity': [0.33886893, 0.897541677, 0.124885584, 0.33886893, 0.897541677, 0.124885584, 0.33886893, 0.897541677, 0.124885584, 0.33886893]
}

new_data = pd.DataFrame(data)
new_data

Unnamed: 0,temperature,pressure,humidity
0,0.7059,0.600658,0.338869
1,0.667428,0.596071,0.897542
2,0.702256,0.585888,0.124886
3,0.71244,0.600658,0.338869
4,0.664772,0.596071,0.897542
5,0.708441,0.585888,0.124886
6,0.698706,0.600658,0.338869
7,0.680804,0.596071,0.897542
8,0.692837,0.585888,0.124886
9,0.701999,0.600658,0.338869


In [8]:
# Load the saved model from a file
loaded_model = joblib.load(model_filename)

new_predictions = loaded_model.predict(new_data)
new_predictions



array([1, 1, 0, 0, 1, 1, 1, 1, 1, 1], dtype=int64)