In [1]:

from google.colab import files
# Open Colab's file picker and save the chosen file into the runtime.
# files.upload() returns a {filename: raw bytes} dict; binding it to a name
# (instead of leaving the call as the cell's last expression) keeps the
# notebook from echoing the entire raw file contents as the cell output.
uploaded = files.upload()



Saving vibration_data.csv to vibration_data.csv


{'vibration_data.csv': b'vibration,label\r\n110,normal\r\n125,normal\r\n140,normal\r\n155,normal\r\n165,normal\r\n175,normal\r\n185,normal\r\n195,normal\r\n205,normal\r\n215,normal\r\n225,normal\r\n235,normal\r\n245,normal\r\n255,normal\r\n265,normal\r\n275,normal\r\n285,normal\r\n295,normal\r\n305,normal\r\n315,normal\r\n325,normal\r\n335,normal\r\n345,normal\r\n355,normal\r\n365,normal\r\n375,normal\r\n385,normal\r\n395,normal\r\n405,normal\r\n415,normal\r\n230,fault\r\n240,fault\r\n250,fault\r\n260,fault\r\n270,fault\r\n280,fault\r\n290,fault\r\n300,fault\r\n310,fault\r\n320,fault\r\n330,fault\r\n340,fault\r\n350,fault\r\n360,fault\r\n370,fault\r\n380,fault\r\n390,fault\r\n400,fault\r\n410,fault\r\n420,fault\r\n'}

In [2]:
import pandas as pd

# Read the uploaded CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="vibration_data.csv")

# Quick sanity checks: (rows, columns) followed by the per-class row counts.
print(df.shape)
print(df.loc[:, "label"].value_counts())

# Last expression of the cell: preview the first five rows.
df.head(n=5)


(50, 2)
label
normal    30
fault     20
Name: count, dtype: int64


Unnamed: 0,vibration,label
0,110,normal
1,125,normal
2,140,normal
3,155,normal
4,165,normal


In [3]:
# Encode the string class labels as integers: "normal" -> 0, "fault" -> 1.
label_to_target = {
    "normal": 0,
    "fault": 1,
}
encoded = df["label"].map(label_to_target)

# Series.map leaves NaN for any value that is missing from the dict, which
# would silently end up in the target column (and turn it into floats).
# Fail loudly instead so typos or new label values are caught here.
if encoded.isna().any():
    unexpected = sorted(df.loc[encoded.isna(), "label"].astype(str).unique())
    raise ValueError(f"Unmapped values in 'label' column: {unexpected}")

# Every label mapped cleanly, so the target can be stored as integers.
df["y"] = encoded.astype("int64")



In [4]:

# Feature matrix: kept two-dimensional (a DataFrame with one column).
feature_columns = ["vibration"]
X = df.loc[:, feature_columns]

# Target vector: the integer-encoded label column.
y = df.loc[:, "y"]



In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The split is stratified on y and
# uses a fixed seed so it is repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts


In [6]:
from sklearn.ensemble import RandomForestClassifier

# Baseline forest configuration: 100 trees, depth capped at 5 to limit
# overfitting, fixed seed for repeatable results.
rf_params = dict(n_estimators=100, max_depth=5, random_state=42)
rf_model = RandomForestClassifier(**rf_params)

# Fit on the training split only; the test split stays unseen.
rf_model.fit(X_train, y_train)
print("Random Forest trained successfully")


Random Forest trained successfully


In [7]:
# Predicted class for each row of the held-out test split.
y_pred = rf_model.predict(X=X_test)


In [8]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Compute all test-split metrics first, then print them in a fixed order.
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)

print("✅ Random Forest Results")
print("Accuracy:", test_accuracy)

print("\nClassification Report:")
print(test_report)

# Rows are true classes, columns are predicted classes.
print("\nConfusion Matrix:")
print(test_confusion)


✅ Random Forest Results
Accuracy: 0.3

Classification Report:
              precision    recall  f1-score   support

           0       0.40      0.33      0.36         6
           1       0.20      0.25      0.22         4

    accuracy                           0.30        10
   macro avg       0.30      0.29      0.29        10
weighted avg       0.32      0.30      0.31        10


Confusion Matrix:
[[2 4]
 [3 1]]


In [9]:
from sklearn.model_selection import StratifiedKFold, cross_validate

# 5-fold stratified cross-validation over the full dataset; the shuffle is
# seeded so the fold assignment is repeatable.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

scoring_metrics = ["accuracy", "precision", "recall", "f1"]
scores = cross_validate(rf_model, X, y, cv=cv, scoring=scoring_metrics)

# Report the mean of each per-fold test score.
print("Cross-Validation Mean Scores (RF)")
for heading, score_key in (
    ("Accuracy :", "test_accuracy"),
    ("Precision:", "test_precision"),
    ("Recall   :", "test_recall"),
    ("F1 Score :", "test_f1"),
):
    print(heading, scores[score_key].mean())


Cross-Validation Mean Scores (RF)
Accuracy : 0.22000000000000003
Precision: 0.0
Recall   : 0.0
F1 Score : 0.0


In [10]:
# Second forest: more trees, slightly deeper, with class_weight="balanced".
balanced_params = {
    "n_estimators": 200,
    "max_depth": 6,
    "class_weight": "balanced",
    "random_state": 42,
}
rf_model_balanced = RandomForestClassifier(**balanced_params)
rf_model_balanced.fit(X_train, y_train)

# Score it on the same held-out test split as the baseline forest.
y_pred_bal = rf_model_balanced.predict(X_test)

print("Balanced RF Results")
for summary in (
    classification_report(y_test, y_pred_bal),
    confusion_matrix(y_test, y_pred_bal),
):
    print(summary)


Balanced RF Results
              precision    recall  f1-score   support

           0       0.40      0.33      0.36         6
           1       0.20      0.25      0.22         4

    accuracy                           0.30        10
   macro avg       0.30      0.29      0.29        10
weighted avg       0.32      0.30      0.31        10

[[2 4]
 [3 1]]


In [11]:
import joblib
# Persist the fitted forest to disk; joblib.dump returns the list of files
# written, which the notebook shows as this cell's output.
# NOTE(review): this saves rf_model, not rf_model_balanced — confirm that is the intended artifact.
joblib.dump(value=rf_model, filename="rf_vibration_model.pkl")


['rf_vibration_model.pkl']