In [None]:
from pathlib import Path

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

%matplotlib inline


In [None]:
# Read the sensor CSV into `df` and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='../data/sensor.csv')
df.head(n=5)


In [None]:
# Descriptive statistics for `df`; the bare name on the last line renders the table.
summary_stats = df.describe()
summary_stats


In [None]:
# Bar chart of how many rows fall into each value of the `failure` column.
ax = sns.countplot(data=df, x='failure')
ax.set_title('Failure Class Distribution')
plt.show()


In [None]:
# Replace the `equipment_type` column of `df` with integer codes.
# `le` keeps the fitted encoder so the codes can be mapped back later.
le = LabelEncoder().fit(df['equipment_type'])
df['equipment_type'] = le.transform(df['equipment_type'])

# Select the model inputs (X) and the prediction target (y).
features = ['temperature', 'vibration', 'pressure', 'equipment_type', 'equipment_age']
X, y = df[features], df['failure']

# Standardise every feature column.
# NOTE: this scaler is fit on every row of X, so X_scaled carries the statistics
# of any rows that are later held out if X_scaled itself is split afterwards.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


In [None]:
# Hold out 30% of the rows for evaluation.
# The split is done on the raw feature frame `X` rather than on `X_scaled`:
# `X_scaled` was standardised with a scaler fit on all rows, so splitting it
# would let test-set mean/variance leak into training and evaluation.
# `stratify=y` keeps the failure/non-failure ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, then apply that same transform to
# the held-out rows. `scaler` is rebound so it matches what the model is trained on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Train a random forest on the training partition.
# The seed is fixed so repeated runs build the same trees.
rf_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**rf_params)
model.fit(X_train, y_train)


In [None]:
# Predict on the held-out rows, then print the confusion matrix followed by
# the per-class precision/recall/F1 report.
y_pred = model.predict(X_test)
for report in (
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred),
):
    print(report)


In [None]:
# Save model
# joblib.dump does not create missing directories, so make sure the target exists.
model_dir = Path('../models')
model_dir.mkdir(parents=True, exist_ok=True)

joblib.dump(model, '../models/random_forest_model.pkl')

# The forest was trained on label-encoded, standardised inputs. Persist the
# fitted encoder and scaler next to it so new data can be prepared the same way.
joblib.dump(scaler, model_dir / 'scaler.pkl')
joblib.dump(le, model_dir / 'label_encoder.pkl')

print("Model saved to ../models/random_forest_model.pkl")
