# Predictive Maintenance for HVAC Pumps
This notebook demonstrates predictive maintenance using sensor data from HVAC pumps.
We will explore the data, perform preprocessing, engineer features, train models, compare results, and deploy the best model using FastAPI.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import warnings
warnings.filterwarnings('ignore')


In [None]:
# Read the raw sensor log into a DataFrame and preview its first rows.
csv_path = "sensor.csv"
df = pd.read_csv(csv_path)
df.head(n=5)


In [None]:
# Count the missing entries in every column.
missing_mask = df.isna()
missing_mask.sum()


In [None]:
# Keep only columns that are at least 90% populated. Non-null counts are whole
# numbers, so rounding the cut-off up to an integer selects the same columns.
min_non_null = int(np.ceil(len(df) * 0.9))
df = df.dropna(axis=1, thresh=min_non_null)

# Remove any column that is still entirely empty.
df = df.dropna(axis=1, how='all')

# Impute what is left with each numeric column's median; non-numeric columns
# have no median and are therefore left as they are.
column_medians = df.median(numeric_only=True)
df = df.fillna(column_medians)

df.info()


In [None]:
# Convert timestamp to datetime
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Encode the target as integer class ids.
# Series.map() turns every label that is missing from the dict into NaN, so an
# unexpected status -- or re-running this cell once the column has already
# been encoded -- would silently wipe out the target. Validate before the
# column is overwritten so the problem surfaces here with a clear message.
status_codes = {'NORMAL': 0, 'RECOVERING': 1, 'BROKEN': 2}
encoded_status = df['machine_status'].map(status_codes)
unmapped = df.loc[encoded_status.isna(), 'machine_status'].unique().tolist()
if unmapped:
    raise ValueError(
        f"machine_status contains values outside {list(status_codes)}: {unmapped}"
    )
df['machine_status'] = encoded_status
df['machine_status'].value_counts()


In [None]:
# The target is the encoded status column; every remaining column except the
# CSV index and the timestamp is used as a feature. Columns in the list that
# are absent from the frame are skipped rather than raising.
non_feature_columns = ['Unnamed: 0', 'timestamp', 'machine_status']
y = df['machine_status']
X = df.drop(columns=non_feature_columns, errors='ignore')


In [None]:
# Hold out 20% of the rows, stratified on the target, with a fixed seed.
split_options = dict(test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Fit the scaler on the training rows only, then apply the same fitted
# transformation to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Candidate classifiers, keyed by the label used in the printed reports.
models = {
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'SVM': SVC(probability=True),
}

# Weighted-average F1 on the held-out split, one entry per candidate.
results = {}
for name, model in models.items():
    # fit() returns the estimator itself, so the dict entry ends up trained.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    print(f"\n{name} Report:")
    print(classification_report(y_test, y_pred))

    report = classification_report(y_test, y_pred, output_dict=True)
    results[name] = report['weighted avg']['f1-score']

# Highest score wins; on a tie the candidate listed first in `models` is kept.
best_model_name = max(results, key=lambda candidate: results[candidate])
print(f"\nBest Model: {best_model_name}, F1 Score: {results[best_model_name]:.4f}")


In [None]:
# Persist the winning classifier together with the scaler fitted on the
# training rows, so both can be loaded again outside this notebook.
best_model = models[best_model_name]
model_path, scaler_path = "best_model.pkl", "scaler.pkl"
joblib.dump(best_model, model_path)
joblib.dump(scaler, scaler_path)


## Deployment Instructions
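Save the following Python script as `fastapi_app.py` and start it from the directory that contains `best_model.pkl` and `scaler.pkl` (the script loads both files by relative path) to deploy your model: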

```python
from functools import lru_cache
from typing import List

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import numpy as np
import joblib

app = FastAPI()


class SensorData(BaseModel):
    """Request body for ``/predict``.

    ``values`` is one row of numeric sensor readings. It is passed to the
    saved scaler as a single sample, so it must contain one value per feature
    column the scaler was fitted on, in the same order.
    """

    values: List[float]


@lru_cache(maxsize=1)
def _load_artifacts():
    """Load the saved model and scaler, reading the files only once.

    The result is cached, so the files are read on the first request rather
    than on every request. Restart the server to pick up retrained artifacts.
    NOTE: joblib.load unpickles the files; only load artifacts you produced.
    """
    return joblib.load('best_model.pkl'), joblib.load('scaler.pkl')


@app.post('/predict')
def predict(data: SensorData):
    """Scale one row of sensor readings and return the predicted class id.

    Returns:
        ``{'prediction': <int>}`` with the class predicted by the saved model.

    Raises:
        HTTPException: status 422 when the number of values does not match
            the number of features the scaler was fitted on.
    """
    model, scaler = _load_artifacts()

    # Reject a wrong-sized payload as a client error instead of letting the
    # scaler raise, which would surface as an opaque 500 response.
    expected = getattr(scaler, 'n_features_in_', None)
    if expected is not None and len(data.values) != expected:
        raise HTTPException(
            status_code=422,
            detail=f"Expected {expected} sensor values, got {len(data.values)}",
        )

    # A single sample has to be presented as a 2-D array of shape (1, n).
    arr = np.asarray(data.values, dtype=float).reshape(1, -1)
    arr_scaled = scaler.transform(arr)
    pred = model.predict(arr_scaled)
    return {'prediction': int(pred[0])}
```

Run the API with:
`uvicorn fastapi_app:app --reload`