In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

In [2]:
# Load the dataset
dataset = pd.read_csv('predictive_maintenance.csv')

In [3]:
# Display the first few rows of the dataset for analysis
print("Dataset Overview:")
print(dataset.head())

Dataset Overview:
   UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
0    1     M14860    M                298.1                    308.6   
1    2     L47181    L                298.2                    308.7   
2    3     L47182    L                298.1                    308.5   
3    4     L47183    L                298.2                    308.6   
4    5     L47184    L                298.2                    308.7   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target Failure Type  
0                    1551         42.8                0       0   No Failure  
1                    1408         46.3                3       0   No Failure  
2                    1498         49.4                5       0   No Failure  
3                    1433         39.5                7       0   No Failure  
4                    1408         40.0                9       0   No Failure  


In [4]:
# Check for missing values
print("\nMissing Values:")
print(dataset.isnull().sum())


Missing Values:
UDI                        0
Product ID                 0
Type                       0
Air temperature [K]        0
Process temperature [K]    0
Rotational speed [rpm]     0
Torque [Nm]                0
Tool wear [min]            0
Target                     0
Failure Type               0
dtype: int64


In [5]:
# Encode categorical variables if there are any
label_encoder = LabelEncoder()
categorical_columns = ['Product ID', 'Type', 'UDI']  # Add other categorical columns if needed

for col in categorical_columns:
    dataset[col] = label_encoder.fit_transform(dataset[col])

# Data preprocessing
X = dataset.drop(['UDI', 'Target', 'Failure Type'], axis=1)  # Features
y = dataset['Target']  # Target variable

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the model (Random Forest Classifier in this case)
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
predictions = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)

print(f"\nAccuracy: {accuracy}")
print("Classification Report:")
print(report)


Accuracy: 0.984
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1939
           1       0.81      0.62      0.70        61

    accuracy                           0.98      2000
   macro avg       0.90      0.81      0.85      2000
weighted avg       0.98      0.98      0.98      2000

