In [1]:
import pandas as pd
import numpy as np
import random

# Function to generate real-time sensor data
def generate_real_time_data(num_samples, file_path, seed=None):
    """Generate synthetic sensor readings with failure labels and save them as CSV.

    Despite the name, nothing is read from a live sensor: every value is drawn
    at random. Vibration, moisture and temperature are sampled uniformly from
    fixed ranges, and the failure label is drawn independently of the readings.

    Parameters
    ----------
    num_samples : int
        Number of rows to generate.
    file_path : str or path-like
        Destination passed to ``DataFrame.to_csv``.
    seed : int, optional
        Seed for the numpy random generator used for all draws. Passing the
        same seed reproduces the same file. With the default ``None`` a fresh,
        unseeded generator is used (global ``random`` / ``np.random`` seeds are
        not consulted).

    Returns
    -------
    None
        The rows are written to ``file_path`` (columns: Vibration, Moisture,
        Temperature, FailureLabel, ErrorType) and a confirmation is printed.
    """
    # One generator for every draw so a single seed controls the whole file.
    rng = np.random.default_rng(seed)

    # (low, high) bounds of the uniform distribution for each sensor channel.
    sensor_ranges = {
        'Vibration': (20, 100),
        'Moisture': (10, 80),
        'Temperature': (0, 40),
    }
    data = {
        channel: rng.uniform(low, high, size=num_samples)
        for channel, (low, high) in sensor_ranges.items()
    }

    # Failure labels: 0 (no failure) or 1 (failure), each equally likely.
    failure_label = rng.integers(0, 2, size=num_samples)

    # A failed sample must carry a real error type. 'None' is reserved for
    # non-failures; previously it was also in the pool drawn for failures, so
    # a row could be labelled as failed with ErrorType 'None'.
    failure_error_types = ['High Vibration', 'High Moisture', 'High Temperature']
    sampled_errors = rng.choice(failure_error_types, size=num_samples)
    error_type = np.where(failure_label == 1, sampled_errors, 'None')

    data['FailureLabel'] = failure_label
    data['ErrorType'] = error_type
    df = pd.DataFrame(data)

    # Save to CSV without the index column.
    df.to_csv(file_path, index=False)

    print(f"Real-time sensor data saved to {file_path}")

# Run configuration: output location and how many rows to generate
csv_file_path = 'real_time_sensor_data.csv'
num_samples = 1000  # Adjust as needed

# Write the generated sensor readings to the CSV file
generate_real_time_data(num_samples=num_samples, file_path=csv_file_path)


Real-time sensor data saved to real_time_sensor_data.csv


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [9]:
# Load the labelled sensor dataset that every later cell works on.
# NOTE(review): this file name differs from 'real_time_sensor_data.csv', which the
# generator cell writes; confirm which dataset is intended.
# NOTE(review): depending on the pandas version, read_csv's default NA handling may
# parse the literal string 'None' as a missing value; check ErrorType after loading.
sensor_data = pd.read_csv('sensor_data_with_labels_and_error_types.csv')

In [11]:
# Fill missing values in the float64/int64 columns with that column's mean.
# The mean is computed over the full frame, i.e. before any train/test split.
# NOTE(review): the dtype filter may also select FailureLabel; confirm that
# mean-imputing a label column is acceptable.
numeric_cols = sensor_data.select_dtypes(include=['float64', 'int64']).columns
numeric_part = sensor_data[numeric_cols]
sensor_data[numeric_cols] = numeric_part.fillna(numeric_part.mean())

In [12]:
# Learn the ErrorType vocabulary, then replace the column with integer class ids.
# The column is overwritten, so re-running this cell would fit the encoder on
# already-encoded values instead of the original labels.
label_encoder = LabelEncoder().fit(sensor_data['ErrorType'])
sensor_data['ErrorType'] = label_encoder.transform(sensor_data['ErrorType'])

In [14]:
# Feature matrix: the three sensor channels
X = sensor_data.loc[:, ['Vibration', 'Moisture', 'Temperature']]
# Targets: binary failure flag (1: failure, 0: no failure) and the ErrorType column
y_failure = sensor_data.loc[:, 'FailureLabel']
y_error = sensor_data.loc[:, 'ErrorType']

In [15]:
# Split features and both targets in a single call so the same rows land in
# train/test for all three; 20% of the rows are held out, with a fixed seed.
(
    X_train, X_test,
    y_failure_train, y_failure_test,
    y_error_train, y_error_test,
) = train_test_split(
    X,
    y_failure,
    y_error,
    test_size=0.2,
    random_state=42,
)


In [16]:
# Two independent random forests with identical settings:
# one predicts the failure flag, the other the error type.
rf_failure_classifier, rf_error_classifier = (
    RandomForestClassifier(random_state=42, n_estimators=100) for _ in range(2)
)

# Fit each model on the same training features with its own target
rf_failure_classifier.fit(X=X_train, y=y_failure_train)
rf_error_classifier.fit(X=X_train, y=y_error_train)

In [17]:
# Predict both targets for the held-out rows
y_failure_pred, y_error_pred = (
    rf_failure_classifier.predict(X_test),
    rf_error_classifier.predict(X_test),
)

In [18]:
# Display the failure predictions for the test rows (1: failure, 0: no failure).
y_failure_pred

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0], dtype=int64)

In [19]:
# Display the error-type predictions for the test rows. Values are the integer
# class ids produced by label_encoder, not the original ErrorType strings.
y_error_pred

array([2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2,
       2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2])

In [20]:
# Evaluate the failure classifier on the held-out rows: overall accuracy,
# per-class precision/recall, and the confusion matrix.
# Only the failure model is scored in this cell; y_error_pred is not evaluated here.
failure_accuracy = accuracy_score(y_failure_test, y_failure_pred)
print("Failure Prediction Accuracy:", failure_accuracy)
print(
    "Classification Report for Failure Prediction:",
    classification_report(y_failure_test, y_failure_pred),
    sep="\n",
)
print(
    "Confusion Matrix for Failure Prediction:",
    confusion_matrix(y_failure_test, y_failure_pred),
    sep="\n",
)


Failure Prediction Accuracy: 1.0
Classification Report for Failure Prediction:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       128
           1       1.00      1.00      1.00        21

    accuracy                           1.00       149
   macro avg       1.00      1.00      1.00       149
weighted avg       1.00      1.00      1.00       149

Confusion Matrix for Failure Prediction:
[[128   0]
 [  0  21]]
