In [1]:
import numpy as np
import pandas as pd

num_samples = 10000  # Number of data points
start_date = '2020-01-01'
end_date = '2023-12-31'
sensor_data = pd.DataFrame({
    'Timestamp': pd.date_range(start=start_date, end=end_date, periods=num_samples),
    'Temperature': np.random.uniform(10, 40, num_samples),  # Example temperature range (in Celsius)
    'Pressure': np.random.uniform(800, 1200, num_samples),  # Example pressure range (in kPa)
    'Vibration': np.random.uniform(0, 1, num_samples),      # Example vibration range (unitless)
    'Fluid Level': np.random.uniform(0, 100, num_samples)   # Example fluid level range (in percentage)
})

maintenance_records = pd.DataFrame({
    'Timestamp': pd.date_range(start=start_date, end=end_date, periods=num_samples),
    'Maintenance Type': np.random.choice(['Routine', 'Corrective', 'Preventive'], num_samples),
    'Duration (hours)': np.random.randint(1, 24, num_samples),  # Example maintenance duration (in hours)
    'Downtime (hours)': np.random.randint(0, 48, num_samples)   # Example downtime duration (in hours)
})

sensor_data.to_csv('synthetic_sensor_data.csv', index=False)
maintenance_records.to_csv('synthetic_maintenance_records.csv', index=False)


In [2]:
Maintain_df=pd.read_csv('synthetic_maintenance_records.csv')

In [4]:
Maintain_df.head(-5)

Unnamed: 0,Timestamp,Maintenance Type,Duration (hours),Downtime (hours)
0,2020-01-01 00:00:00.000000000,Corrective,19,44
1,2020-01-01 03:30:15.661566156,Corrective,17,9
2,2020-01-01 07:00:31.323132313,Routine,3,42
3,2020-01-01 10:30:46.984698469,Preventive,22,42
4,2020-01-01 14:01:02.646264626,Routine,2,46
...,...,...,...,...
9990,2023-12-29 16:27:39.045904592,Preventive,23,13
9991,2023-12-29 19:57:54.707470736,Routine,16,35
9992,2023-12-29 23:28:10.369036896,Preventive,23,41
9993,2023-12-30 02:58:26.030603056,Corrective,11,36


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
sensor_data = pd.read_csv('synthetic_sensor_data.csv')

maintenance_records = pd.read_csv('synthetic_maintenance_records.csv')
merged_data = pd.merge(sensor_data, maintenance_records, on='Timestamp', how='left')
print(merged_data.head())


                       Timestamp  Temperature     Pressure  Vibration  \
0  2020-01-01 00:00:00.000000000    25.910813   874.740784   0.882250   
1  2020-01-01 03:30:15.661566156    27.238732   844.962991   0.398855   
2  2020-01-01 07:00:31.323132313    13.709520   889.375764   0.451322   
3  2020-01-01 10:30:46.984698469    38.899141  1113.626904   0.516603   
4  2020-01-01 14:01:02.646264626    14.344158  1182.368183   0.861944   

   Fluid Level Maintenance Type  Duration (hours)  Downtime (hours)  
0     5.558600       Corrective                19                44  
1    43.372134       Corrective                17                 9  
2    77.801227          Routine                 3                42  
3    90.317132       Preventive                22                42  
4    53.846521          Routine                 2                46  


In [7]:

merged_data['Timestamp'] = pd.to_datetime(merged_data['Timestamp'])

merged_data['Hour'] = merged_data['Timestamp'].dt.hour
merged_data['DayOfWeek'] = merged_data['Timestamp'].dt.dayofweek
merged_data['Temperature_Rolling_Mean'] = merged_data['Temperature'].rolling(window=3).mean()
print(merged_data.head())


                      Timestamp  Temperature     Pressure  Vibration  \
0 2020-01-01 00:00:00.000000000    25.910813   874.740784   0.882250   
1 2020-01-01 03:30:15.661566156    27.238732   844.962991   0.398855   
2 2020-01-01 07:00:31.323132313    13.709520   889.375764   0.451322   
3 2020-01-01 10:30:46.984698469    38.899141  1113.626904   0.516603   
4 2020-01-01 14:01:02.646264626    14.344158  1182.368183   0.861944   

   Fluid Level Maintenance Type  Duration (hours)  Downtime (hours)  Hour  \
0     5.558600       Corrective                19                44     0   
1    43.372134       Corrective                17                 9     3   
2    77.801227          Routine                 3                42     7   
3    90.317132       Preventive                22                42    10   
4    53.846521          Routine                 2                46    14   

   DayOfWeek  Temperature_Rolling_Mean  
0          2                       NaN  
1          2          

In [20]:
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression  
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer

# Assuming 'merged_data' contains the dataset and 'Maintenance Type' is the target variable
X = merged_data.drop(['Timestamp', 'Maintenance Type'], axis=1) 
y = merged_data['Maintenance Type'] 

# Define the imputer
imputer = SimpleImputer(strategy='mean')

# Impute missing values in the features
X_imputed = imputer.fit_transform(X)

# Split the imputed data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_imputed, y, test_size=0.2, random_state=42)

# List of models to evaluate
models = [
    RandomForestClassifier(random_state=42),
    LogisticRegression(random_state=42, max_iter=1000),  # Increase max_iter
    DecisionTreeClassifier(random_state=42),
    SVC(random_state=42)
]

results = {}

for model in models:
    model_name = model.__class__.__name__
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)
    results[model_name] = {
        'accuracy': accuracy,
        'classification_report': classification_rep
    }

for model_name, metrics in results.items():
    print(f"Model: {model_name}")
    print(f"Accuracy: {metrics['accuracy']}")
    print("Classification Report:")
    print(metrics['classification_report'])
    print("-------------------------------------------")
    
# Saving the Random Forest Classifier model
with open('random_forest_model.pkl', 'wb') as file:
    pickle.dump(models[0], file)  # Save the trained Random Forest Classifier model


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Model: RandomForestClassifier
Accuracy: 0.326
Classification Report:
              precision    recall  f1-score   support

  Corrective       0.33      0.36      0.34       673
  Preventive       0.33      0.32      0.32       658
     Routine       0.33      0.29      0.31       669

    accuracy                           0.33      2000
   macro avg       0.33      0.33      0.33      2000
weighted avg       0.33      0.33      0.33      2000

-------------------------------------------
Model: LogisticRegression
Accuracy: 0.3345
Classification Report:
              precision    recall  f1-score   support

  Corrective       0.35      0.37      0.36       673
  Preventive       0.33      0.41      0.37       658
     Routine       0.33      0.22      0.27       669

    accuracy                           0.33      2000
   macro avg       0.33      0.33      0.33      2000
weighted avg       0.33      0.33      0.33      2000

-------------------------------------------
Model: Decision

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
