In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Set random seed for reproducibility
np.random.seed(42)

# Number of data points
num_data_points = 1000

# Create synthetic sensor data
sensor_data = pd.DataFrame({
    'Timestamp': pd.date_range(start=datetime.now(), periods=num_data_points, freq='H'),
        'Temperature': np.random.normal(loc=25, scale=5, size=num_data_points),
            'Pressure': np.random.normal(loc=100, scale=10, size=num_data_points),
                'Vibration': np.random.normal(loc=0, scale=1, size=num_data_points),
                })

                # Create synthetic maintenance records
maintenance_records = pd.DataFrame({
    'Timestamp': pd.date_range(start=datetime.now(), periods=num_data_points//10, freq='D'),
          'Maintenance_Type': np.random.choice(['Routine', 'Emergency'], size=num_data_points//10),
            })

                        # Merge sensor data and maintenance records
df = pd.merge_asof(sensor_data, maintenance_records, on='Timestamp', direction='backward')

                        # Fill NaN values in 'Maintenance_Type' with 'No Maintenance'
df['Maintenance_Type'].fillna('No Maintenance', inplace=True)

                        # Create binary target variable indicating failure (1) or not (0)
df['Failure'] = np.where(df['Maintenance_Type'] == 'Emergency', 1, 0)

                        # Save the synthetic dataset to a CSV file
df.to_csv('data.csv', index=False)


In [4]:
# Load the data into a pandas DataFrame
df = pd.read_csv('data.csv')


In [5]:
# Basic data exploration
print(df.head())
print(df.info())
print(df.describe())


                    Timestamp  Temperature    Pressure  Vibration  \
0  2023-11-17 16:52:52.039286    27.483571  113.993554  -0.675178   
1  2023-11-17 17:52:52.039286    24.308678  109.246337  -0.144519   
2  2023-11-17 18:52:52.039286    28.238443  100.596304  -0.792420   
3  2023-11-17 19:52:52.039286    32.615149   93.530632  -0.307962   
4  2023-11-17 20:52:52.039286    23.829233  106.982233  -1.893615   

  Maintenance_Type  Failure  
0   No Maintenance        0  
1        Emergency        1  
2        Emergency        1  
3        Emergency        1  
4        Emergency        1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Timestamp         1000 non-null   object 
 1   Temperature       1000 non-null   float64
 2   Pressure          1000 non-null   float64
 3   Vibration         1000 non-null   float64
 4   Maintenance_T

In [8]:
# Feature selection
features = ['Temperature', 'Pressure', 'Vibration']
X = df[features]
y = df['Failure']

In [9]:
# Example: Impute missing values with mean
X.fillna(X.mean(), inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.fillna(X.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.fillna(X.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.fillna(X.mean(), inplace=True)


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [11]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [12]:
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.55
Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.69      0.64       115
           1       0.46      0.36      0.41        85

    accuracy                           0.55       200
   macro avg       0.53      0.53      0.52       200
weighted avg       0.54      0.55      0.54       200

