In [27]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

data_path = ('/content/maitenance_record_data1.csv')
dj = pd.read_csv(data_path)

# here I will be Exploring the dataset
print(dj.head())


# here 'Timestamp' is a feature that needs to be converted to datetime format
dj['Timestamp'] = pd.to_datetime(dj['Timestamp'])

# Here I'm Encode categorical variables
le = LabelEncoder()
dj['Status'] = le.fit_transform(dj['Status'])

# here I'm Featurring engineering for timestamps
dj['HourOfDay'] = dj['Timestamp'].dt.hour


# Handling missing values
imputer = SimpleImputer(strategy='mean')
dj['Temperature'] = imputer.fit_transform(dj[['Temperature']])
dj['Pressure'] = imputer.fit_transform(dj[['Pressure']])

# Define features and target variable
features = ['Temperature', 'Pressure', 'HourOfDay']
target = 'Status'

# here i'm Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(dj[features], dj[target], test_size=0.2, random_state=42)

# Train a machine learning model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# here I'm Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
classification_report_result = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:\n", classification_report_result)

          Timestamp  Temperature    Pressure       Status
0  01/01/2019 00:00    33.973713  966.241086  NormalState
1  01/01/2019 00:01    28.893886  992.774067  NormalState
2  01/01/2019 00:02    35.181508  960.379004  FaultyState
3  01/01/2019 00:03    42.184239  984.601923  NormalState
4  01/01/2019 00:04    28.126773  905.319267  NormalState
Accuracy: 0.7675
Classification Report:
               precision    recall  f1-score   support

           0       0.17      0.05      0.08        77
           1       0.81      0.94      0.87       323

    accuracy                           0.77       400
   macro avg       0.49      0.50      0.47       400
weighted avg       0.68      0.77      0.72       400



In [34]:
dj.head()

Unnamed: 0,Timestamp,Temperature,Pressure,Status
0,01/01/2019 00:00,33.973713,966.241086,NormalState
1,01/01/2019 00:01,28.893886,992.774067,NormalState
2,01/01/2019 00:02,35.181508,960.379004,FaultyState
3,01/01/2019 00:03,42.184239,984.601923,NormalState
4,01/01/2019 00:04,28.126773,905.319267,NormalState


In [29]:
dj.tail()

Unnamed: 0,Timestamp,Temperature,Pressure,Status,HourOfDay
1995,2019-02-01 09:15:00,38.561202,1001.422879,1,9
1996,2019-02-01 09:16:00,29.78783,896.109409,1,9
1997,2019-02-01 09:17:00,22.945003,983.98511,1,9
1998,2019-02-01 09:18:00,28.695464,1082.168908,1,9
1999,2019-02-01 09:19:00,24.040779,1018.032395,1,9


In [30]:
dj.describe()

Unnamed: 0,Temperature,Pressure,Status,HourOfDay
count,2000.0,2000.0,2000.0,2000.0
mean,30.360673,999.677875,0.7955,9.45
std,7.907725,50.267703,0.403437,6.882673
min,4.069861,849.024392,0.0,0.0
25%,25.018705,964.543702,1.0,4.0
50%,30.357533,999.997335,1.0,8.0
75%,35.463821,1033.277184,1.0,15.0
max,60.821852,1196.311885,1.0,23.0


