In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# 1. Load Data
df = pd.read_csv('Train/Belt 1 9 Months negative data.xlsx', parse_dates=['Timestamp'], index_col='Timestamp')
expected_ranges = pd.read_csv('Expected Ranges for Values.xlsx', index_col='Name')

# 2. Anomaly Detection and Feature Engineering (Example for Motor Temperature)
def create_anomaly_features(df, expected_ranges):
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]) and col in expected_ranges.index:
            lower_bound = expected_ranges.loc[col, 'Lower Bound']
            upper_bound = expected_ranges.loc[col, 'Upper Bound']
            df[f'{col}_anomaly'] = ((df[col] < lower_bound) | (df[col] > upper_bound)).astype(int)
            df[f'{col}_deviation'] = np.where(df[col] < lower_bound, df[col] - lower_bound, np.where(df[col] > upper_bound, df[col] - upper_bound, 0))
    return df

df = create_anomaly_features(df, expected_ranges)

# 3. Rolling Statistics and Lag Features (Example for Motor Temperature)
def create_rolling_lag_features(df, column, window_sizes, lag_sizes):
    for window in window_sizes:
        df[f'{column}_rolling_mean_{window}h'] = df[column].rolling(window * 4).mean()
        df[f'{column}_rolling_std_{window}h'] = df[column].rolling(window * 4).std()
    for lag in lag_sizes:
        df[f'{column}_lag_{lag}'] = df[column].shift(lag)
    return df

df = create_rolling_lag_features(df, 'Motor Temperature', [1, 24], [1, 4])

# 4. Status Encoding and one hot encoding of Description.
df['Maintenance'] = (df['Status'] == 'Maintenance').astype(int)
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_descriptions = encoder.fit_transform(df[['Description']])
encoded_df = pd.DataFrame(encoded_descriptions, index=df.index)
df = pd.concat([df, encoded_df], axis=1)

df = df.drop(['Status','Description'], axis = 1)

df = df.dropna()

# 5. Prepare Data for LSTM
X = df.drop(['Maintenance'], axis=1).values
y = df['Maintenance'].values

scaler = MinMaxScaler()
X = scaler.fit_transform(X)

X = X.reshape(X.shape[0], 1, X.shape[1]) # Reshape for LSTM

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# 6. Build and Train LSTM Model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 15-16: invalid continuation byte