In [2]:
pip install optree

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import mutual_info_regression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from scipy import stats
import optuna

# Check if GPU is available
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
    except RuntimeError as e:
        print(e)

# Load the dataset
df = pd.read_csv("NLP_Dataset1.csv")

# Sample the data for quick processing
df_sample = df.sample(n=100000, random_state=42)

# Select relevant columns
selected_columns = [
    'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
    'ActivatedTimestamp', 'ClearedTimestamp', 'month', 'week', 'ResolutionTimeMinutes'
]
df_selected = df_sample[selected_columns]

# Convert categorical columns to numerical using Label Encoding
label_encoders = {}
for column in ['organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity']:
    le = LabelEncoder()
    df_selected[column] = le.fit_transform(df_selected[column])
    label_encoders[column] = le

# Convert timestamps to datetime and extract features
df_selected['ActivatedTimestamp'] = pd.to_datetime(df_selected['ActivatedTimestamp'])
df_selected['ClearedTimestamp'] = pd.to_datetime(df_selected['ClearedTimestamp'])
df_selected['ActivationHour'] = df_selected['ActivatedTimestamp'].dt.hour
df_selected['ClearanceHour'] = df_selected['ClearedTimestamp'].dt.hour
df_selected['ActivationDayOfWeek'] = df_selected['ActivatedTimestamp'].dt.dayofweek
df_selected['ClearanceDayOfWeek'] = df_selected['ClearedTimestamp'].dt.dayofweek
df_selected['ResolutionTime'] = (df_selected['ClearedTimestamp'] - df_selected['ActivatedTimestamp']).dt.total_seconds() / 60

# Remove rows with invalid resolution times
df_selected = df_selected[(df_selected['ResolutionTime'] >= 0) & (df_selected['ResolutionTime'] <= 10000)]

# Remove outliers
z_scores = np.abs(stats.zscore(df_selected['ResolutionTimeMinutes']))
df_selected = df_selected[z_scores < 3]

# Normalize the ResolutionTimeMinutes column
scaler = MinMaxScaler()
df_selected['ResolutionTimeMinutes'] = scaler.fit_transform(df_selected[['ResolutionTimeMinutes']])

# Define features and target
features = [
    'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
    'month', 'week', 'ActivationHour', 'ClearanceHour',
    'ActivationDayOfWeek', 'ClearanceDayOfWeek'
]
X = df_selected[features]
y = df_selected['ResolutionTimeMinutes']

# Scale the features
feature_scaler = StandardScaler()
X = feature_scaler.fit_transform(X)

# Split the data into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Feature importance using Random Forest
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
rf_importances = rf.feature_importances_

# Mutual Information for feature importance
mi = mutual_info_regression(X_train, y_train)
mi_importances = mi

# Plot feature importances
feature_names = ['organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
                 'month', 'week', 'ActivationHour', 'ClearanceHour',
                 'ActivationDayOfWeek', 'ClearanceDayOfWeek']
fig, axs = plt.subplots(1, 2, figsize=(14, 5))

# Random Forest Feature Importance
axs[0].barh(feature_names, rf_importances)
axs[0].set_title('Random Forest Feature Importance')

# Mutual Information Feature Importance
axs[1].barh(feature_names, mi_importances)
axs[1].set_title('Mutual Information Feature Importance')

plt.tight_layout()
plt.show()

# Define the objective function for Optuna
def objective(trial):
    try:
        # Define the hyperparameter search space
        hidden_layer_sizes = trial.suggest_categorical('hidden_layer_sizes', [(100,), (100, 50), (150, 75), (200, 100)])
        activation = trial.suggest_categorical('activation', ['relu', 'tanh'])
        solver = trial.suggest_categorical('solver', ['adam', 'sgd'])
        alpha = trial.suggest_float('alpha', 1e-5, 1e-1, log=True)
        learning_rate = trial.suggest_categorical('learning_rate', ['constant', 'adaptive'])
        max_iter = trial.suggest_int('max_iter', 300, 500)
        
        # Create the model
        model = Sequential()
        for units in hidden_layer_sizes:
            model.add(Dense(units=units, activation=activation))
        model.add(Dense(1))  # Output layer
        
        # Compile the model
        if solver == 'adam':
            optimizer = Adam(learning_rate=learning_rate)
        else:
            optimizer = SGD(learning_rate=learning_rate)
        
        model.compile(optimizer=optimizer, loss='mse')
        
        # Train the model
        early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        model.fit(X_train, y_train, epochs=max_iter, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose=0)
        
        # Evaluate the model
        y_pred = model.predict(X_val)
        mse = mean_squared_error(y_val, y_pred)
        
        return mse
    except Exception as e:
        print(f"Error during trial: {e}")
        return float('inf')

# Run the optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, timeout=600)  # Added timeout for safety

# Get the best hyperparameters
best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train the final model using the best hyperparameters
model = Sequential()
for units in best_params['hidden_layer_sizes']:
    model.add(Dense(units=units, activation=best_params['activation']))
model.add(Dense(1))  # Output layer

# Compile the model
if best_params['solver'] == 'adam':
    optimizer = Adam(learning_rate=best_params['learning_rate'])
else:
    optimizer = SGD(learning_rate=best_params['learning_rate'])

model.compile(optimizer=optimizer, loss='mse')

# Train the final model
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
model.fit(X_train, y_train, epochs=best_params['max_iter'], batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping], verbose=0)

# Predict and evaluate on test data
nn_pred_test = model.predict(X_test)
nn_mse_test = mean_squared_error(y_test, nn_pred_test)
nn_r2_test = r2_score(y_test, nn_pred_test)

print(f"Neural Network - Test MSE: {nn_mse_test}, R²: {nn_r2_test}")

# Convert predicted values back to original scale
nn_pred_test_original = scaler.inverse_transform(nn_pred_test)

# Display a few predicted values
print(f"Original Scale Predictions - Neural Network: {nn_pred_test_original[:5].ravel()}")


ImportError: To use Keras, you need to have `optree` installed. Install it via `pip install optree`

In [2]:
pip show optree

Name: optree
Version: 0.11.0
Summary: Optimized PyTree Utilities.
Home-page: 
Author: OpTree Contributors
Author-email: Xuehai Pan <XuehaiPan@pku.edu.cn>, Jie Ren <jieren9806@gmail.com>
License: Apache License, Version 2.0
Location: /home/patil.anjali/.local/lib/python3.9/site-packages
Requires: typing-extensions
Required-by: keras
Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install torch torchvision optuna

Defaulting to user installation because normal site-packages is not writeable
Collecting torch
  Downloading torch-2.3.1-cp39-cp39-manylinux1_x86_64.whl (779.1 MB)
[K     |████████████████████████████████| 779.1 MB 2.1 kB/s  eta 0:00:01     |███████▍                        | 179.3 MB 22.0 MB/s eta 0:00:28     |████████                        | 194.3 MB 21.2 MB/s eta 0:00:28     |████████▏                       | 197.7 MB 21.2 MB/s eta 0:00:28     |████████▎                       | 201.5 MB 21.2 MB/s eta 0:00:28     |█████████                       | 221.2 MB 11.6 MB/s eta 0:00:48     |█████████▊                      | 236.3 MB 10.4 MB/s eta 0:00:53     |███████████                     | 270.2 MB 10.8 MB/s eta 0:00:48     |█████████████████▋              | 427.8 MB 11.5 MB/s eta 0:00:31     |██████████████████              | 437.5 MB 6.9 MB/s eta 0:00:50     |███████████████████             | 462.8 MB 7.0 MB/s eta 0:00:46     |████████████████████▏           | 491.5 MB 11.7 MB/s eta 0:

In [6]:
pip install torch

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import mutual_info_regression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from scipy import stats
import optuna

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Load the dataset
file_path = 'tableau_data.csv'
df = pd.read_csv(file_path)

# Sample the data for quick processing
df_sample = df.sample(n=100000, random_state=42)

# Select relevant columns
selected_columns = [
    'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
    'ActivatedTimestamp', 'ClearedTimestamp', 'month', 'week', 'ResolutionTimeMinutes'
]
df_selected = df_sample[selected_columns]

# Convert categorical columns to numerical using Label Encoding
label_encoders = {}
for column in ['organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity']:
    le = LabelEncoder()
    df_selected[column] = le.fit_transform(df_selected[column])
    label_encoders[column] = le

# Convert timestamps to datetime and extract features
df_selected['ActivatedTimestamp'] = pd.to_datetime(df_selected['ActivatedTimestamp'])
df_selected['ClearedTimestamp'] = pd.to_datetime(df_selected['ClearedTimestamp'])
df_selected['ActivationHour'] = df_selected['ActivatedTimestamp'].dt.hour
df_selected['ClearanceHour'] = df_selected['ClearedTimestamp'].dt.hour
df_selected['ActivationDayOfWeek'] = df_selected['ActivatedTimestamp'].dt.dayofweek
df_selected['ClearanceDayOfWeek'] = df_selected['ClearedTimestamp'].dt.dayofweek
df_selected['ResolutionTime'] = (df_selected['ClearedTimestamp'] - df_selected['ActivatedTimestamp']).dt.total_seconds() / 60

# Remove rows with invalid resolution times
df_selected = df_selected[(df_selected['ResolutionTime'] >= 0) & (df_selected['ResolutionTime'] <= 10000)]

# Remove outliers
z_scores = np.abs(stats.zscore(df_selected['ResolutionTimeMinutes']))
df_selected = df_selected[z_scores < 3]

# Normalize the ResolutionTimeMinutes column
scaler = MinMaxScaler()
df_selected['ResolutionTimeMinutes'] = scaler.fit_transform(df_selected[['ResolutionTimeMinutes']])

# Define features and target
features = [
    'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
    'month', 'week', 'ActivationHour', 'ClearanceHour',
    'ActivationDayOfWeek', 'ClearanceDayOfWeek'
]
X = df_selected[features]
y = df_selected['ResolutionTimeMinutes']

# Scale the features
feature_scaler = StandardScaler()
X = feature_scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
y_tensor = torch.tensor(y, dtype=torch.float32).to(device).view(-1, 1)

# Split the data into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X_tensor, y_tensor, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Create DataLoader
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the PyTorch neural network model
class Net(nn.Module):
    def __init__(self, input_dim, hidden_layers, activation):
        super(Net, self).__init__()
        layers = []
        for units in hidden_layers:
            layers.append(nn.Linear(input_dim, units))
            if activation == 'relu':
                layers.append(nn.ReLU())
            elif activation == 'tanh':
                layers.append(nn.Tanh())
            input_dim = units
        layers.append(nn.Linear(input_dim, 1))
        self.net = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.net(x)

# Define the objective function for Optuna
def objective(trial):
    try:
        # Define the hyperparameter search space
        hidden_layers = trial.suggest_categorical('hidden_layers', [(100,), (100, 50), (150, 75), (200, 100)])
        activation = trial.suggest_categorical('activation', ['relu', 'tanh'])
        solver = trial.suggest_categorical('solver', ['adam', 'sgd'])
        lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
        n_epochs = trial.suggest_int('n_epochs', 50, 100)

        # Initialize the model, loss function, and optimizer
        model = Net(X_train.shape[1], hidden_layers, activation).to(device)
        criterion = nn.MSELoss()
        if solver == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=lr)
        else:
            optimizer = optim.SGD(model.parameters(), lr=lr)

        # Training loop
        model.train()
        for epoch in range(n_epochs):
            for X_batch, y_batch in train_loader:
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
        
        return val_loss
    except Exception as e:
        print(f"Error during trial: {e}")
        return float('inf')

# Run the optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, timeout=600)

# Get the best hyperparameters
best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train the final model using the best hyperparameters
best_hidden_layers = best_params['hidden_layers']
best_activation = best_params['activation']
best_solver = best_params['solver']
best_lr = best_params['lr']
best_n_epochs = best_params['n_epochs']

model = Net(X_train.shape[1], best_hidden_layers, best_activation).to(device)
criterion = nn.MSELoss()
if best_solver == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=best_lr)
else:
    optimizer = optim.SGD(model.parameters(), lr=best_lr)

# Training loop for the final model
model.train()
for epoch in range(best_n_epochs):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

# Evaluate the model on the test set
model.eval()
test_loss = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        test_loss += loss.item() * X_batch.size(0)
test_loss /= len(test_loader.dataset)

# # Calculate test R²
# y_test_pred = model(X_test).cpu().numpy()
# y_test_true = y_test.cpu().numpy()
# test_r2 = r2_score(y_test_true, y_test_pred)

# print(f"Neural Network - Test MSE: {test_loss}, R²: {test_r2}")

# # Convert predicted values back to original scale
# nn_pred_test_original = scaler.inverse_transform(y_test_pred)

# Display a


Using device: cuda


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected[column] = le.fit_transform(df_selected[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected[column] = le.fit_transform(df_selected[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected[column] = le.fit_transform(df_selected[column])
A value is trying to be s

Best hyperparameters: {'hidden_layers': (200, 100), 'activation': 'tanh', 'solver': 'adam', 'lr': 4.396022277642128e-05, 'n_epochs': 99}


RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.

In [3]:
y_test_pred = model(X_test).detach().cpu().numpy()
y_test_true = y_test.detach().cpu().numpy()
test_r2 = r2_score(y_test_true, y_test_pred)

print(f"Neural Network - Test MSE: {test_loss}, R²: {test_r2}")

# Convert predicted values back to original scale
nn_pred_test_original = scaler.inverse_transform(y_test_pred)

# Display a few predicted values
print(f"Original Scale Predictions - Neural Network: {nn_pred_test_original[:5].ravel()}")

Neural Network - Test MSE: 7.310122509195074e-05, R²: 0.9837267398834229
Original Scale Predictions - Neural Network: [-0.21367742  2.1354895   2.0554235  63.624157   -2.490572  ]


In [4]:
# Calculate test R²
y_test_pred = model(X_test).detach().cpu().numpy()
y_test_true = y_test.detach().cpu().numpy()

# Ensure no negative predictions
y_test_pred = np.maximum(y_test_pred, 0)

test_r2 = r2_score(y_test_true, y_test_pred)

print(f"Neural Network - Test MSE: {test_loss}, R²: {test_r2}")

# Convert predicted values back to original scale
nn_pred_test_original = scaler.inverse_transform(y_test_pred)

# Ensure no negative values after inverse transform
nn_pred_test_original = np.maximum(nn_pred_test_original, 0)

# Display a few predicted values
print(f"Original Scale Predictions - Neural Network: {nn_pred_test_original[:5].ravel()}")


Neural Network - Test MSE: 7.310122509195074e-05, R²: 0.9839843511581421
Original Scale Predictions - Neural Network: [ 0.         2.1354895  2.0554235 63.624157   0.       ]


In [8]:
def find_test_row(df_main, df_sample):
    random_state = 0
    while True:
        test_row = df_main.sample(n=1, random_state=random_state)
        if not test_row.index.isin(df_sample.index).any():
            return test_row
        random_state += 1

# Find the test row
test_row = find_test_row(df, df_sample)

# Print the selected test row
print("Test row found:")
print(test_row)

KeyboardInterrupt: 

In [71]:
# Filter the DataFrame to include only rows with ResolutionTimeMinutes between 2 and 2.9
filtered_df = df[(df['ResolutionTimeMinutes'] >= 1.6) & (df['ResolutionTimeMinutes'] <= 1.9)]

In [72]:
xx = filtered_df.sample(n=10,random_state = 1224)
xx

Unnamed: 0,AssetId,organizationid,organizationcountrycode,locationid,AssetType,AlarmLabel,AlarmMessage,Severity,ActivatedTimestamp,ClearedTimestamp,...,ResolutionTimeMinutes,Lemmas_No_Stop_Words,Tokens,organizationid_code,organizationcountrycode_code,locationid_code,AssetType_code,AlarmLabel_code,AlarmMessage_code,Severity_code
37571,9339ab57-075b-401b-9519-382ec1f5c9dc,59062bc2-16ca-4b9d-b6e7-2720ebfd802e,GB,ebc0d7c9-9250-4950-b17f-64e5769a5e00,RPDU,High Temperature Threshold Violation,Rack PDU 1: High temperature threshold violati...,WARNING,2021-05-06T16:34:04Z,2021-05-06T16:35:52Z,...,1.8,"['high', 'temperature', 'threshold', 'violation']","['high', 'temperature', 'threshold', 'violation']",1168,34,9498,28,4601,23978,4
22830,0884b460-6ae1-497d-ac75-01d81ae54bdc,a6e2c146-021d-4036-9973-de0c1975692d,US,2f32a9eb-dd5b-4355-9f0e-82f861bba81f,POD,NetBotz Appliance Alarm,The value of 'redact' was too high; but has no...,ERROR,2021-06-15T02:18:55Z,2021-06-15T02:20:36Z,...,1.683333,"['netbotz', 'appliance', 'alarm']","['netbotz', 'appliance', 'alarm']",2176,93,1918,23,6513,39171,1
52764,04d82354-9794-4da1-a8c0-03e6d1be4013,f86de2dc-3bde-429a-94af-0f0850e10cca,US,0eb42999-2fcc-46f4-9a75-c0ad2eb9e012,ATS,Source Status Fault,Source A is unavailable or a status problem ex...,WARNING,2021-03-13T23:43:53Z,2021-03-13T23:45:41Z,...,1.8,"['source', 'status', 'fault']","['source', 'status', 'fault']",3181,93,606,3,8150,27669,4
55801,289b3e86-2107-4ab7-a2b1-d173f3868866,36cafccc-187a-445b-b601-d46eb06bf19f,FR,eaa13df7-5840-40c4-bf57-5cae36f7cd37,RPDU,Phase Near Overload Cleared,Rack PDU 1: A near overload threshold violatio...,WARNING,2021-05-22T07:52:48Z,2021-05-22T07:54:26Z,...,1.633333,"['phase', 'near', 'overload', 'clear']","['phase', 'near', 'overload', 'cleared']",708,33,9455,28,6969,23437,4
84601,f1daed81-c618-4bc2-88c5-8017773596b4,4bea371d-7048-490a-a933-fdf63e8f9ade,US,86c03d1c-85f0-4824-af4d-a1db1f439530,RPDU,Low Humidity Threshold Violation Cleared,Rack PDU 1: Low humidity threshold violation c...,WARNING,2021-03-16T03:53:37Z,2021-03-16T03:55:13Z,...,1.6,"['low', 'humidity', 'threshold', 'violation', ...","['low', 'humidity', 'threshold', 'violation', ...",997,93,5531,28,5856,24335,4
72269,27ae3ac2-475f-4973-8f82-c963dd4f7c7f,4a2c4e98-ab37-4e5f-92e8-3a956d1794f9,RO,Default Location,RPDU,CAN Bus Off,Rack PDU 1: CAN bus on,WARNING,2021-06-14T18:35:06Z,2021-06-14T18:36:55Z,...,1.816667,['bus'],"['can', 'bus', 'off']",962,79,6517,28,1917,23470,4
20795,56899c85-a973-4b22-a367-0a2c3174cff7,b089f434-eb43-4d8d-a86d-a40e3279414a,US,cc570913-ba14-4ea6-9012-9fd788944312,POD,NetBotz Appliance Alarm,The value of 'redact' was too low; but has now...,INFO,2021-03-11T03:07:15Z,2021-03-11T03:09:05Z,...,1.833333,"['netbotz', 'appliance', 'alarm']","['netbotz', 'appliance', 'alarm']",2309,93,8275,23,6513,39173,3
66355,d648f7bb-8625-449f-bad3-21cfd161f79f,fe89c15f-eb13-45ad-8a8f-2fa7b238a71b,US,Default Location,RPDU,Phase Near Overload,Rack PDU 1: A near overload threshold violatio...,WARNING,2021-04-23T11:55:04Z,2021-04-23T11:56:54Z,...,1.833333,"['phase', 'near', 'overload']","['phase', 'near', 'overload']",3262,93,6517,28,6968,23428,4
89595,c9ac5b92-4e4c-411c-ac81-316d8daae9cc,36cafccc-187a-445b-b601-d46eb06bf19f,FR,cac4ce42-5f3e-4e8e-9b18-2e8d9fd2811c,RPDU,Bank Overload Cleared,Rack PDU 1: An overload threshold violation no...,CRITICAL,2021-02-27T04:25:02Z,2021-02-27T04:26:41Z,...,1.65,"['bank', 'overload', 'clear']","['bank', 'overload', 'cleared']",708,33,8207,28,921,23460,0
37060,af2e544d-6aab-4790-9155-01abd9b29972,52763390-98b2-4fb8-9ffb-dc35c8147582,US,2fc3aee0-4771-417c-8a5a-115d3cc737a8,PDU,High Module Current Alarm,Distribution module 5 breaker L2 current is ab...,WARNING,2021-06-14T19:35:13Z,2021-06-14T19:37:06Z,...,1.883333,"['high', 'module', 'current', 'alarm']","['high', 'module', 'current', 'alarm']",1076,93,1936,22,4547,5693,4


In [73]:
test_row=xx.loc[[55801]]
test_row

Unnamed: 0,AssetId,organizationid,organizationcountrycode,locationid,AssetType,AlarmLabel,AlarmMessage,Severity,ActivatedTimestamp,ClearedTimestamp,...,ResolutionTimeMinutes,Lemmas_No_Stop_Words,Tokens,organizationid_code,organizationcountrycode_code,locationid_code,AssetType_code,AlarmLabel_code,AlarmMessage_code,Severity_code
55801,289b3e86-2107-4ab7-a2b1-d173f3868866,36cafccc-187a-445b-b601-d46eb06bf19f,FR,eaa13df7-5840-40c4-bf57-5cae36f7cd37,RPDU,Phase Near Overload Cleared,Rack PDU 1: A near overload threshold violatio...,WARNING,2021-05-22T07:52:48Z,2021-05-22T07:54:26Z,...,1.633333,"['phase', 'near', 'overload', 'clear']","['phase', 'near', 'overload', 'cleared']",708,33,9455,28,6969,23437,4


In [49]:
test_row = df[
    (df_sample['AssetId'].astype(str) == '6255f030-8744-4d1e-b7ca-f6f3e4e4774d') &
    (df_sample['organizationid'].astype(str) == 'c7553c18-7a3d-4728-8d19-b10267ae72e5') &
    (df_sample['organizationcountrycode'].astype(str) == 'US') &
    (df_sample['locationid'].astype(str) == '2ceb1e29-6c3a-44f3-8ab0-4f40e54b870e') &
    (df_sample['AssetType'].astype(str) == 'UPS') &
    (df_sample['AlarmLabel'].astype(str) == 'Battery Charger Fault') &
    (df_sample['AlarmMessage'].astype(str) == 'A battery charger error exists.') &
    (df_sample['Severity'].astype(str) == 'WARNING') 
]

  test_row = df[


In [53]:
test_row.sort_values(by ='ResolutionTimeMinutes', ascending = False).head(5)

Unnamed: 0,AssetId,organizationid,organizationcountrycode,locationid,AssetType,AlarmLabel,AlarmMessage,Severity,ActivatedTimestamp,ClearedTimestamp,...,ResolutionTimeMinutes,Lemmas_No_Stop_Words,Tokens,organizationid_code,organizationcountrycode_code,locationid_code,AssetType_code,AlarmLabel_code,AlarmMessage_code,Severity_code
70759,6255f030-8744-4d1e-b7ca-f6f3e4e4774d,c7553c18-7a3d-4728-8d19-b10267ae72e5,US,2ceb1e29-6c3a-44f3-8ab0-4f40e54b870e,UPS,Battery Charger Fault,A battery charger error exists.,WARNING,2021-06-28T09:50:17Z,2021-06-28T09:50:50Z,...,0.55,"['battery', 'charger', 'fault']","['battery', 'charger', 'fault']",2579,93,1809,33,1012,1177,4
72736,6255f030-8744-4d1e-b7ca-f6f3e4e4774d,c7553c18-7a3d-4728-8d19-b10267ae72e5,US,2ceb1e29-6c3a-44f3-8ab0-4f40e54b870e,UPS,Battery Charger Fault,A battery charger error exists.,WARNING,2021-04-29T05:00:59Z,2021-04-29T05:01:31Z,...,0.533333,"['battery', 'charger', 'fault']","['battery', 'charger', 'fault']",2579,93,1809,33,1012,1177,4
38214,6255f030-8744-4d1e-b7ca-f6f3e4e4774d,c7553c18-7a3d-4728-8d19-b10267ae72e5,US,2ceb1e29-6c3a-44f3-8ab0-4f40e54b870e,UPS,Battery Charger Fault,A battery charger error exists.,WARNING,2021-02-26T00:20:29Z,2021-02-26T00:21:01Z,...,0.533333,"['battery', 'charger', 'fault']","['battery', 'charger', 'fault']",2579,93,1809,33,1012,1177,4
85352,6255f030-8744-4d1e-b7ca-f6f3e4e4774d,c7553c18-7a3d-4728-8d19-b10267ae72e5,US,2ceb1e29-6c3a-44f3-8ab0-4f40e54b870e,UPS,Battery Charger Fault,A battery charger error exists.,WARNING,2021-02-18T16:20:41Z,2021-02-18T16:21:13Z,...,0.533333,"['battery', 'charger', 'fault']","['battery', 'charger', 'fault']",2579,93,1809,33,1012,1177,4
84853,6255f030-8744-4d1e-b7ca-f6f3e4e4774d,c7553c18-7a3d-4728-8d19-b10267ae72e5,US,2ceb1e29-6c3a-44f3-8ab0-4f40e54b870e,UPS,Battery Charger Fault,A battery charger error exists.,WARNING,2021-03-08T14:31:53Z,2021-03-08T14:32:25Z,...,0.533333,"['battery', 'charger', 'fault']","['battery', 'charger', 'fault']",2579,93,1809,33,1012,1177,4


In [42]:
test_row = test_row.loc[[5157]]

In [43]:
test_row

Unnamed: 0,AssetId,organizationid,organizationcountrycode,locationid,AssetType,AlarmLabel,AlarmMessage,Severity,ActivatedTimestamp,ClearedTimestamp,...,ResolutionTimeMinutes,Lemmas_No_Stop_Words,Tokens,organizationid_code,organizationcountrycode_code,locationid_code,AssetType_code,AlarmLabel_code,AlarmMessage_code,Severity_code
5157,6255f030-8744-4d1e-b7ca-f6f3e4e4774d,c7553c18-7a3d-4728-8d19-b10267ae72e5,US,2ceb1e29-6c3a-44f3-8ab0-4f40e54b870e,UPS,Battery Charger Fault,A battery charger error exists.,WARNING,2021-06-16T06:05:25Z,2021-06-16T06:05:55Z,...,0.5,"['battery', 'charger', 'fault']","['battery', 'charger', 'fault']",2579,93,1809,33,1012,1177,4


In [25]:
# Preprocess the test row
def preprocess_test_row(test_row, label_encoders, feature_scaler, scaler):
    # Encode categorical columns
    for column in ['organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity']:
        test_row[column] = label_encoders[column].transform(test_row[column].astype(str))
    
    # Convert timestamps to datetime and extract features
    test_row['ActivatedTimestamp'] = pd.to_datetime(test_row['ActivatedTimestamp'])
    test_row['ClearedTimestamp'] = pd.to_datetime(test_row['ClearedTimestamp'])
    test_row['ActivationHour'] = test_row['ActivatedTimestamp'].dt.hour
    test_row['ClearanceHour'] = test_row['ClearedTimestamp'].dt.hour
    test_row['ActivationDayOfWeek'] = test_row['ActivatedTimestamp'].dt.dayofweek
    test_row['ClearanceDayOfWeek'] = test_row['ClearedTimestamp'].dt.dayofweek
    test_row['ResolutionTime'] = (test_row['ClearedTimestamp'] - test_row['ActivatedTimestamp']).dt.total_seconds() / 60

    # Select and scale features
    features = [
        'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
        'month', 'week', 'ActivationHour', 'ClearanceHour',
        'ActivationDayOfWeek', 'ClearanceDayOfWeek'
    ]
    X_test = test_row[features].values
    X_test = feature_scaler.transform(X_test)
    
    return X_test

# Preprocess the test row
X_test_processed = preprocess_test_row(test_row.copy(), label_encoders, feature_scaler, scaler)

# Convert to PyTorch tensor and move to device
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32).to(device)

# Make prediction
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).detach().cpu().numpy()

# Ensure no negative predictions
y_test_pred = np.maximum(y_test_pred, 0)

# Convert predicted values back to original scale
nn_pred_test_original = scaler.inverse_transform(y_test_pred)

# Print the predicted resolution time
print(f"Predicted Resolution Time: {nn_pred_test_original[0][0]}")


Predicted Resolution Time: 1.7662739753723145




In [45]:


# Preprocess the test row
def preprocess_test_row(test_row, label_encoders, feature_scaler, scaler):
    # Encode categorical columns
    for column in [ 'organizationcountrycode',  'AssetType', 'AlarmLabel', 'Severity']:
        test_row[column] = label_encoders[column].transform(test_row[column].astype(str))
    
    # Convert timestamps to datetime and extract features
    test_row['ActivatedTimestamp'] = pd.to_datetime(test_row['ActivatedTimestamp'])
    test_row['ClearedTimestamp'] = pd.to_datetime(test_row['ClearedTimestamp'])
    test_row['ActivationHour'] = test_row['ActivatedTimestamp'].dt.hour
    test_row['ClearanceHour'] = test_row['ClearedTimestamp'].dt.hour
    test_row['ActivationDayOfWeek'] = test_row['ActivatedTimestamp'].dt.dayofweek
    test_row['ClearanceDayOfWeek'] = test_row['ClearedTimestamp'].dt.dayofweek
    test_row['ResolutionTime'] = (test_row['ClearedTimestamp'] - test_row['ActivatedTimestamp']).dt.total_seconds() / 60

    # Select and scale features
    features = [
        'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
        'month', 'week', 'ActivationHour', 'ClearanceHour',
        'ActivationDayOfWeek', 'ClearanceDayOfWeek'
    ]
    X_test = test_row[features].values
    X_test = feature_scaler.transform(X_test)
    
    return X_test


# Preprocess the test row
X_test_processed = preprocess_test_row(test_row.copy(), label_encoders, feature_scaler, scaler)

# Convert to PyTorch tensor and move to device
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32).to(device)

# Make prediction
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).detach().cpu().numpy()

# Ensure no negative predictions
y_test_pred = np.maximum(y_test_pred, 0)

# Convert predicted values back to original scale
nn_pred_test_original = scaler.inverse_transform(y_test_pred)

# Add the prediction to the test DataFrame
test_row['PredictedResolutionTime'] = nn_pred_test_original[0][0]

# Print the DataFrame with the predicted values
print("DataFrame with predicted values:")
print(test_row)


DataFrame with predicted values:
                                   AssetId  \
5157  6255f030-8744-4d1e-b7ca-f6f3e4e4774d   

                            organizationid organizationcountrycode  \
5157  c7553c18-7a3d-4728-8d19-b10267ae72e5                      US   

                                locationid AssetType             AlarmLabel  \
5157  2ceb1e29-6c3a-44f3-8ab0-4f40e54b870e       UPS  Battery Charger Fault   

                         AlarmMessage Severity    ActivatedTimestamp  \

          ClearedTimestamp  ...             Lemmas_No_Stop_Words  \
5157  2021-06-16T06:05:55Z  ...  ['battery', 'charger', 'fault']   

                               Tokens  organizationid_code  \
5157  ['battery', 'charger', 'fault']                 2579   

     organizationcountrycode_code locationid_code  AssetType_code  \
5157                           93            1809              33   

      AlarmLabel_code  AlarmMessage_code  Severity_code  \
5157             1012               1177 



In [47]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import mutual_info_regression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, r2_score
from scipy import stats
import optuna

# Check if GPU is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')


# Select relevant columns
selected_columns = [
    'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
    'ActivatedTimestamp', 'ClearedTimestamp', 'month', 'week', 'ResolutionTimeMinutes'
]
df_selected = df_sample[selected_columns]

# Convert categorical columns to numerical using Label Encoding
label_encoders = {}
for column in ['organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity']:
    le = LabelEncoder()
    df_selected[column] = le.fit_transform(df_selected[column])
    label_encoders[column] = le

# Convert timestamps to datetime and extract features
df_selected['ActivatedTimestamp'] = pd.to_datetime(df_selected['ActivatedTimestamp'])
df_selected['ClearedTimestamp'] = pd.to_datetime(df_selected['ClearedTimestamp'])
df_selected['ActivationHour'] = df_selected['ActivatedTimestamp'].dt.hour
df_selected['ClearanceHour'] = df_selected['ClearedTimestamp'].dt.hour
df_selected['ActivationDayOfWeek'] = df_selected['ActivatedTimestamp'].dt.dayofweek
df_selected['ClearanceDayOfWeek'] = df_selected['ClearedTimestamp'].dt.dayofweek
df_selected['ResolutionTime'] = (df_selected['ClearedTimestamp'] - df_selected['ActivatedTimestamp']).dt.total_seconds() / 60

# Remove rows with invalid resolution times
df_selected = df_selected[(df_selected['ResolutionTime'] >= 0) & (df_selected['ResolutionTime'] <= 10000)]

# Remove outliers
z_scores = np.abs(stats.zscore(df_selected['ResolutionTimeMinutes']))
df_selected = df_selected[z_scores < 3]

# Normalize the ResolutionTimeMinutes column
scaler = MinMaxScaler()
df_selected['ResolutionTimeMinutes'] = scaler.fit_transform(df_selected[['ResolutionTimeMinutes']])

# Define features and target
features = [
    'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
    'month', 'week', 'ActivationHour', 'ClearanceHour',
    'ActivationDayOfWeek', 'ClearanceDayOfWeek'
]
X = df_selected[features]
y = df_selected['ResolutionTimeMinutes']

# Scale the features
feature_scaler = StandardScaler()
X = feature_scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
y_tensor = torch.tensor(y, dtype=torch.float32).to(device).view(-1, 1)

# Split the data into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X_tensor, y_tensor, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Create DataLoader
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
test_dataset = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define the PyTorch neural network model
class Net(nn.Module):
    def __init__(self, input_dim, hidden_layers, activation):
        super(Net, self).__init__()
        layers = []
        for units in hidden_layers:
            layers.append(nn.Linear(input_dim, units))
            if activation == 'relu':
                layers.append(nn.ReLU())
            elif activation == 'tanh':
                layers.append(nn.Tanh())
            input_dim = units
        layers.append(nn.Linear(input_dim, 1))
        self.net = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.net(x)

# Define the objective function for Optuna
def objective(trial):
    try:
        # Define the hyperparameter search space
        hidden_layers = trial.suggest_categorical('hidden_layers', [(100,), (100, 50), (150, 75), (200, 100)])
        activation = trial.suggest_categorical('activation', ['relu', 'tanh'])
        solver = trial.suggest_categorical('solver', ['adam', 'sgd'])
        lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
        n_epochs = trial.suggest_int('n_epochs', 50, 100)

        # Initialize the model, loss function, and optimizer
        model = Net(X_train.shape[1], hidden_layers, activation).to(device)
        criterion = nn.MSELoss()
        if solver == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=lr)
        else:
            optimizer = optim.SGD(model.parameters(), lr=lr)

        # Training loop
        model.train()
        for epoch in range(n_epochs):
            for X_batch, y_batch in train_loader:
                optimizer.zero_grad()
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                loss.backward()
                optimizer.step()
        
        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item() * X_batch.size(0)
        val_loss /= len(val_loader.dataset)
        
        return val_loss
    except Exception as e:
        print(f"Error during trial: {e}")
        return float('inf')

# Run the optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, timeout=600)

# Get the best hyperparameters
best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train the final model using the best hyperparameters
best_hidden_layers = best_params['hidden_layers']
best_activation = best_params['activation']
best_solver = best_params['solver']
best_lr = best_params['lr']
best_n_epochs = best_params['n_epochs']

model = Net(X_train.shape[1], best_hidden_layers, best_activation).to(device)
criterion = nn.MSELoss()
if best_solver == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=best_lr)
else:
    optimizer = optim.SGD(model.parameters(), lr=best_lr)

# Training loop for the final model
model.train()
for epoch in range(best_n_epochs):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

# Evaluate the model on the test set
model.eval()
test_loss = 0
with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        test_loss += loss.item() * X_batch.size(0)
test_loss /= len(test_loader.dataset)

# Calculate test R²
y_test_pred = model(X_test).detach().cpu().numpy()
y_test_true = y_test.detach().cpu().numpy()

# Ensure no negative predictions
y_test_pred = np.maximum(y_test_pred, 0)

test_r2 = r2_score(y_test_true, y_test_pred)

print(f"Neural Network - Test MSE: {test_loss}, R²: {test_r2}")

# Convert predicted values back to original scale
nn_pred_test_original = scaler.inverse_transform(y_test_pred)

# Ensure no negative values after inverse transform
nn_pred_test_original = np.maximum(nn_pred_test_original, 0)

# Display a few predicted values
print(f"Original Scale Predictions - Neural Network: {nn_pred_test_original[:5].ravel()}")



Using device: cuda


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected[column] = le.fit_transform(df_selected[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected[column] = le.fit_transform(df_selected[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected[column] = le.fit_transform(df_selected[column])
A value is trying to be s

Best hyperparameters: {'hidden_layers': (100, 50), 'activation': 'tanh', 'solver': 'adam', 'lr': 0.0009921666490312344, 'n_epochs': 60}
Neural Network - Test MSE: 7.131243552485371e-05, R²: 0.9841251373291016
Original Scale Predictions - Neural Network: [ 2.2933507  3.3710344  3.1168048 27.840858   3.753324 ]


In [74]:

# Preprocess the test row
def preprocess_test_row(test_row, label_encoders, feature_scaler, scaler):
    # Encode categorical columns
    for column in [ 'organizationcountrycode',  'AssetType', 'AlarmLabel', 'Severity']:
        test_row[column] = label_encoders[column].transform(test_row[column].astype(str))
    
    # Convert timestamps to datetime and extract features
    test_row['ActivatedTimestamp'] = pd.to_datetime(test_row['ActivatedTimestamp'])
    test_row['ClearedTimestamp'] = pd.to_datetime(test_row['ClearedTimestamp'])
    test_row['ActivationHour'] = test_row['ActivatedTimestamp'].dt.hour
    test_row['ClearanceHour'] = test_row['ClearedTimestamp'].dt.hour
    test_row['ActivationDayOfWeek'] = test_row['ActivatedTimestamp'].dt.dayofweek
    test_row['ClearanceDayOfWeek'] = test_row['ClearedTimestamp'].dt.dayofweek
    test_row['ResolutionTime'] = (test_row['ClearedTimestamp'] - test_row['ActivatedTimestamp']).dt.total_seconds() / 60

    # Select and scale features
    features = [
        'organizationcountrycode', 'AssetType', 'AlarmLabel', 'Severity',
        'month', 'week', 'ActivationHour', 'ClearanceHour',
        'ActivationDayOfWeek', 'ClearanceDayOfWeek'
    ]
    X_test = test_row[features].values
    X_test = feature_scaler.transform(X_test)
    
    return X_test


# Preprocess the test row
X_test_processed = preprocess_test_row(test_row.copy(), label_encoders, feature_scaler, scaler)

# Convert to PyTorch tensor and move to device
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32).to(device)

# Make prediction
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor).detach().cpu().numpy()

# Ensure no negative predictions
y_test_pred = np.maximum(y_test_pred, 0)

# Convert predicted values back to original scale
nn_pred_test_original = scaler.inverse_transform(y_test_pred)

# Add the prediction to the test DataFrame
test_row['PredictedResolutionTime'] = nn_pred_test_original[0][0]

# Print the DataFrame with the predicted values
print("DataFrame with predicted values:")
print(test_row)

DataFrame with predicted values:
                                    AssetId  \
55801  289b3e86-2107-4ab7-a2b1-d173f3868866   

                             organizationid organizationcountrycode  \
55801  36cafccc-187a-445b-b601-d46eb06bf19f                      FR   

                                 locationid AssetType  \
55801  eaa13df7-5840-40c4-bf57-5cae36f7cd37      RPDU   

                        AlarmLabel  \
55801  Phase Near Overload Cleared   

                                            AlarmMessage Severity  \

         ActivatedTimestamp      ClearedTimestamp  ...  \
55801  2021-05-22T07:52:48Z  2021-05-22T07:54:26Z  ...   

                         Lemmas_No_Stop_Words  \
55801  ['phase', 'near', 'overload', 'clear']   

                                         Tokens  organizationid_code  \
55801  ['phase', 'near', 'overload', 'cleared']                  708   

      organizationcountrycode_code locationid_code  AssetType_code  \
55801                           33  

