# ML Model Example

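This example provides a simple query tutorial for InfluxDB Cloud Dedicated: it queries joint coordinate data, trains a convolutional autoencoder on a normal recording, and flags anomalous readings in other recordings.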

In [58]:
import influxdb_client_3 as InfluxDBClient3
import numpy as np
from keras.layers import Input, Dense, Conv1D, Flatten, Reshape, TimeDistributed, Normalization
from keras.models import Model
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import datetime
from keras.callbacks import TensorBoard
from huggingface_hub import push_to_hub_keras
import plotly.io as pio
import plotly.express as px
import plotly.graph_objects as go
pio.renderers.default = "vscode"
import os, datetime


In [59]:
# Connection settings are read from the environment so that no credential is
# stored in the notebook. INFLUXDB_TOKEN is required; the other settings fall
# back to the values this example was written against.
# NOTE(review): a token was previously committed in this cell; treat it as
# compromised and rotate it.
token = os.environ.get('INFLUXDB_TOKEN')
if not token:
    raise RuntimeError(
        'Set the INFLUXDB_TOKEN environment variable before running this notebook.'
    )
host = os.environ.get('INFLUXDB_HOST', 'eu-central-1-1.aws.cloud2.influxdata.com')
org = os.environ.get('INFLUXDB_ORG', 'Jay-IOx')
db = os.environ.get('INFLUXDB_DATABASE', 'robot')

# Client used by every query cell below.
client = InfluxDBClient3.InfluxDBClient3(token=token,
                         host=host,
                         org=org,
                         database=db)
    
    


In [60]:
# InfluxQL: 5-point moving average of the last "coord" value in each 3 s bucket
# (gaps filled linearly) over the first time window of the "joints" measurement.
query = '''SELECT MOVING_AVERAGE(last("coord"), 5) as coord FROM "joints" WHERE time >= 1697030174180ms and time <= 1697030462986ms GROUP BY time(3s) fill(linear)'''
table = client.query(query=query, language="influxql")

# Convert to pandas one step at a time: discard the measurement-name column,
# index by timestamp, then remove rows in which every value is missing.
normal = table.to_pandas()
normal = normal.drop(columns=['iox::measurement'])
normal = normal.set_index('time')
normal = normal.dropna(how='all')

# Quick visual check of the retrieved series.
fig = px.line(normal, y=["coord"], title='raw coord')
fig.show()

In [61]:
# Same smoothing query as for the first recording, over the second time window.
query = '''SELECT MOVING_AVERAGE(last("coord"), 5) as coord FROM "joints" WHERE time >= 1697030663033ms and time <= 1697030872564ms GROUP BY time(3s) fill(linear)'''
table = client.query(query=query, language="influxql")

# Result table -> DataFrame indexed by timestamp, without the measurement-name
# column and without rows that are entirely empty.
death_loop = (
    table.to_pandas()
    .drop(columns=['iox::measurement'])
    .set_index('time')
    .dropna(how='all')
)

# Plot the series stored in `death_loop`.
fig = px.line(death_loop, y=["coord"], title='raw coord')
fig.show()

In [62]:
# Same smoothing query again, this time over the third time window.
query = '''SELECT MOVING_AVERAGE(last("coord"), 5) as coord FROM "joints" WHERE time >= 1697031873606ms and time <= 1697032040567ms GROUP BY time(3s) fill(linear)'''
table = client.query(query=query, language="influxql")

# Build the DataFrame step by step: drop the measurement-name column, use the
# timestamp as the index, and discard rows where all values are missing.
dropped = table.to_pandas()
dropped = dropped.drop(columns=['iox::measurement'])
dropped = dropped.set_index('time')
dropped = dropped.dropna(how='all')

# Plot the series stored in `dropped`.
fig = px.line(dropped, y=["coord"], title='raw coord')
fig.show()

In [63]:


# Build and train a dilated-convolution autoencoder on the `normal` recording.
# The model learns to reconstruct windows of `timesteps` consecutive 'coord'
# values; later cells use its reconstruction error to flag anomalies.

# Normalization layer whose statistics are learned from the training signal
normalizer = Normalization(axis=-1)
normalizer.adapt(normal[['coord']])

# Parameters for the model
timesteps = 15  # Number of timesteps in each sequence
input_dim = 1    # Since we are only using 'coord'
batch_size = 16
epochs = 100

# Slice the series into overlapping windows - the data must already be sorted
# by time. Window i covers rows i .. i+timesteps-1.
coord_values = normal['coord'].to_numpy()
if len(coord_values) <= timesteps:
    raise ValueError(
        f"need more than {timesteps} rows to build training windows, got {len(coord_values)}"
    )
X_windows = np.array([coord_values[i:i+timesteps] for i in range(len(coord_values) - timesteps)])
# Add the feature axis explicitly so the arrays match the (timesteps, input_dim)
# shape declared by the Input layer instead of relying on implicit reshaping.
X_windows = X_windows[..., np.newaxis]

# Building the TCN autoencoder model
input_shape = (timesteps, input_dim)

inputs = Input(shape=input_shape)

# Apply normalization
normalized_inputs = normalizer(inputs)

# Encoder: stacked Conv1D layers with growing dilation, then flattened
encoded = Conv1D(filters=32, kernel_size=3, dilation_rate=1, activation='relu', padding='same')(normalized_inputs)
encoded = Conv1D(filters=32, kernel_size=3, dilation_rate=2, activation='relu', padding='same')(encoded)
encoded = Conv1D(filters=32, kernel_size=3, dilation_rate=4, activation='relu', padding='same')(encoded)
encoded = Flatten()(encoded)

# Bottleneck
bottleneck = Dense(16, activation='relu')(encoded)

# Decoder: expand the bottleneck back to (timesteps, 32), followed by Conv1D layers
decoded = Dense(timesteps * 32, activation='relu')(bottleneck)
decoded = Reshape((timesteps, 32))(decoded)
decoded = Conv1D(filters=32, kernel_size=3, dilation_rate=2, activation='relu', padding='same')(decoded)
decoded = Conv1D(filters=64, kernel_size=3, dilation_rate=1, activation='relu', padding='same')(decoded)

# Output layer: one value per timestep
decoded = TimeDistributed(Dense(input_dim))(decoded)

autoencoder = Model(inputs, decoded)
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
autoencoder.summary()

# Hold out the final 20% of the windows for validation. Consecutive windows
# share all but one point, so a shuffled split would put near-copies of the
# training windows into the validation set and make val_loss uninformative.
X_train, X_test = train_test_split(X_windows, test_size=0.2, shuffle=False)

# Directory to store logs
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Creating the TensorBoard callback
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# Train the model; an autoencoder's target is its own input
history = autoencoder.fit(X_train, X_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, X_test), callbacks=[tensorboard_callback])


Model: "model_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_8 (InputLayer)        [(None, 15, 1)]           0         
                                                                 
 normalization_7 (Normaliza  (None, 15, 1)             3         
 tion)                                                           
                                                                 
 conv1d_35 (Conv1D)          (None, 15, 32)            128       
                                                                 
 conv1d_36 (Conv1D)          (None, 15, 32)            3104      
                                                                 
 conv1d_37 (Conv1D)          (None, 15, 32)            3104      
                                                                 
 flatten_7 (Flatten)         (None, 480)               0         
                                                           

In [64]:
# Score the `normal` recording with the trained autoencoder and flag anomalies.
# NOTE: this cell overwrites `normal` with a trimmed, annotated copy, so re-run
# the query cell that loads `normal` before running this cell a second time.

# Slice the series into overlapping windows; window i covers rows
# i .. i+timesteps-1. The "+ 1" keeps the final window so the last row of the
# series is scored as well.
coord_values = normal['coord'].to_numpy()
if len(coord_values) < timesteps:
    raise ValueError(f"need at least {timesteps} rows to build a window, got {len(coord_values)}")
X_pred = np.array([coord_values[i:i+timesteps] for i in range(len(coord_values) - timesteps + 1)])

# Use the autoencoder to reconstruct every window. The trailing axis is the
# single feature dimension declared by the model's Input layer.
predictions = autoencoder.predict(X_pred[..., np.newaxis])

# Squared reconstruction error of the last point of each window, compared
# against that same point (row i+timesteps-1), not the row that follows it.
mse = np.power(X_pred[:, -1] - predictions[:, -1, 0], 2)

# Scale the error to a percentage of this recording's own error range.
# Because the scale is min-max per recording, the largest error always maps to
# 100%; a constant error (zero range) maps to 0% instead of dividing by zero.
min_mse = np.min(mse)
max_mse = np.max(mse)
mse_span = max_mse - min_mse
if mse_span > 0:
    mse_percentage = ((mse - min_mse) / mse_span) * 100
else:
    mse_percentage = np.zeros_like(mse)

# Detect anomalies by comparing the scaled reconstruction error to some threshold
threshold = 60  # Define a threshold value (in percentage)

# Keep only the rows that have a score (the last row of each window) and
# attach the results to them.
normal = normal.iloc[timesteps - 1:].copy()
normal['is_anomalous'] = mse_percentage > threshold
normal['mse_percentage'] = mse_percentage
normal['threshold'] = threshold

# Rows flagged as anomalous, selected once for both plot axes
anomalous_rows = normal[normal['is_anomalous']]

# Create a new figure
fig = go.Figure()

# Add a line for the full series
fig.add_trace(go.Scatter(
    x=normal.index,
    y=normal["coord"],
    mode='lines',
    name='coord',
    line=dict(color='green'),  # specify line color here
))

# Add markers for the anomalous points
fig.add_trace(go.Scatter(
    x=anomalous_rows.index,
    y=anomalous_rows["coord"],
    mode='markers',
    name='Anomalous coord',
    marker=dict(color='red', size=5),  # specify marker color and size here
))

# Show the plot and persist the annotated rows
fig.show()
normal.to_csv('normal_data_result.csv')



In [65]:
# Score the `death_loop` recording with the trained autoencoder and flag anomalies.
# NOTE: this cell overwrites `death_loop` with a trimmed, annotated copy, so
# re-run the query cell that loads it before running this cell a second time.

# Slice the series into overlapping windows; window i covers rows
# i .. i+timesteps-1. The "+ 1" keeps the final window so the last row of the
# series is scored as well.
coord_values = death_loop['coord'].to_numpy()
if len(coord_values) < timesteps:
    raise ValueError(f"need at least {timesteps} rows to build a window, got {len(coord_values)}")
X_pred = np.array([coord_values[i:i+timesteps] for i in range(len(coord_values) - timesteps + 1)])

# Use the autoencoder to reconstruct every window. The trailing axis is the
# single feature dimension declared by the model's Input layer.
predictions = autoencoder.predict(X_pred[..., np.newaxis])

# Squared reconstruction error of the last point of each window, compared
# against that same point (row i+timesteps-1), not the row that follows it.
mse = np.power(X_pred[:, -1] - predictions[:, -1, 0], 2)

# Scale the error to a percentage of this recording's own error range.
# Because the scale is min-max per recording, the largest error always maps to
# 100%; a constant error (zero range) maps to 0% instead of dividing by zero.
min_mse = np.min(mse)
max_mse = np.max(mse)
mse_span = max_mse - min_mse
if mse_span > 0:
    mse_percentage = ((mse - min_mse) / mse_span) * 100
else:
    mse_percentage = np.zeros_like(mse)

# Detect anomalies by comparing the scaled reconstruction error to some threshold
threshold = 60  # Define a threshold value (in percentage)

# Keep only the rows that have a score (the last row of each window) and
# attach the results to them. The threshold column belongs on this frame;
# it was previously written to `normal` by mistake.
death_loop = death_loop.iloc[timesteps - 1:].copy()
death_loop['is_anomalous'] = mse_percentage > threshold
death_loop['mse_percentage'] = mse_percentage
death_loop['threshold'] = threshold

# Rows flagged as anomalous, selected once for both plot axes
anomalous_rows = death_loop[death_loop['is_anomalous']]

# Create a new figure
fig = go.Figure()

# Add a line for the full series
fig.add_trace(go.Scatter(
    x=death_loop.index,
    y=death_loop["coord"],
    mode='lines',
    name='coord',
    line=dict(color='green'),  # specify line color here
))

# Add markers for the anomalous points
fig.add_trace(go.Scatter(
    x=anomalous_rows.index,
    y=anomalous_rows["coord"],
    mode='markers',
    name='Anomalous coord',
    marker=dict(color='red', size=5),  # specify marker color and size here
))

# Show the plot and persist the annotated rows
fig.show()
death_loop.to_csv('death_loop_result.csv')



In [66]:
# Score the `dropped` recording with the trained autoencoder and flag anomalies.
# NOTE: this cell overwrites `dropped` with a trimmed, annotated copy, so
# re-run the query cell that loads it before running this cell a second time.

# Slice the series into overlapping windows; window i covers rows
# i .. i+timesteps-1. The "+ 1" keeps the final window so the last row of the
# series is scored as well.
coord_values = dropped['coord'].to_numpy()
if len(coord_values) < timesteps:
    raise ValueError(f"need at least {timesteps} rows to build a window, got {len(coord_values)}")
X_pred = np.array([coord_values[i:i+timesteps] for i in range(len(coord_values) - timesteps + 1)])

# Use the autoencoder to reconstruct every window. The trailing axis is the
# single feature dimension declared by the model's Input layer.
predictions = autoencoder.predict(X_pred[..., np.newaxis])

# Squared reconstruction error of the last point of each window, compared
# against that same point (row i+timesteps-1), not the row that follows it.
mse = np.power(X_pred[:, -1] - predictions[:, -1, 0], 2)

# Scale the error to a percentage of this recording's own error range.
# Because the scale is min-max per recording, the largest error always maps to
# 100%; a constant error (zero range) maps to 0% instead of dividing by zero.
min_mse = np.min(mse)
max_mse = np.max(mse)
mse_span = max_mse - min_mse
if mse_span > 0:
    mse_percentage = ((mse - min_mse) / mse_span) * 100
else:
    mse_percentage = np.zeros_like(mse)

# Detect anomalies by comparing the scaled reconstruction error to some threshold
threshold = 60  # Define a threshold value (in percentage)

# Keep only the rows that have a score (the last row of each window) and
# attach the results to them. The threshold column belongs on this frame;
# it was previously written to `normal` by mistake.
dropped = dropped.iloc[timesteps - 1:].copy()
dropped['is_anomalous'] = mse_percentage > threshold
dropped['mse_percentage'] = mse_percentage
dropped['threshold'] = threshold

# Rows flagged as anomalous, selected once for both plot axes
anomalous_rows = dropped[dropped['is_anomalous']]

# Create a new figure
fig = go.Figure()

# Add a line for the full series
fig.add_trace(go.Scatter(
    x=dropped.index,
    y=dropped["coord"],
    mode='lines',
    name='coord',
    line=dict(color='green'),  # specify line color here
))

# Add markers for the anomalous points
fig.add_trace(go.Scatter(
    x=anomalous_rows.index,
    y=anomalous_rows["coord"],
    mode='markers',
    name='Anomalous coord',
    marker=dict(color='red', size=5),  # specify marker color and size here
))

# Show the plot and persist the annotated rows
fig.show()
dropped.to_csv('dropped_result.csv')

