# Group 14 - Project FP01
## Time series anomaly detection

This project aims at investigating the current state-of-the-arts TAD scenario.

In [52]:
import os
import time
import tsfel
import warnings
import datetime
import keras_tuner as kt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.cm as cm
from sklearn import metrics
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn import preprocessing
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_selection import VarianceThreshold


import dataset as ds

In [53]:
# Path to the root directory of the dataset
ROOTDIR_DATASET_NORMAL = './dataset/normal/'
ROOTDIR_DATASET_COLLISION = './dataset/collisions/'

In [54]:
# TF_ENABLE_ONEDNN_OPTS=0 means that the model will not use the oneDNN library for optimization

import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

### Dataset: Kuka-v1
In 5 different recording sessions, the robot executes several different operations, while being
monitored by several sensors. The sensed signals are collected, with different sampling frequencies
(1, 10, 100, 200 Hz),

In [55]:
# Dataset frequency
freq_01s = '0.1'
freq_001s = '0.01'
freq_0005s = '0.005'

### Data Loading

In [56]:
def get_df_action(filepaths_csv, filepaths_meta, action2int=None, delimiter=";"):
    # Load dataframes
    print("Loading data.")
    # Make dataframes
    # Some classes show the output boolean parameter as True rather than true. Fix here
    dfs_meta = list()
    for filepath in filepaths_meta:
        df_m = pd.read_csv(filepath, sep=delimiter)
        df_m.str_repr = df_m.str_repr.str.replace('True', 'true')
        df_m['filepath'] = filepath
        dfs_meta.append(df_m)

    df_meta = pd.concat(dfs_meta)
    df_meta.index = pd.to_datetime(df_meta.init_timestamp.astype('datetime64[ms]'), format="%Y-%m-%dT%H:%M:%S.%f")
    df_meta['completed_timestamp'] = pd.to_datetime(df_meta.completed_timestamp.astype('datetime64[ms]'),
                                                    format="%Y-%m-%dT%H:%M:%S.%f")
    df_meta['init_timestamp'] = pd.to_datetime(df_meta.init_timestamp.astype('datetime64[ms]'),
                                               format="%Y-%m-%dT%H:%M:%S.%f")

    # Eventually reduce number of classes
    # df_meta['str_repr'] = df_meta.str_repr.str.split('=', expand = True,n=1)[0]
    # df_meta['str_repr'] = df_meta.str_repr.str.split('(', expand=True, n=1)[0]

    actions = df_meta.str_repr.unique()
    dfs = [pd.read_csv(filepath_csv, sep=";") for filepath_csv in filepaths_csv]
    df = pd.concat(dfs)

    # Sort columns by name !!!
    df = df.sort_index(axis=1)

    # Set timestamp as index
    df.index = pd.to_datetime(df.time.astype('datetime64[ms]'), format="%Y-%m-%dT%H:%M:%S.%f")
    # Drop useless columns
    columns_to_drop = [column for column in df.columns if "Abb" in column or "Temperature" in column]
    df.drop(["machine_nameKuka Robot_export_active_energy",
             "machine_nameKuka Robot_import_reactive_energy"] + columns_to_drop, axis=1, inplace=True)
    signals = df.columns

    df_action = list()
    for action in actions:
        for index, row in df_meta[df_meta.str_repr == action].iterrows():
            start = row['init_timestamp']
            end = row['completed_timestamp']
            df_tmp = df.loc[start: end].copy()
            df_tmp['action'] = action
            # Duration as string (so is not considered a feature)
            df_tmp['duration'] = str((row['completed_timestamp'] - row['init_timestamp']).total_seconds())
            df_action.append(df_tmp)
    df_action = pd.concat(df_action, ignore_index=True)
    df_action.index = pd.to_datetime(df_action.time.astype('datetime64[ms]'), format="%Y-%m-%dT%H:%M:%S.%f")
    df_action = df_action[~df_action.index.duplicated(keep='first')]

    # Drop NaN
    df = df.dropna(axis=0)
    df_action = df_action.dropna(axis=0)

    if action2int is None:
        action2int = dict()
        j = 1
        for label in df_action.action.unique():
            action2int[label] = j
            j += 1

    df_merged = df.merge(df_action[['action']], left_index=True, right_index=True, how="left")
    # print(f"df_merged len: {len(df_merged)}")
    # Where df_merged in NaN Kuka is in idle state
    df_idle = df_merged[df_merged['action'].isna()].copy()
    df_idle['action'] = 'idle'
    df_idle['duration'] = df_action.duration.values.astype(float).mean().astype(str)
    df_action = pd.concat([df_action, df_idle])

    # ile label must be 0 for debug mode
    action2int['idle'] = 0
    print(f"Found {len(set(df_action['action']))} different actions.")
    print("Loading data done.\n")

    return df_action, df, df_meta, action2int

In [57]:
filepath_csv = [os.path.join(ROOTDIR_DATASET_NORMAL, f"rec{r}_20220811_rbtc_0.1s.csv") for r in [0, 2, 3, 4]]
filepath_meta = [os.path.join(ROOTDIR_DATASET_NORMAL, f"rec{r}_20220811_rbtc_0.1s.metadata") for r in [0, 2, 3, 4]]
df_action, df, df_meta, action2int = get_df_action(filepath_csv, filepath_meta)

Loading data.


Found 31 different actions.
Loading data done.



In [58]:
fig = go.Figure()
signals = [
    "sensor_id1_AngY",
    "sensor_id2_AngX",
    "sensor_id5_AngY",
    "sensor_id4_AccZ",
    "sensor_id4_AngX",
    "machine_nameKuka Robot_power"]

start = df.index[9000]
df_reduced = df.loc[start:]
duration = 120  # seconds
time_delta = df_reduced.index - start
df_interval = df_reduced[time_delta.total_seconds() <= duration]
df_interval.head()
j = 0

# Leveraging plotly express
n_colors = len(signals)
colors = px.colors.sample_colorscale("greys", [n/(n_colors -1) for n in range(n_colors)])  # From continuous colormap
colors = px.colors.qualitative.Set2  # From discrete colormap, see https://plotly.com/python/discrete-color/
df_signals = df_interval[signals].select_dtypes(['number'])
df_signals = df_signals / df_signals.max()
# print(df_signals.head())
fig = px.line(df_signals, x=df_signals.index, y=df_signals.columns, color_discrete_sequence=colors)

# Leveraging plotly graph object
colors_action = px.colors.qualitative.Antique
j = 0
for action in df_action.loc[df_interval.index].action.unique():
    df_action_interval = df_action.loc[df_interval.index]
    df_action_interval.head()
    df_action_single_action = df_action_interval[df_action_interval['action'] == action]
    fig.add_trace(go.Scatter(
        x=df_action_single_action.index,
        y=[-0.3] * len(df_action_single_action.index),
        line_shape="hv",
        line=dict(color=colors_action[j], width=2.5),
        name=action))
    j += 1

In [59]:

fig.update_layout(
    title="Some signals",
    xaxis_title="Time",
    yaxis_title="",
    legend_title="Legend",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    )
)
fig.show()

In [60]:
action2int

{'pickFromPallet(2,2)=[true,1,0]': 1,
 'placeToPallet(1,2)=[true,0]': 2,
 'moveOverPallet(2,1)=[true,0]': 3,
 'moveOverPallet(1,2)=[true,0]': 4,
 'pickFromPallet(1,2)=[true,1,0]': 5,
 'placeToPallet(1,1)=[true,0]': 6,
 'moveOverPallet(1,3)=[true,0]': 7,
 'moveOverPallet(3,1)=[true,0]': 8,
 'pickFromPallet(3,2)=[true,1,0]': 9,
 'placeToPallet(1,3)=[true,0]': 10,
 'moveOverPallet(3,2)=[true,0]': 11,
 'moveOverPallet(2,3)=[true,0]': 12,
 'pickFromPallet(2,2)=[true,2,0]': 13,
 'placeToPallet(2,2)=[true,0]': 14,
 'pickFromPallet(1,2)=[true,2,0]': 15,
 'placeToPallet(2,1)=[true,0]': 16,
 'pickFromPallet(3,2)=[true,2,0]': 17,
 'placeToPallet(2,3)=[true,0]': 18,
 'pickFromPallet(2,2)=[true,3,0]': 19,
 'placeToPallet(3,2)=[true,0]': 20,
 'pickFromPallet(1,2)=[true,3,0]': 21,
 'placeToPallet(3,1)=[true,0]': 22,
 'pickFromPallet(3,2)=[true,3,0]': 23,
 'placeToPallet(3,3)=[true,0]': 24,
 'pickFromPallet(2,2)=[true,4,0]': 25,
 'placeToPallet(4,2)=[true,0]': 26,
 'pickFromPallet(1,2)=[true,4,0]': 27

In [61]:
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

def get_features_ts(domain, df_action, df_meta, frequency, action2int):
    duration_dict = {1: 10, 10: 1, 100: 0.1, 200: 0.05}
    duration_min = duration_dict[int(frequency)]
    cfg = tsfel.get_features_by_domain(domain)
    dataframe_features = list()
    print("Computing features.")
    # Idle does not have associated timestamps. Window is set to 10 seconds
    df_by_action = df_action[df_action["action"] == "idle"].copy()
    X = tsfel.time_series_features_extractor(cfg,
                                             df_by_action.select_dtypes(['number']),
                                             fs=frequency,
                                             header_names=df_by_action.select_dtypes(['number']).columns + '-',
                                             window_size=int(frequency * 10),
                                             verbose=False)
    time = pd.to_datetime(df_by_action.time.astype('datetime64[ms]'), format="%Y-%m-%dT%H:%M:%S.%f")
    X['start'] = [t[0] for t in
                  tsfel.utils.signal_processing.signal_window_splitter(time, window_size=int(frequency * 10))]
    X['duration'] = 10
    X['end'] = X['start'] + pd.to_timedelta(X['duration'], 's')
    X['label'] = action2int["idle"]
    X.drop('duration', inplace=True, axis=1)
    dataframe_features.append(X)
    actions = list(df_action.action.unique())
    actions.remove("idle")
    for action in actions:
        df_by_action = df_action[df_action["action"] == action].copy()
        df_meta_by_action = df_meta[df_meta['str_repr'] == action].copy()
        df_meta_by_action['start'] = pd.to_datetime(df_meta_by_action.init_timestamp.astype('datetime64[ms]'),
                                                    format="%Y-%m-%dT%H:%M:%S.%f")
        df_meta_by_action['end'] = pd.to_datetime(
            df_meta_by_action.completed_timestamp.astype('datetime64[ms]'), format="%Y-%m-%dT%H:%M:%S.%f")
        for _, row in df_meta_by_action.iterrows():
            df_by_action_by_event = df_by_action.loc[row["start"]: row["end"]]
            if len(df_by_action_by_event) < duration_min * frequency:
                print(f"Skipped feature extraction for {action} {row['start']} : {row['end']}.")
                continue

            X = tsfel.calc_window_features(cfg,
                                           df_by_action_by_event.select_dtypes(['number']),
                                           header_names=df_by_action_by_event.select_dtypes(['number']).columns + '-',
                                           fs=frequency,
                                           verbose=False)
            # print(X.shape)
            X['label'] = action2int[action]
            X['start'] = row['start']
            X['end'] = row['end']
            dataframe_features.append(X)

    dataframe_features = pd.concat(dataframe_features)
    print("Computing features done.")
    return dataframe_features

In [62]:
start_time = time.time()
df_features = get_features_ts("statistical", df_action, df_meta, 10, action2int)
print("--- %s seconds ---" % (time.time() - start_time))

Computing features.


KeyboardInterrupt: 

In [None]:
df_features.isnull().values.any()

True

In [None]:
# df_features_nonan = df_features.drop((df_features.columns[df_features.isna().any()].tolist()), axis=1)
df_features_nonan = df_features.fillna(0)

In [None]:
df_train, df_test = train_test_split(df_features_nonan)
df_train.head()

Unnamed: 0,machine_nameKuka Robot_apparent_power-_Absolute energy,machine_nameKuka Robot_apparent_power-_Average power,machine_nameKuka Robot_apparent_power-_ECDF Percentile Count_0,machine_nameKuka Robot_apparent_power-_ECDF Percentile Count_1,machine_nameKuka Robot_apparent_power-_ECDF Percentile_0,machine_nameKuka Robot_apparent_power-_ECDF Percentile_1,machine_nameKuka Robot_apparent_power-_ECDF_0,machine_nameKuka Robot_apparent_power-_ECDF_1,machine_nameKuka Robot_apparent_power-_ECDF_2,machine_nameKuka Robot_apparent_power-_ECDF_3,...,sensor_id5_GyroZ-_Median absolute deviation,sensor_id5_GyroZ-_Min,sensor_id5_GyroZ-_Peak to peak distance,sensor_id5_GyroZ-_Root mean square,sensor_id5_GyroZ-_Skewness,sensor_id5_GyroZ-_Standard deviation,sensor_id5_GyroZ-_Variance,start,end,label
0,5032969.0,508380.707486,20.0,80.0,197.45224,249.886017,0.01,0.02,0.03,0.04,...,44.19,0.0,3999.94,2517.742566,0.408317,1943.981427,3779064.0,2022-08-11 15:18:18.932,2022-08-11 15:18:28.951,15
0,5064846.0,464664.801468,22.0,88.0,198.373657,239.497849,0.009091,0.018182,0.027273,0.036364,...,6.77,0.0,3999.94,2308.904911,0.692737,1876.613877,3521680.0,2022-08-11 16:26:55.357,2022-08-11 16:27:06.360,23
0,4280266.0,480928.778726,18.0,72.0,185.021515,250.011353,0.011111,0.022222,0.033333,0.044444,...,11.41,0.0,3999.94,2411.106194,0.553271,1913.390546,3661063.0,2022-08-11 15:19:26.912,2022-08-11 15:19:35.878,8
0,5492591.0,508573.247314,21.0,87.0,190.126831,272.724121,0.009174,0.018349,0.027523,0.036697,...,14.28,0.0,3999.94,2286.40645,0.721745,1865.651078,3480654.0,2022-08-11 13:10:13.331,2022-08-11 13:10:24.298,1
0,4692993.0,527302.579935,18.0,72.0,189.529114,271.914978,0.011111,0.022222,0.033333,0.044444,...,1.22,0.0,3999.94,2218.314427,0.815959,1836.562883,3372963.0,2022-08-11 13:16:52.609,2022-08-11 13:17:01.560,4


In [None]:
X_train = df_train.drop(["label", "start", "end"], axis=1)
y_train = df_train["label"]
X_test = df_test.drop(["label", "start", "end"], axis=1)
y_test = df_test["label"]

In [None]:
# Normalise features
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns)

# Remove zero-variance features
selector_variance = VarianceThreshold()
selector_variance.fit(X_train)
X_train = pd.DataFrame(selector_variance.transform(X_train),
                        columns=X_train.columns.values[selector_variance.get_support()])

# Remove highly correlated features
corr_features = tsfel.correlated_features(X_train,
                                          threshold=0.95)
X_train.drop(corr_features, inplace=True, axis=1)

# Lasso selector
lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X_train, y_train)
lasso = SelectFromModel(lsvc, prefit=True)
selected_features = X_train.columns.values[lasso.get_support()]
X_train = X_train[selected_features].copy()

# Labels
num_classes = len(set(y_train))
y_train_categorical = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)

# Test
X_test = pd.DataFrame(selector_variance.transform(scaler.transform(X_test)),
                      columns=X_test.columns.values[selector_variance.get_support()])
X_test.drop(corr_features, inplace=True, axis=1)
X_test = X_test[selected_features].copy()

In [None]:
input_shape = (X_train.values.shape[1],)
num_classes = len(y_train_categorical[0])
print(input_shape)
print(num_classes)

(79,)
31


### Isolation forest from SKLEARN -- Tipologia: Multivariate unsupervised

In [None]:
clf = IsolationForest().fit(df_signals)

In [None]:
df_signals['sensor_id4_AccZ']

time
2022-08-11 13:23:45.619    0.012696
2022-08-11 13:23:45.719    0.007464
2022-08-11 13:23:45.819    0.006061
2022-08-11 13:23:45.919    0.007879
2022-08-11 13:23:46.019    0.008230
                             ...   
2022-08-11 13:25:45.219    0.018310
2022-08-11 13:25:45.319    0.021404
2022-08-11 13:25:45.419    0.020320
2022-08-11 13:25:45.519    0.016364
2022-08-11 13:25:45.619    0.015535
Name: sensor_id4_AccZ, Length: 1201, dtype: float64

In [None]:
#Hypotesis: Performs well on training since there are few anomalies(well defined outliers), worst on test set
#Fist su segnale di input (preso da normal)
# n_window = 400
# n_samples_tot = df_signals['sensor_id4_AccZ'].values.reshape(1, -1).shape[1]
# n_samples_window = int(n_samples_tot / n_window)
# print(df_signals['sensor_id4_AccZ'].values[:-1].reshape(n_window, n_samples_window).shape)
# clf = IsolationForest().fit(df_signals['sensor_id4_AccZ'].values[:-1].reshape(n_window, n_samples_window))
# y_pred_test = clf.predict(X_test)
# y_pred_test

(400, 3)


In [None]:
# Calculate the confusion matrix
# cm = confusion_matrix(y_test, y_pred_test)

# # Plot the confusion matrix
# plt.figure(figsize=(6, 6))
# sns.set(font_scale=1.3)
# sns.heatmap(cm, annot=True, fmt='d', cmap='coolwarm', cbar=False,
#             xticklabels=['Class 0', 'Class 1'],
#             yticklabels=['Class 0', 'Class 1'])

# plt.xlabel('Predicted Label', fontsize=14)
# plt.ylabel('True Label', fontsize=14)
# plt.title('Confusion Matrix', fontsize=16)
# plt.show()

# print("\nClassification Report:")
# print(classification_report(y_test, y_pred_test))

NameError: name 'y_pred_test' is not defined

### Collision set

# QUA USI IL CALSSIFICATORE TRAINATO SU NORMAL E VEDI COME PREDICE
# Ottenute preds metti insieme ai timestamp e vedi se al tempo tot c'era effettivamente anomalia
# Così riesci a dare accuracy del classificatore

In [None]:
ROOTDIR_DATASET_ANOMALY = "./dataset/collisions/"

In [None]:
print(os.path.join(ROOTDIR_DATASET_ANOMALY, "20220811_collisions_timestamp.xlsx"))

./dataset/collisions/20220811_collisions_timestamp.xlsx


# in questo documqnto abbiamo le collisioni effettive

In [None]:
collisions = pd.read_excel(os.path.join(ROOTDIR_DATASET_ANOMALY, "20220811_collisions_timestamp.xlsx"))
collisions_init = collisions[collisions['Inizio/fine'] == "i"].Timestamp - pd.to_timedelta([2] * len(collisions[collisions['Inizio/fine'] == "i"].Timestamp), 'h')

In [None]:

collisions

Unnamed: 0,Inizio/fine,Timestamp
0,i,2022-08-11 16:02:17.450
1,f,2022-08-11 16:02:21.460
2,i,2022-08-11 16:02:28.320
3,f,2022-08-11 16:02:31.420
4,i,2022-08-11 16:02:45.770
...,...,...
97,f,2022-08-11 19:09:28.580
98,i,2022-08-11 19:09:38.750
99,f,2022-08-11 19:09:42.830
100,i,2022-08-11 19:09:53.950


In [None]:
filepath_csv = [os.path.join(ROOTDIR_DATASET_ANOMALY, f"rec{r}_collision_20220811_rbtc_0.1s.csv") for r in [1, 5]]
filepath_meta = [os.path.join(ROOTDIR_DATASET_ANOMALY, f"rec{r}_collision_20220811_rbtc_0.1s.metadata") for r in [1, 5]]
df_action, df, df_meta, action2int = ds.get_df_action(filepath_csv, filepath_meta)

Loading data.
Found 31 different actions.
Loading data done.



In [None]:
df_meta.head()

Unnamed: 0_level_0,id,state_id,str_repr,init_timestamp,running_timestamp,completed_timestamp,filepath
init_timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-08-11 13:09:50.231,RBTC_undefined__971d69bf-6e88-40e6-995b-a22c78...,69793226882,"pickFromPallet(2,2)=[true,1,0]",2022-08-11 13:09:50.231,2022-08-11T13:09:50.277217+00:00,2022-08-11 13:10:02.283,./dataset/normal/rec0_20220811_rbtc_0.1s.metadata
2022-08-11 13:10:02.283,RBTC_undefined__971d69bf-6e88-40e6-995b-a22c78...,1073750083,"placeToPallet(1,2)=[true,0]",2022-08-11 13:10:02.283,2022-08-11T13:10:02.301121+00:00,2022-08-11 13:10:13.331,./dataset/normal/rec0_20220811_rbtc_0.1s.metadata
2022-08-11 13:10:13.331,RBTC_undefined__971d69bf-6e88-40e6-995b-a22c78...,69793226882,"pickFromPallet(2,2)=[true,1,0]",2022-08-11 13:10:13.331,2022-08-11T13:10:13.349625+00:00,2022-08-11 13:10:24.298,./dataset/normal/rec0_20220811_rbtc_0.1s.metadata
2022-08-11 13:10:24.298,RBTC_undefined__971d69bf-6e88-40e6-995b-a22c78...,1073750083,"placeToPallet(1,2)=[true,0]",2022-08-11 13:10:24.298,2022-08-11T13:10:24.314347+00:00,2022-08-11 13:10:35.322,./dataset/normal/rec0_20220811_rbtc_0.1s.metadata
2022-08-11 13:10:35.322,RBTC_undefined__971d69bf-6e88-40e6-995b-a22c78...,69793226882,"pickFromPallet(2,2)=[true,1,0]",2022-08-11 13:10:35.322,2022-08-11T13:10:35.345241+00:00,2022-08-11 13:10:46.332,./dataset/normal/rec0_20220811_rbtc_0.1s.metadata


In [None]:
fig = go.Figure()
signals = [
    "sensor_id1_AngY",
    "sensor_id2_AngX",
    "sensor_id5_AngY",
    "sensor_id4_AccZ",
    "sensor_id4_AngX",
    "machine_nameKuka Robot_power"]

start = df.index[9000]
df_reduced = df.loc[start:]
duration = 120  # seconds
time_delta = df_reduced.index - start
df_interval = df_reduced[time_delta.total_seconds() <= duration]
j = 0

# Leveraging plotly express
n_colors = len(signals)
colors = px.colors.sample_colorscale("greys", [n/(n_colors -1) for n in range(n_colors)])  # From continuous colormap
colors = px.colors.qualitative.Set2  # From discrete colormap, see https://plotly.com/python/discrete-color/
df_signals = df_interval[signals].select_dtypes(['number'])
df_signals = df_signals / df_signals.max()
fig = px.line(df_signals, x=df_signals.index, y=df_signals.columns, color_discrete_sequence=colors)

# Leveraging plotly graph object
colors_action = px.colors.qualitative.Antique
j = 0
for action in df_action.loc[df_interval.index].action.unique():
    df_action_interval = df_action.loc[df_interval.index]
    df_action_single_action = df_action_interval[df_action_interval['action'] == action]
    fig.add_trace(go.Scatter(
        x=df_action_single_action.index,
        y=[-0.3] * len(df_action_single_action.index),
        line_shape="hv",
        line=dict(color=colors_action[j], width=2.5),
        name=action))
    j += 1



In [None]:
df_meta['init_timestamp'].to_numpy().shape

(947,)

In [None]:
df_meta['completed_timestamp'].to_numpy().shape

(947,)

In [64]:
y_pred_test.shape

(1201,)

In [63]:
# print(df_signals['sensor_id4_AccZ'].values[:-1].reshape(n_window, n_samples_window))
preds_test = clf.predict(df_signals)
y_pred_test = np.where(preds_test == 1, 1, 0)
y_pred_test

time_pred_signal = np.hstack((df_meta['init_timestamp'].to_numpy(), df_meta['completed_timestamp'].to_numpy(), y_pred_test))
time_pred_signal

#Noi non abbiamo df con le ground truth collision giusto? Perchè allora non ha senso rpedictare se poi non hai y_test
#Neanche dall0'altra lo fa, come valutiamo il modello?? Con ROC

DTypePromotionError: The DType <class 'numpy.dtypes.Int32DType'> could not be promoted by <class 'numpy.dtypes.DateTime64DType'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtypes.DateTime64DType'>, <class 'numpy.dtypes.DateTime64DType'>, <class 'numpy.dtypes.Int32DType'>)

In [None]:
# Calculate the confusion matrix
cm = confusion_matrix(y_test, y_pred_test)

# Plot the confusion matrix
plt.figure(figsize=(6, 6))
sns.set(font_scale=1.3)
sns.heatmap(cm, annot=True, fmt='d', cmap='coolwarm', cbar=False,
            xticklabels=['Class 0', 'Class 1'],
            yticklabels=['Class 0', 'Class 1'])

plt.xlabel('Predicted Label', fontsize=14)
plt.ylabel('True Label', fontsize=14)
plt.title('Confusion Matrix', fontsize=16)
plt.show()

print("\nClassification Report:")
print(classification_report(y_test, y_pred_test))

ValueError: Found input variables with inconsistent numbers of samples: [244, 400]

In [None]:
fig.update_layout(
    title="Some signals",
    xaxis_title="Time",
    yaxis_title="",
    legend_title="Legend",
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="Black"
    )
)
fig.show()

In [None]:
start_time = time.time()
df_features_collision = ds.get_features_ts("statistical", df_action, df_meta, 10, action2int)
print("--- %s seconds ---" % (time.time() - start_time))

Computing features.


Skipped feature extraction for pickFromPallet(1,2)=[true,1,0] 2022-08-11 14:37:37.436000 : 2022-08-11 14:37:37.421000.
Skipped feature extraction for placeToPallet(1,1)=[true,0] 2022-08-11 14:37:37.421000 : 2022-08-11 14:37:37.442000.
Skipped feature extraction for pickFromPallet(3,2)=[true,1,0] 2022-08-11 15:36:32.568000 : 2022-08-11 15:36:32.533000.
Skipped feature extraction for pickFromPallet(3,2)=[true,1,0] 2022-08-11 15:36:32.572000 : 2022-08-11 15:36:32.561000.
Skipped feature extraction for placeToPallet(1,3)=[true,0] 2022-08-11 15:36:32.533000 : 2022-08-11 15:36:32.572000.
Skipped feature extraction for placeToPallet(1,3)=[true,0] 2022-08-11 15:36:32.561000 : 2022-08-11 15:36:32.561000.


KeyboardInterrupt: 

In [None]:
df_features_collision.isnull().values.any()

True

In [None]:
df_features_collision_nonan = df_features_collision.fillna(0)

In [None]:
X_collision = df_features_collision_nonan.drop(["label", "start", "end"], axis=1)
y_collision = df_features_collision_nonan["label"]

In [None]:
X_collision = pd.DataFrame(selector_variance.transform(scaler.transform(X_collision)),
                           columns=X_collision.columns.values[selector_variance.get_support()])
X_collision.drop(corr_features, inplace=True, axis=1)
X_collision = X_collision[selected_features].copy()