In [36]:
# Import required libraries
import pandas as pd
from sklearn.ensemble import IsolationForest

In [37]:
# Load the data from all the files and combine them into one dataframe
filenames = ['data0 (1).txt', 'data109.txt', 'data112.txt', 'data88.txt', 'data89.txt', 'data9.txt']
df_list = []
for filename in filenames:
    # Use read_csv with comma as separator to load the data into a dataframe
    df = pd.read_csv(filename, sep=",", header=None)
    df_list.append(df)
merged_df = pd.concat(df_list, ignore_index=True)

In [38]:
merged_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
0,0,0,0,0,0,0,0,0,0,0,...,0,0,128,0,248,248,128,0,0,
1,0,0,0,0,128,0,0,0,248,0,...,0,0,0,0,0,0,0,0,0,
2,120,0,0,120,0,0,0,120,0,0,...,0,0,128,0,0,248,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,120,0,0,


In [39]:
print(merged_df.isnull().sum())

0        0
1        0
2        0
3        0
4        0
        ..
9996     0
9997     0
9998     0
9999     0
10000    6
Length: 10001, dtype: int64


In [40]:
merged_df.fillna(0, inplace=True)

In [41]:
merged_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
0,0,0,0,0,0,0,0,0,0,0,...,0,0,128,0,248,248,128,0,0,0.0
1,0,0,0,0,128,0,0,0,248,0,...,0,0,0,0,0,0,0,0,0,0.0
2,120,0,0,120,0,0,0,120,0,0,...,0,0,128,0,0,248,0,0,0,0.0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,120,0,0,0.0


In [42]:
# Train the Isolation Forest model
isolation_forest = IsolationForest(n_estimators=100, max_samples='auto', contamination='auto', random_state=42)
isolation_forest.fit(merged_df)

IsolationForest(random_state=42)

In [43]:
# Predict anomalies in the merged dataset
merged_df['anomaly'] = isolation_forest.predict(merged_df)

In [44]:
merged_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9992,9993,9994,9995,9996,9997,9998,9999,10000,anomaly
0,0,0,0,0,0,0,0,0,0,0,...,0,128,0,248,248,128,0,0,0.0,-1
1,0,0,0,0,128,0,0,0,248,0,...,0,0,0,0,0,0,0,0,0.0,1
2,120,0,0,120,0,0,0,120,0,0,...,0,128,0,0,248,0,0,0,0.0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,120,0,0,0.0,1


In [45]:
# Extract the anomalies from the merged dataset
anomalies = merged_df.loc[merged_df['anomaly'] == -1]

In [46]:


print(anomalies)

   0    1    2  3  4    5  6  7    8    9  ...  9992  9993  9994  9995  9996  \
0  0    0    0  0  0    0  0  0    0    0  ...     0   128     0   248   248   
5  0  128  248  0  0  128  0  0  128  248  ...   248   120   128     0   248   

   9997  9998  9999  10000  anomaly  
0   128     0     0    0.0       -1  
5   128     0   248    0.0       -1  

[2 rows x 10002 columns]


In [47]:
merged_df.loc[merged_df['anomaly'] == 1]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9992,9993,9994,9995,9996,9997,9998,9999,10000,anomaly
1,0,0,0,0,128,0,0,0,248,0,...,0,0,0,0,0,0,0,0,0.0,1
2,120,0,0,120,0,0,0,120,0,0,...,0,128,0,0,248,0,0,0,0.0,1
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.0,1
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,120,0,0,0.0,1


If you are getting an empty DataFrame after executing print(anomalies), it could be because there were no anomalies detected by the Isolation Forest model. This could be due to several reasons such as the data being too clean or the model parameters not being suitable for the data.

You can try experimenting with different parameter values for the Isolation Forest model and also try visualizing the data to gain more insights into the pattern of the data. Additionally, you can also try other anomaly detection algorithms and compare their performance with the Isolation Forest model.

Here's how this code works:

We first import the required libraries, which includes pandas for loading and manipulating the data, and scikit-learn for training and evaluating the model.

We then load the data from the text files using pandas. In this example, we assume that the data is stored in six text files named file1.txt to file6.txt. We read each file into a separate data frame, and then merge the data frames into a single data frame using pandas' concat() function.

We train the Isolation Forest model on the merged data frame. We use the IsolationForest class from scikit-learn, and set the number of estimators to 100, which controls the number of trees in the forest. We also set the max_samples parameter to 'auto', which means that the size of the sample used to build each tree is set to the size of the input data. We set contamination to 'auto', which sets the proportion of outliers in the data to be the same as the proportion of anomalies in the training data. Finally, we set behaviour to 'new' to ensure that the model works with the latest version of scikit-learn, and set the random_state parameter to 42 to ensure reproducibility.

We use the trained model to predict anomalies in the data. We apply the decision_function() method of the Isolation Forest object to compute the anomaly scores for each data point, which are used to determine whether the point is an outlier or not. We also apply the predict() method to generate a binary prediction of whether each point is an anomaly or not.

We output the results to the console, which includes the original timestamp and current values, as well as the anomaly score and prediction for each point.

Note that this is just a simple example of how to build an ML model for detecting anomalies in motor current signature data. In practice, you may need to perform additional data preprocessing, feature engineering, and model selection to optimize the performance of the model.






In [12]:
from sklearn.svm import OneClassSVM
import numpy as np

In [30]:
# Split the data into training and test sets
train_size = int(len(merged_df) * 0.7)
train_data = merged_df[:train_size]
test_data = merged_df[train_size:]


In [31]:
# Train the one-class SVM model
nu = 0.1  # The proportion of outliers expected in the data
one_class_svm = OneClassSVM(nu=nu)
one_class_svm.fit(train_data)



OneClassSVM(nu=0.1)

In [32]:
# Use the trained model to detect anomalies in the test set
test_data['anomaly'] = np.where(one_class_svm.predict(test_data) == -1, 1, 0)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['anomaly'] = np.where(one_class_svm.predict(test_data) == -1, 1, 0)


In [33]:
# Print the anomalies found in the test set
anomalies = test_data[test_data['anomaly'] == 1]
print(anomalies)

   0    1    2  3  4    5  6  7    8    9  ...  9992  9993  9994  9995  9996  \
4  0    0    0  0  0    0  0  0    0    0  ...     0     0     0     0     0   
5  0  128  248  0  0  128  0  0  128  248  ...   248   120   128     0   248   

   9997  9998  9999  10000  anomaly  
4   120     0     0    0.0        1  
5   128     0   248    0.0        1  

[2 rows x 10002 columns]
