In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler 

In [2]:
import time
import psutil
import os

In [3]:
# Load the dataset and index it by its parsed 'timestamp' column.
# The CSV location can be overridden with the HPE_DATA_CSV environment variable
# so the notebook is not tied to one machine; the original path is the default.
DATA_CSV = os.environ.get(
    'HPE_DATA_CSV',
    'C://Users//Admin//OneDrive//Desktop//HPE//data_new.csv',
)
data = pd.read_csv(DATA_CSV)
data['timestamp'] = pd.to_datetime(data['timestamp'])
# Rebind instead of mutating in place so the step reads as a plain transformation.
data = data.set_index('timestamp')

In [4]:
# Select the model inputs and one reference column purely by position.
# A later cell appends a 'Predictions' column to `data`; drop it here (if it is
# already present) so that re-running this cell afterwards still resolves the
# same columns instead of having the negative offsets shift by one.
source_cols = data.drop(columns=['Predictions'], errors='ignore')
# Every second column, starting at the first and stopping before the last three.
X = source_cols.iloc[:, 0:-3:2]
# Third column from the end (not referenced by the later cells visible here).
Y = source_cols.iloc[:, -3]

In [5]:
# Standardise the selected feature columns before outlier detection.
sc = StandardScaler()
sc.fit(X)                      # learn the per-column statistics
data_scaled = sc.transform(X)  # apply them; result is a NumPy array

In [6]:
# Display the scaled feature matrix (NumPy abbreviates large arrays in the repr).
data_scaled

array([[-1.80330869,  1.36189313, -1.95855627, ...,  1.5935427 ,
         0.8385898 ,  0.95652271],
       [-1.73987955,  1.33376679, -1.90372596, ...,  1.44596626,
         0.97371269,  0.97555823],
       [-1.85890228,  1.40415136, -1.96218035, ...,  1.5425039 ,
         0.9958703 ,  0.96931758],
       ...,
       [ 1.61684579, -1.81059977,  2.43937319, ..., -1.81903133,
        -1.98664325, -1.81658568],
       [ 1.62859117, -1.84251329,  1.18360853, ..., -1.77822097,
        -2.02179989, -1.90598922],
       [ 1.7276634 , -1.9037951 ,  1.20462524, ..., -1.91071357,
        -1.98442295, -1.81032349]])

In [7]:
# Build the Local Outlier Factor detector.
np.random.seed(42)  # fix NumPy's global RNG state
lof_settings = {"contamination": 0.26}  # share of rows to flag as outliers
model = LocalOutlierFactor(**lof_settings)

In [8]:
# Record wall-clock and process CPU start times ahead of the fitting cell.
# NOTE: the matching end times are sampled in a separate cell, so the measured
# interval also includes any delay between executing these cells.
start_wall_time = time.time()
start_cpu_time = time.process_time()

In [9]:
# Fit the detector on the scaled features and get one label per row
# (the raw output uses 1 and -1; see the array shown in the next display cell).
label=model.fit_predict(data_scaled)

In [10]:
# Record wall-clock and process CPU end times after the fitting cell has run.
# The start times were taken in an earlier cell; the difference is computed later.
end_wall_time = time.time()
end_cpu_time = time.process_time()

In [11]:
# The predicted label for each row: -1 marks an anomalous row, 1 a normal row.
label

array([ 1,  1,  1, ..., -1, -1, -1])

In [12]:
# Re-encode the detector output as 0 = normal, 1 = anomaly.
# The flags are rebuilt from the fitted model instead of swapping values inside
# `label` in place: an in-place "1 -> 0, then -1 -> 1" swap turns every label
# into 0 if this cell is executed a second time. A row is an outlier exactly
# when its negative outlier factor is below the model's offset, which is the
# same rule fit_predict uses to emit -1.
label = np.where(model.negative_outlier_factor_ < model.offset_, 1, 0)


In [13]:
# After re-encoding: 1 marks an anomalous row, 0 a normal row.
label

array([0, 0, 0, ..., 1, 1, 1])

In [14]:
# Store the 0/1 flags alongside the original rows.
# NOTE: this adds a column to `data`, which shifts the negative positional
# offsets used by the earlier X/Y selection cell if that cell is re-run.
data['Predictions'] = label

In [15]:
# Count how many rows were flagged normal (0) versus anomalous (1).
data['Predictions'].value_counts()

0    746
1    262
Name: Predictions, dtype: int64

In [16]:
# Keep only the rows flagged as anomalous and list them by timestamp.
is_anomaly = data['Predictions'].eq(1)
anomalies = data.loc[is_anomaly]
print(anomalies.loc[:, 'Predictions'])

timestamp
2021-01-01 07:00:00    1
2021-01-01 08:00:00    1
2021-01-01 09:00:00    1
2021-01-01 10:00:00    1
2021-01-01 11:00:00    1
                      ..
2021-02-11 19:00:00    1
2021-02-11 20:00:00    1
2021-02-11 21:00:00    1
2021-02-11 22:00:00    1
2021-02-11 23:00:00    1
Name: Predictions, Length: 262, dtype: int32


In [17]:
# Elapsed wall-clock and process CPU time between the start and end samples.
# Both samples were taken in their own cells around the fitting cell, so the
# wall-clock figure also covers whatever time passed between those cells.
wall_time = end_wall_time - start_wall_time
cpu_time = end_cpu_time - start_cpu_time

In [18]:
# Report both durations rounded to four decimal places.
elapsed_s = round(wall_time, 4)
cpu_s = round(cpu_time, 4)
print(f'Elapsed time: {elapsed_s} seconds')
print(f'CPU time: {cpu_s} seconds')

Elapsed time: 1.7894 seconds
CPU time: 1.25 seconds


In [19]:
# Report the memory footprint of the notebook's own process, in whole MiB.
# A single memory_info() snapshot is taken so that both figures describe the
# same instant and the process is not looked up twice. The numbers reflect the
# process at the time this cell runs, not specifically during model fitting.
mem = psutil.Process(os.getpid()).memory_info()
print(f'Physical Memory usage: {int(mem.rss / 1024 ** 2)} MB')
print(f'Virtual Memory usage:  {int(mem.vms / 1024 ** 2)} MB')

Physical Memory usage: 160 MB
Virtual Memory usage:  170 MB


In [20]:
#PERFORMANCE PREDICTION
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score
import pandas as pd


# Score the detector's 0/1 flags against the 'Overall anomaly' column.
true_values = data['Overall anomaly'].values
predicted_values = data['Predictions'].values

# Evaluate the four metrics in the same order they are reported below.
precision, f1, recall, accuracy = (
    scorer(true_values, predicted_values)
    for scorer in (precision_score, f1_score, recall_score, accuracy_score)
)

for tag, score in (
    ("precision=", precision),
    ("f1=", f1),
    ("recall=", recall),
    ("accuracy=", accuracy),
):
    print(tag, score)

precision= 0.5877862595419847
f1= 0.5866666666666667
recall= 0.5855513307984791
accuracy= 0.7847222222222222
