# 데이터 생성

In [14]:
import numpy as np
import pandas as pd

In [15]:
mean = [0 for _ in range(5)]
cov = np.eye(5)
normal = pd.DataFrame(np.random.multivariate_normal(mean=mean, cov=cov, size = 500))

In [16]:
normal

Unnamed: 0,0,1,2,3,4
0,-0.536328,-0.452150,-0.080984,-0.214996,0.451842
1,0.874567,1.459543,0.539577,0.383107,-0.470852
2,-0.069316,0.142783,-1.475447,1.747590,-0.949459
3,0.773159,-0.490001,-1.309141,-0.141343,-0.307030
4,1.235538,0.231041,0.993230,-0.303776,0.523810
...,...,...,...,...,...
495,-1.074183,-1.342190,-0.570420,-0.723449,0.567621
496,1.186357,0.013864,0.730098,1.448959,0.117469
497,1.156691,-0.663067,1.693652,2.082000,0.888274
498,0.846212,-0.072108,0.340621,0.372832,0.031413


In [17]:
anomaly = pd.DataFrame(np.random.uniform(30,40,(10,5)))
anomaly

Unnamed: 0,0,1,2,3,4
0,36.585336,32.836304,37.979411,39.864744,33.966332
1,36.43059,38.293807,30.708138,38.142258,31.486132
2,35.601127,33.99639,34.020772,30.528358,32.024013
3,39.251823,39.997721,37.24744,30.630479,34.011671
4,36.813436,33.883821,31.970012,35.804098,38.260749
5,39.718829,38.072204,35.158335,30.698423,37.379678
6,31.228046,35.978642,36.751949,38.377993,31.182496
7,34.797219,30.463893,30.071725,31.825497,36.209818
8,38.296355,39.428887,34.656288,37.040041,35.975783
9,35.560127,35.240706,36.635688,34.244251,39.940563


In [36]:
normal_labels = np.array([1 for _ in range(500)])
anomaly_labels = np.array([-1 for _ in range(10)])
labels = pd.DataFrame(np.concatenate([normal_labels, anomaly_labels]))
labels.columns = ['label']
labels

Unnamed: 0,label
0,1
1,1
2,1
3,1
4,1
...,...
505,-1
506,-1
507,-1
508,-1


In [37]:
x_data = pd.concat([normal, anomaly])
x_data = x_data.reset_index(drop=True)
data = pd.concat([x_data, labels], axis=1)
data

Unnamed: 0,0,1,2,3,4,label
0,-0.536328,-0.452150,-0.080984,-0.214996,0.451842,1
1,0.874567,1.459543,0.539577,0.383107,-0.470852,1
2,-0.069316,0.142783,-1.475447,1.747590,-0.949459,1
3,0.773159,-0.490001,-1.309141,-0.141343,-0.307030,1
4,1.235538,0.231041,0.993230,-0.303776,0.523810,1
...,...,...,...,...,...,...
505,39.718829,38.072204,35.158335,30.698423,37.379678,-1
506,31.228046,35.978642,36.751949,38.377993,31.182496,-1
507,34.797219,30.463893,30.071725,31.825497,36.209818,-1
508,38.296355,39.428887,34.656288,37.040041,35.975783,-1


In [38]:
X = data.iloc[:,:5]
y = data.iloc[:,-1]
print(X)
print(y)

             0          1          2          3          4
0    -0.536328  -0.452150  -0.080984  -0.214996   0.451842
1     0.874567   1.459543   0.539577   0.383107  -0.470852
2    -0.069316   0.142783  -1.475447   1.747590  -0.949459
3     0.773159  -0.490001  -1.309141  -0.141343  -0.307030
4     1.235538   0.231041   0.993230  -0.303776   0.523810
..         ...        ...        ...        ...        ...
505  39.718829  38.072204  35.158335  30.698423  37.379678
506  31.228046  35.978642  36.751949  38.377993  31.182496
507  34.797219  30.463893  30.071725  31.825497  36.209818
508  38.296355  39.428887  34.656288  37.040041  35.975783
509  35.560127  35.240706  36.635688  34.244251  39.940563

[510 rows x 5 columns]
0      1
1      1
2      1
3      1
4      1
      ..
505   -1
506   -1
507   -1
508   -1
509   -1
Name: label, Length: 510, dtype: int32


In [82]:
X_train = data.iloc[:400,:5]
X_test = data.iloc[400:,:5]
y_train = data.iloc[:400,-1]
y_test = data.iloc[400:,-1]

# LOF

In [84]:
from sklearn.neighbors import LocalOutlierFactor

lof_clf = LocalOutlierFactor(n_neighbors=20) # default 값을 사용
pred = lof_clf.fit_predict(X)
correct = (pred == y).sum()
acc = (correct/len(y))*100

acc

98.62745098039215

# Isolation Forest

In [62]:
from sklearn.ensemble import IsolationForest

isol_clf = IsolationForest(random_state=0).fit(X_train)
pred = isol_clf.predict(X_test)
correct = (pred == y_test).sum()
acc = (correct/len(y_test))*100

acc

86.36363636363636

# One-Class SVM

In [77]:
from sklearn.svm import OneClassSVM

ocsvm_clf = OneClassSVM(kernel='linear', gamma='scale').fit(X_train)
pred = ocsvm_clf.predict(X_test)
correct = (pred == y_test).sum()
acc = (correct/len(y_test))*100

acc

59.09090909090909

# SVDD

In [81]:
from sklearn.svm import OneClassSVM

ocsvm_clf = OneClassSVM(kernel='rbf', gamma='scale').fit(X_train)
pred = ocsvm_clf.predict(X_test)
correct = (pred == y_test).sum()
acc = (correct/len(y_test))*100

acc

50.0