In [1]:
"""
air_inflow: 공기 흡입 유량 (^3/min)
air_end_temp: 공기 말단 온도 (°C)
out_pressure: 토출 압력 (Mpa)
motor_current: 모터 전류 (A)
motor_rpm: 모터 회전수 (rpm)
motor_temp: 모터 온도 (°C)
motor_vibe: 모터 진동 (mm/s)
type: 설비 번호

설비 번호 [0, 4, 5, 6, 7]: 30HP(마력)
설비 번호 1: 20HP
설비 번호 2: 10HP
설비 번호 3: 50HP
"""

'\nair_inflow: 공기 흡입 유량 (^3/min)\nair_end_temp: 공기 말단 온도 (°C)\nout_pressure: 토출 압력 (Mpa)\nmotor_current: 모터 전류 (A)\nmotor_rpm: 모터 회전수 (rpm)\nmotor_temp: 모터 온도 (°C)\nmotor_vibe: 모터 진동 (mm/s)\ntype: 설비 번호\n\n설비 번호 [0, 4, 5, 6, 7]: 30HP(마력)\n설비 번호 1: 20HP\n설비 번호 2: 10HP\n설비 번호 3: 50HP\n'

In [4]:
import warnings

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from sklearn.preprocessing import StandardScaler

from src.features import build_features

warnings.filterwarnings(action='ignore')


In [8]:
df = pd.read_csv(r'data\raw\train_data.csv')
test = pd.read_csv(r'data\raw\test_data.csv')

df = build_features.add_motor_hp(df)
df = build_features.add_air_flow_pressure(df)
df = build_features.add_motor_vibe_freq(df)
scaler = StandardScaler()
data_scaled = scaler.fit_transform(df)
train = pd.DataFrame(data_scaled, columns=df.columns)

test = build_features.add_motor_hp(test)
test = build_features.add_air_flow_pressure(test)
test = build_features.add_motor_vibe_freq(test)

# 모델 학습 및 이상치 탐지
model = IsolationForest(n_estimators=5000, max_samples='auto',
                        contamination=0.001, random_state=42)


model.fit(train.drop("type", axis=1))
test["anomaly"] = model.predict(test.drop("type", axis=1))


test.anomaly.value_counts()


 1    6814
-1     575
Name: anomaly, dtype: int64

In [17]:
df = pd.read_csv(r'data\raw\train_data.csv')
test = pd.read_csv(r'data\raw\test_data.csv')

df = build_features.add_motor_hp(df)
df = build_features.add_air_flow_pressure(df)
df = build_features.add_motor_vibe_freq(df)
scaler = StandardScaler()
data_scaled = scaler.fit_transform(df)
data_scaled_df = pd.DataFrame(data_scaled, columns=df.columns)

test = build_features.add_motor_hp(test)
test = build_features.add_air_flow_pressure(test)
test = build_features.add_motor_vibe_freq(test)


train, val = train_test_split(data_scaled_df, test_size=0.3, shuffle=True, random_state=42)

# 모델 학습 및 이상치 탐지
model = EllipticEnvelope(contamination=0.09)
model.fit(train.drop("type", axis=1))

val["anomaly"] = model.predict(val.drop("type", axis=1))
test["anomaly"] = model.predict(test.drop("type", axis=1))

y_true = val["type"].apply(lambda x: 1 if x == "anomaly" else 0)
y_pred = val["anomaly"].apply(lambda x: 1 if x == -1 else 0)
print(y_pred.value_counts())
macro_f1 = f1_score(y_true, y_pred, average="macro")
macro_precision = precision_score(y_true, y_pred, average="macro")
macro_recall = recall_score(y_true, y_pred, average="macro")

print("Macro F1-score: {:.4f}".format(macro_f1))
print("Macro Precision: {:.4f}".format(macro_precision))
print("Macro Recall: {:.4f}".format(macro_recall))

test.anomaly.value_counts()

0    678
1     61
Name: anomaly, dtype: int64
Macro F1-score: 0.4785
Macro Precision: 0.5000
Macro Recall: 0.4587


-1    7389
Name: anomaly, dtype: int64

In [14]:
df = pd.read_csv(r'C:\Project_MB\Anomaly-Detection-of-Air-Compressor\data\raw\train_data.csv')
test = pd.read_csv(r'C:\Project_MB\Anomaly-Detection-of-Air-Compressor\data\raw\test_data.csv')

In [17]:
df.describe()
test.describe()

Unnamed: 0,air_inflow,air_end_temp,out_pressure,motor_current,motor_rpm,motor_temp,motor_vibe,type
count,2463.0,2463.0,2463.0,2463.0,2463.0,2463.0,2463.0,2463.0
mean,2.128843,49.001137,0.7,27.490715,2372.576939,65.766707,3.452972,2.957369
std,1.202328,8.725091,2.964897e-14,13.604597,687.870774,7.797152,1.576386,2.230113
min,0.33,32.03,0.7,5.34,1201.0,50.36,1.97,0.0
25%,1.28,41.6,0.7,17.11,1784.5,59.15,2.88,1.0
50%,1.96,48.98,0.7,25.85,2358.0,65.64,3.26,3.0
75%,2.82,56.355,0.7,36.0,2971.0,72.275,3.73,5.0
max,6.22,64.96,0.7,72.0,3564.0,80.52,21.87,7.0


In [68]:
test_score = test.copy()
test_score["anomaly_score"] = model.decision_function(test_score)
test_score["anomaly"] = test_score["anomaly_score"].apply(lambda x: 0 if x > 0 else 1)

test_score.anomaly.value_counts()
test_score
submission = pd.read_csv(r'C:\Project_MB\Anomaly-Detection-of-Air-Compressor\data\raw\answer_sample.csv')
submission
submission["label"] = test_score["anomaly"]
submission.to_csv(r'C:\Project_MB\Anomaly-Detection-of-Air-Compressor\data\submission\submission.csv', index=False)