# Project: 신용카드 사기 거래 탐지 AI 경진대회

### 요약
- DACON 대회 홈페이지에 업로드 되어있는 sample code
- IsolationForest 모델링

#### Author: 김민수(kimminsu.ds@gmail.com)

## 환경설정

### 패키지

In [13]:
import pandas as pd
import numpy  as np

from sklearn.ensemble import IsolationForest
from sklearn.metrics  import f1_score, classification_report

import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings(action='ignore')

### 데이터

In [14]:
train = pd.read_csv("./data/train.csv")
valid = pd.read_csv("./data/val.csv")
test = pd.read_csv("./data/test.csv")

#### 이상거래 비율 확인

In [15]:
valid_normal, valid_fraud = valid['Class'].value_counts()
valid_contamination = valid_fraud / valid_normal

In [17]:
train_x = train.drop(columns=['ID'])

## 모델링 - IsolationForest

In [18]:
model = IsolationForest(n_estimators=125, max_samples=len(train_x), contamination=valid_contamination, random_state=42, verbose=0)
model.fit(train_x)

IsolationForest(behaviour='deprecated', bootstrap=False,
                contamination=0.0010551491277433877, max_features=1.0,
                max_samples=113842, n_estimators=125, n_jobs=None,
                random_state=42, verbose=0, warm_start=False)

#### 예측값 생성

In [None]:
valid_x = valid.drop(columns=['ID', 'Class']) # Input Data
valid_y = valid['Class'] # Label

#### 성능 측정 지표 확인

In [None]:
def get_pred_label(model_pred):
    # IsolationForest 모델 출력 (1:정상, -1:불량(사기)) 이므로 (0:정상, 1:불량(사기))로 Label 변환
    model_pred = np.where(model_pred == 1, 0, model_pred)
    model_pred = np.where(model_pred == -1, 1, model_pred)
    return model_pred

In [20]:
valid_pred = model.predict(valid_x) # model prediction
valid_pred = get_pred_label(valid_pred)
valid_score = f1_score(valid_y, valid_pred, average='macro')
print(f'valididation F1 Score : [{valid_score}]')
print(classification_report(valid_y, valid_pred))

valididation F1 Score : [0.6963959681062595]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     28432
           1       0.39      0.40      0.39        30

    accuracy                           1.00     28462
   macro avg       0.69      0.70      0.70     28462
weighted avg       1.00      1.00      1.00     28462



## 최종 결과 생성

#### 예측값 생성

In [21]:
test_x = test.drop(columns=['ID'])
test_pred = model.predict(test_x) # model prediction
test_pred = get_pred_label(test_pred)

#### 제출 파일 저장

In [24]:
submit = pd.read_csv('./data/sample_submission.csv')
submit['Class'] = test_pred
submit.to_csv('./submit.csv', index=False)

Unnamed: 0,ID,Class
0,AAAA0x1,1
1,AAAA0x2,1
2,AAAA0x5,1
3,AAAA0x7,1
4,AAAA0xc,1
