In [None]:
import pandas as pd
# Load the three competition splits from the local data directory.
train_df = pd.read_csv('./data/train.csv')
val_df = pd.read_csv('./data/val.csv')
test_df = pd.read_csv('./data/test.csv')

# Contamination of the validation set.
# Class=0 is a normal transaction, Class=1 is a fraudulent transaction.
# Count each class explicitly instead of unpacking value_counts(): that result
# is ordered by frequency, so unpacking silently depends on "normal" being the
# majority class and on exactly two classes being present.
val_normal = (val_df['Class'] == 0).sum()
val_fraud = (val_df['Class'] == 1).sum()

# Contamination is the share of outliers among ALL rows (fraud / total),
# not the fraud-to-normal ratio.
val_contamination = val_fraud / (val_normal + val_fraud)
print(f'Validation contamination : [{val_contamination}]')

In [None]:
from pycaret.anomaly import *
# Initialise the PyCaret anomaly-detection experiment on the training features.
# The 'ID' column is dropped so it is not passed to setup() as a feature.
# session_id pins PyCaret's random seed; without it a new seed is drawn on each
# run and the fitted model is not reproducible across Restart & Run All.
s = setup(
    data = train_df.drop(columns=['ID']),
    remove_multicollinearity = True,
    multicollinearity_threshold = 0.90,
    normalize = True,
    silent = True,
    session_id = 42,
)

In [None]:
# Create the 'iforest' model, passing the validation-set contamination
# as the expected outlier fraction, then show the estimator's repr.
iforest = create_model(
    model = 'iforest',
    fraction = val_contamination,
)
print(iforest)

In [None]:
# Display PyCaret's listing of available anomaly-detection models
# (reference for model IDs such as 'iforest' used in this notebook).
models()

In [None]:
# Visualise the fitted model using PyCaret's 'umap' plot type.
plot_model(model = iforest, plot = 'umap')

In [None]:
# Attach the model's output to the training data and preview the first rows.
result = assign_model(model = iforest)
result.head()

In [None]:
# Persist the fitted model to disk under the name 'iforest_model'.
save_model(iforest, 'iforest_model')

In [None]:
# Reload the saved model so evaluation and inference use the persisted artefact.
loaded_model = load_model(model_name = 'iforest_model')

## Evaluation : Validation set


In [None]:
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report

# Separate the validation frame into ground-truth labels and model inputs.
val_y = val_df['Class']
val_x = val_df.drop(columns=['ID', 'Class'])

# Predict on the validation inputs with the reloaded model.
# NOTE(review): presumably returns 0/1 labels aligned with 'Class' - confirm.
val_pred = loaded_model.predict(val_x)

# Macro-averaged F1 plus the per-class precision/recall breakdown.
val_score = f1_score(y_true=val_y, y_pred=val_pred, average='macro')
print(f'Validation F1 Score : [{val_score}]')
print(classification_report(y_true=val_y, y_pred=val_pred))

In [None]:
# Wrap the validation predictions in a DataFrame for inspection in the notebook.
temp_df = pd.DataFrame(data=val_pred)
temp_df

## Inference : Test set

In [None]:
# Model inputs for the test split: every column except the 'ID' identifier.
test_x = test_df.drop('ID', axis='columns')

In [None]:
# Run the reloaded model on the test inputs; the result is written into the
# submission's 'Class' column further down.
test_pred = loaded_model.predict(test_x) # model prediction

## Submission

In [None]:
# Load the sample submission file as the template and preview its first rows.
submit = pd.read_csv(filepath_or_buffer='./sample_submission.csv')
submit.head(n=5)

In [None]:
# Fill the template's 'Class' column with the test predictions and write the
# submission CSV without the DataFrame index.
submit['Class'] = test_pred
submit.to_csv(path_or_buf='./submithwansoo2.csv', index=False)