In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score, confusion_matrix,roc_auc_score
import warnings
warnings.filterwarnings("ignore")

import pandas as pd

def model_basic(x_train, y_train, x_test, y_test, models=None):
    """Fit several baseline classifiers and tabulate their test-set metrics.

    Every model is fitted on ``(x_train, y_train)``, used to predict
    ``x_test``, and scored against ``y_test`` with accuracy, precision,
    recall and F1, each rounded to 4 decimals. The metric functions are
    called with their default arguments. The confusion matrix of each model
    is printed as a side effect.

    Parameters
    ----------
    x_train, y_train
        Training features and labels, passed straight to ``fit``.
    x_test, y_test
        Evaluation features and labels, passed to ``predict`` and to the
        metric functions.
    models : iterable of estimators, optional
        Unfitted estimators exposing ``fit`` and ``predict``. When omitted,
        the default line-up is used: LogisticRegression,
        DecisionTreeClassifier, SVC, RandomForestClassifier and
        XGBClassifier, all with default settings.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns ``model`` (the fitted estimator
        object), ``accuracy``, ``precision``, ``recall`` and ``f1_score``.
    """
    if models is None:
        models = [
            LogisticRegression(),
            DecisionTreeClassifier(),
            SVC(),
            RandomForestClassifier(),
            XGBClassifier(),
        ]

    columns = ['model', 'accuracy', 'precision', 'recall', 'f1_score']
    records = []

    for clf in models:
        clf = clf.fit(x_train, y_train)
        pred = clf.predict(x_test)
        # No predict_proba call here: its result was never used, and SVC()
        # is created with probability=False by default, in which case
        # predict_proba is unavailable and raises AttributeError, aborting
        # the whole comparison at the third model.

        records.append({
            'model': clf,
            'accuracy': round(accuracy_score(y_test, pred), 4),
            'precision': round(precision_score(y_test, pred), 4),
            'recall': round(recall_score(y_test, pred), 4),
            'f1_score': round(f1_score(y_test, pred), 4),
        })

        confusion = confusion_matrix(y_test, pred)
        print(confusion)

    # Passing `columns` keeps the expected header even if `models` is empty.
    rdf = pd.DataFrame(records, columns=columns)
    return rdf

In [11]:
# Load the training and test tables; the first CSV column becomes the index.
train_df = pd.read_csv('./train_final_over.csv', index_col=0)
# Drop the 'Unnamed: 0' column from the test table as part of the load,
# instead of mutating the frame in place afterwards.
test_df = pd.read_csv('./test_final.csv', index_col=0).drop(columns='Unnamed: 0')
# Preview the first rows of the test table.
test_df.head()

Unnamed: 0,거래량,이상거래,카드거래발생거리,실소유자나이,카드거래간시간,중서부,북동부,남부,서부,상반기,...,category_grocery_pos,category_health_fitness,category_home,category_kids_pets,category_misc_net,category_misc_pos,category_personal_care,category_shopping_net,category_shopping_pos,category_travel
0,-0.790139,0,-1.933556,0.289829,-0.634428,0,0,1,0,1,...,0,0,0,0,0,0,1,0,0,0
1,-0.718225,0,1.071493,-1.021992,-0.634428,0,0,0,1,1,...,0,0,0,0,0,0,1,0,0,0
2,-0.687732,0,-0.642796,0.170572,-0.634428,0,1,0,0,1,...,0,1,0,0,0,0,0,0,0,0
3,-0.637702,0,-1.81625,-0.843107,-0.634428,0,0,1,0,1,...,0,0,0,0,0,1,0,0,0,0
4,-0.78926,0,1.049432,1.064996,-0.634428,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,1


In [None]:
# The column passed to the models as y; every other selected column is a feature.
target_column = '이상거래'

# Columns kept from both CSV files, in this order (target last). Defined once
# so the train and test selections cannot drift apart.
selected_columns = [
    '거래량', '주간', '카드거래간시간', '하반기',
    'category_entertainment', 'category_food_dining',
    'category_gas_transport', 'category_grocery_net',
    'category_grocery_pos', 'category_health_fitness', 'category_home',
    'category_kids_pets', 'category_misc_net', 'category_misc_pos',
    'category_personal_care', 'category_shopping_net',
    'category_shopping_pos', 'category_travel',
    '중서부', '남부', '서부', '북동부',
    target_column,
]

# Re-read both files so this cell does not depend on the state left by earlier cells.
train_df = pd.read_csv('./train_final_over.csv', index_col=0)
test_df = pd.read_csv('./test_final.csv', index_col=0)

# Restrict both tables to the same columns.
train_df = train_df[selected_columns]
test_df = test_df[selected_columns]

# Split each table into features (x) and a one-column target frame (y).
x_train = train_df.drop(columns=target_column)
y_train = train_df[[target_column]]
x_test = test_df.drop(columns=[target_column])
y_test = test_df[[target_column]]

# Fit the baseline models and display the metric table as the cell output.
model_basic(x_train, y_train, x_test, y_test)