In [2]:
# import necessary libraries
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold


In [3]:
# read in the data
df = pd.read_csv('../../datasets/creditcard.csv')
# specify features and target variable
X = df.drop(columns='Class')
y = df['Class']

In [4]:
# split the data into train and holdout sets
X_train, X_holdout, y_train, y_holdout = train_test_split(X, y, test_size=0.2, stratify=y, random_state=1)

In [10]:
# specify the models to evaluate
models = [('logit', LogisticRegression(max_iter = 1000, random_state=1)), ('NB', GaussianNB())\
          , ('KNN', KNeighborsClassifier(n_jobs=-1)), ('DT', DecisionTreeClassifier(random_state=1))\
            , ('SVM', SVC(random_state=1))]

In [13]:
# evaluate each model using cross-validation
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for name, model in models:
    scores = cross_val_score(model, X_train, y_train, cv=cv, scoring='f1_macro')
    print(f'{name}: {scores.mean()}')

logit: 0.8449829677758091
NB: 0.6141593607164253
KNN: 0.5534817928192173
DT: 0.8681525183139875
SVM: 0.49956731448888075
