In [63]:
import pandas as pd
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler, SMOTE, ADASYN
from imblearn.under_sampling import RandomUnderSampler, TomekLinks
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC

In [64]:
df = pd.read_csv("./CreditCard_data.csv")

In [65]:
class_counts = df["Class"].value_counts()

In [66]:
majority_class = class_counts.idxmax()
minority_class = class_counts.idxmin()

In [67]:
X = df.drop("Class", axis=1)
y = df["Class"]

In [68]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=40)

In [69]:
samplings = [
    SMOTE(random_state=40),
    RandomUnderSampler(random_state=40),
    RandomOverSampler(random_state=40),
    ADASYN(random_state=40),
    TomekLinks()
]

In [70]:
# Create a list of five different machine learning models
models = [
    DecisionTreeClassifier(random_state=40),
    KNeighborsClassifier(n_neighbors=3),
    LogisticRegression(random_state=40),
    RandomForestClassifier(random_state=40),
    SVC(random_state=40)
]

In [71]:
accuracy_dict = {}

In [72]:
for sampling in samplings:
    accuracy_dict[sampling] = {}
    for model in models:
        # Apply the sampling technique to the training set
        X_train_resampled, y_train_resampled = sampling.fit_resample(X_train, y_train)

        # Train the machine learning model on the resampled training set
        model.fit(X_train_resampled, y_train_resampled)

        # Make predictions on the testing set
        y_pred = model.predict(X_test)

        # Compute the accuracy score
        accuracy = accuracy_score(y_test, y_pred)
        
        # Add the accuracy score to the dictionary
        accuracy_dict[sampling][model] = accuracy

# Convert the accuracy dictionary to a DataFrame and print the table
accuracy_df = pd.DataFrame(accuracy_dict)
print(accuracy_df)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

                                                    SMOTE(random_state=40)  \
DecisionTreeClassifier(random_state=40)                           0.987097   
KNeighborsClassifier(n_neighbors=3)                               0.767742   
LogisticRegression(random_state=40)                               0.929032   
(DecisionTreeClassifier(max_features='auto', ra...                0.987097   
SVC(random_state=40)                                              0.535484   

                                                    RandomUnderSampler(random_state=40)  \
DecisionTreeClassifier(random_state=40)                                        0.722581   
KNeighborsClassifier(n_neighbors=3)                                            0.541935   
LogisticRegression(random_state=40)                                            0.677419   
(DecisionTreeClassifier(max_features='auto', ra...                             0.638710   
SVC(random_state=40)                                                        

In [73]:
# Find the sampling technique with the highest accuracy score
best_sampling = max(accuracy_dict, key=lambda x: max(accuracy_dict[x].values()))

# Find the model with the highest accuracy score for the best sampling technique
best_model = max(accuracy_dict[best_sampling], key=accuracy_dict[best_sampling].get)

# Print the best sampling technique and best model
print("We got the best result by combining {} sampling Over the {} model".format(best_sampling,best_model))

We got the best result by combining SMOTE(random_state=40) sampling Over the DecisionTreeClassifier(random_state=40) model
