In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
import random
# Simple random sampling: fit each classifier on a random half of the
# training split and report its accuracy on the held-out 20% split.
SEED = 42  # shared by SMOTE, the train/test split, the sampler and the models

data = pd.read_csv("/kaggle/input/sampling-data/Creditcard_data.csv")
X = data.drop("Class", axis=1)
y = data["Class"]

# NOTE(review): SMOTE is fitted on the full dataset *before* the train/test
# split, so the test split contains synthetic rows and rows interpolated from
# the same originals can land on both sides. The accuracies below are likely
# optimistic; consider splitting first and oversampling only the training part.
smote = SMOTE(random_state=SEED)
X_res, y_res = smote.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.2, random_state=SEED
)

# Draw half of the training rows without replacement. A dedicated, seeded
# generator is used instead of the global `random` state so that the selected
# rows (and therefore the reported accuracies) are identical on every run.
sample_size = int(0.5 * len(X_train))
sampler = random.Random(SEED)
random_indices = sampler.sample(range(len(X_train)), sample_size)
new_X_train = X_train.iloc[random_indices]
new_y_train = y_train.iloc[random_indices]

models = {
    "Random Forest": RandomForestClassifier(random_state=SEED),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=SEED),
    "Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC()
}

# Fit every model on the subsample and score it on the untouched test split.
accuracies = {}
for name, model in models.items():
    model.fit(new_X_train, new_y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies[name] = accuracy

for name, accuracy in accuracies.items():
    print(f"{name}: {accuracy:.4f}")


Random Forest: 0.9902
Logistic Regression: 0.9020
Decision Tree: 0.9706
KNN: 0.8170
SVM: 0.6569


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
# Stratified sampling: keep a class-stratified half of the training split,
# fit each classifier on it and print accuracy on the 20% test split.
# NOTE(review): oversampling happens before the train/test split here, so the
# test split also contains SMOTE-generated rows -- worth confirming this is
# intended.
data = pd.read_csv("/kaggle/input/sampling-data/Creditcard_data.csv")
y = data["Class"]
X = data.drop(columns="Class")

smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(
    X_res,
    y_res,
    test_size=0.2,
    random_state=42,
)

# With n_splits=1 the splitter yields exactly one (train, test) index pair;
# only the "train" half is kept as the stratified sample.
splitted = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
train_index, _ = next(splitted.split(X_train, y_train))
new_X_train = X_train.iloc[train_index]
new_y_train = y_train.iloc[train_index]

models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
}

# Train on the stratified sample, evaluate on the held-out split.
accuracies = {}
for name, model in models.items():
    model.fit(new_X_train, new_y_train)
    accuracies[name] = accuracy_score(y_test, model.predict(X_test))

for name, accuracy in accuracies.items():
    print(f"{name}: {accuracy:.4f}")


Random Forest: 0.9935
Logistic Regression: 0.9052
Decision Tree: 0.9641
KNN: 0.8268
SVM: 0.6536


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
import numpy as np
# Cluster sampling: cut the training split into equal contiguous chunks,
# pick two chunks at random, fit each classifier on their rows and report
# accuracy on the held-out 20% split.
SEED = 42  # shared by SMOTE, the train/test split, the chunk draw and the models

data = pd.read_csv("/kaggle/input/sampling-data/Creditcard_data.csv")
X = data.drop("Class", axis=1)
y = data["Class"]

# NOTE(review): SMOTE is fitted on the full dataset *before* the train/test
# split, so the test split contains synthetic rows and rows interpolated from
# the same originals can land on both sides. The accuracies below are likely
# optimistic; consider splitting first and oversampling only the training part.
smote = SMOTE(random_state=SEED)
X_res, y_res = smote.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(
    X_res, y_res, test_size=0.2, random_state=SEED
)

# "Clusters" are contiguous chunks of row *positions* in the (already
# shuffled) training split. Positions + .iloc are used rather than index
# labels + .loc so the selection cannot be affected by duplicate labels.
num_clusters = 5
clusters = np.array_split(np.arange(len(X_train)), num_clusters)

# Seeded generator: the same two chunks are chosen on every run, which makes
# the reported accuracies reproducible.
rng = np.random.default_rng(SEED)
clusters_taken = rng.choice(num_clusters, size=2, replace=False)
row_numbers = np.concatenate([clusters[i] for i in clusters_taken])
new_X = X_train.iloc[row_numbers]
new_y = y_train.iloc[row_numbers]

models = {
    "Random Forest": RandomForestClassifier(random_state=SEED),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=SEED),
    "Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC()
}

# Fit every model on the selected chunks and score it on the test split.
accuracies = {}
for name, model in models.items():
    model.fit(new_X, new_y)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracies[name] = accuracy

for name, accuracy in accuracies.items():
    print(f"{name}: {accuracy:.4f}")

Random Forest: 0.9771
Logistic Regression: 0.9118
Decision Tree: 0.9542
KNN: 0.7810
SVM: 0.6667


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
import numpy as np
# Systematic sampling: take every n-th row of the training split (starting at
# position 0), fit each classifier on those rows and print test accuracy.
# NOTE(review): the dataset is oversampled before it is split, so SMOTE-made
# rows also end up in the test split -- confirm this is the intended protocol.
data = pd.read_csv("/kaggle/input/sampling-data/Creditcard_data.csv")
y = data["Class"]
X = data.drop(columns="Class")

smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(
    X_res,
    y_res,
    test_size=0.2,
    random_state=42,
)

# Positions 0, n, 2n, ... of the training split.
n = 5
indices = np.arange(len(X_train))[::n]
new_X = X_train.iloc[indices]
new_y = y_train.iloc[indices]

models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
}

# Train on the systematic sample, evaluate on the held-out split.
accuracies = {}
for name, model in models.items():
    model.fit(new_X, new_y)
    accuracies[name] = accuracy_score(y_test, model.predict(X_test))

for name, accuracy in accuracies.items():
    print(f"{name}: {accuracy:.4f}")


Random Forest: 0.9771
Logistic Regression: 0.8725
Decision Tree: 0.9216
KNN: 0.7059
SVM: 0.6601


In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
import numpy as np
from sklearn.utils import resample
# Bootstrap sampling: resample the training split with replacement, fit each
# classifier on the resampled rows and print accuracy on the 20% test split.
# NOTE(review): SMOTE is applied ahead of the train/test split, so the test
# split is not free of synthetic rows -- check whether that is intended.
data = pd.read_csv("/kaggle/input/sampling-data/Creditcard_data.csv")
y = data["Class"]
X = data.drop(columns="Class")

smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(
    X_res,
    y_res,
    test_size=0.2,
    random_state=42,
)

# Features and labels are resampled together so rows stay aligned; no
# n_samples is passed, so resample's default sample size applies.
X_train_sampled, y_train_sampled = resample(
    X_train,
    y_train,
    replace=True,
    random_state=42,
)

models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "SVM": SVC(),
}

# Train on the bootstrap sample, evaluate on the held-out split.
accuracies = {}
for name, model in models.items():
    model.fit(X_train_sampled, y_train_sampled)
    accuracies[name] = accuracy_score(y_test, model.predict(X_test))

for name, accuracy in accuracies.items():
    print(f"{name}: {accuracy:.4f}")


Random Forest: 0.9935
Logistic Regression: 0.9183
Decision Tree: 0.9608
KNN: 0.8333
SVM: 0.6830
