<a href="https://colab.research.google.com/github/Khushi-dahiya08/CreditCard/blob/main/CreditCard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN

# Load the credit-card dataset from a CSV file resolved relative to the
# current working directory. The code below reads its 'Class' column as the
# classification target and treats every other column as a feature.
data = pd.read_csv("Creditcard_data.csv")

# Balancing the dataset using undersampling
def balance_dataset(data):
    """Resample ``data`` with ``RandomUnderSampler`` and return one DataFrame.

    The 'Class' column is used as the target and all remaining columns as
    features. The sampler is seeded with ``random_state=42``. The returned
    frame holds the resampled feature columns followed by 'Class'.
    """
    features = data.drop(columns=['Class'])
    target = data['Class']

    sampler = RandomUnderSampler(random_state=42)
    features_bal, target_bal = sampler.fit_resample(features, target)

    # Rebuild a single frame: features first, target column last.
    feature_frame = pd.DataFrame(features_bal, columns=features.columns)
    target_frame = pd.DataFrame(target_bal, columns=['Class'])
    return pd.concat([feature_frame, target_frame], axis=1)

# Undersample the loaded data once; this frame is what gets sampled later.
balanced_data = balance_dataset(data)

# Dynamic sample size: aim for 100 rows, but never ask for more rows than the
# balanced frame actually contains.
available_data_size = len(balanced_data)
sample_size = min(100, available_data_size)

# sampling methods
def random_sampling(data, size):
    """Return ``size`` rows of ``data`` drawn at random with a fixed seed (42).

    If ``size`` is larger than the number of rows, ``data`` itself is
    returned unchanged instead of being sampled.
    """
    if size > len(data):
        return data
    return data.sample(n=size, random_state=42)

def systematic_sampling(data, size):
    """Return up to ``size`` rows of ``data`` taken at a fixed stride.

    The stride is ``len(data) // size`` (or 1 when ``size`` exceeds the
    number of rows). Selection starts at the first row, and the result is
    truncated to ``size`` rows.

    Args:
        data: DataFrame to sample from.
        size: Number of rows wanted. A non-positive value yields an empty
            frame with the same columns.

    Returns:
        A DataFrame with at most ``size`` rows, in their original order.
    """
    # A zero size would divide by zero below and a negative one would give a
    # negative (reversing) stride, so handle both up front.
    if size <= 0:
        return data.iloc[:0]

    n_rows = len(data)
    step = n_rows // size if size <= n_rows else 1
    return data.iloc[::step].iloc[:size]

def stratified_sampling(data, size):
    """Return a sample with an equal share of rows from each 'Class' value.

    Each class contributes ``size // number_of_classes`` rows, drawn with a
    fixed seed (42). A class that has fewer rows than its share contributes
    all of its rows instead of raising.

    Args:
        data: DataFrame containing a 'Class' column.
        size: Total number of rows wanted across all classes.

    Returns:
        A DataFrame with the same columns as ``data`` (including 'Class'),
        at most ``size`` rows, grouped by class in sorted class order. Rows
        keep their original index labels. An empty frame is returned when
        ``size`` is not positive or ``data`` has no classes.
    """
    if size <= 0:
        return data.iloc[:0]

    # Iterate over the groups explicitly instead of using ``groupby.apply``:
    # iteration always yields full sub-frames with the 'Class' column kept,
    # whereas ``apply`` on the grouping column is deprecated in newer pandas.
    class_groups = [group for _, group in data.groupby('Class')]
    if not class_groups:
        return data.iloc[:0]

    per_class = size // len(class_groups)
    parts = [
        # Cap the request so a small class cannot make ``sample`` raise.
        group.sample(n=min(per_class, len(group)), random_state=42)
        for group in class_groups
    ]
    return pd.concat(parts).iloc[:size]

def smote_sampling(data, size):
    """Oversample minority classes with SMOTE and return at most ``size`` rows.

    SMOTE runs with its default ``sampling_strategy`` (resample every class
    except the majority). ``size`` is a row count, so it is used only to cap
    the output, not as a class ratio: imblearn interprets a float
    ``sampling_strategy`` as the desired minority/majority ratio and rejects
    data that is already at or above that ratio.

    Args:
        data: DataFrame containing a 'Class' column; all other columns are
            used as features.
        size: Maximum number of rows to return.

    Returns:
        A DataFrame with the feature columns followed by 'Class'. If the
        resampled data has more than ``size`` rows, ``size`` of them are
        drawn at random with a fixed seed (42).
    """
    X = data.drop(columns=['Class'])
    y = data['Class']

    # SMOTE interpolates between a sample and its k nearest same-class
    # neighbours, so k must stay below the smallest class size. Keep the
    # library default of 5 whenever the data allows it.
    class_counts = y.value_counts()
    smallest_class = int(class_counts.min()) if len(class_counts) else 0
    k_neighbors = max(1, min(5, smallest_class - 1))

    smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    resampled = pd.concat(
        [
            pd.DataFrame(X_resampled, columns=X.columns),
            pd.DataFrame(y_resampled, columns=['Class']),
        ],
        axis=1,
    )

    # Cap the output the same way smote_enn_sampling does.
    if 0 <= size < len(resampled):
        resampled = resampled.sample(n=size, random_state=42)
    return resampled

def smote_enn_sampling(data, size):
    """Resample ``data`` with SMOTEENN, then draw up to ``size`` rows from it.

    'Class' is the target column and every other column is a feature. The
    resampler and the final draw both use a fixed seed (42). When the
    resampled data holds fewer than ``size`` rows, all of them are drawn.
    """
    features = data.drop(columns=['Class'])
    target = data['Class']

    resampler = SMOTEENN(random_state=42)
    features_res, target_res = resampler.fit_resample(features, target)

    combined = pd.concat(
        [
            pd.DataFrame(features_res, columns=features.columns),
            pd.DataFrame(target_res, columns=['Class']),
        ],
        axis=1,
    )

    # Never request more rows than the resampler produced.
    n_rows = min(size, len(features_res))
    return combined.sample(n=n_rows, random_state=42)

# Sampling methods and their display names (the two lists are kept in the
# same order so that position i in one matches position i in the other)
sampling_methods = [random_sampling, systematic_sampling, stratified_sampling, smote_sampling, smote_enn_sampling]
sampling_names = ['Random Sampling', 'Systematic Sampling', 'Stratified Sampling', 'SMOTE Sampling', 'SMOTE-ENN Sampling']

# Machine learning models, keyed by display name
models = {
    # max_iter is raised above the scikit-learn default: on this unscaled
    # data the solver stops at the iteration limit with a ConvergenceWarning.
    "Logistic Regression": LogisticRegression(random_state=42, max_iter=1000),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=42)
}

# Evaluate every model on the sample produced by each sampling method.
# A failure in one sampling method is reported and the loop moves on to the
# next one; results recorded before the failure are kept.
results = []

for method_name, sampling_method in zip(sampling_names, sampling_methods):
    try:
        print(f"Applying {method_name}...")
        sampled_data = sampling_method(balanced_data, size=sample_size)
        X_sample = sampled_data.drop(columns=['Class'])
        y_sample = sampled_data['Class']

        # Hold out 20% of the sample for testing, with a fixed seed.
        X_train, X_test, y_train, y_test = train_test_split(
            X_sample,
            y_sample,
            test_size=0.2,
            random_state=42,
        )

        for model_name, model in models.items():
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)

            row = {
                "Sampling Method": method_name,
                "Model": model_name,
                "Accuracy": accuracy,
            }
            results.append(row)
            print(f"{method_name} + {model_name}: Accuracy = {accuracy:.2f}")
    except Exception as err:
        print(f"Error in {method_name}: {err}")

# Tabulate the collected accuracies, one row per (sampling method, model)
results_df = pd.DataFrame(results)
print("\nSummary of Results:")
print(results_df)


Applying Random Sampling...
Random Sampling + Logistic Regression: Accuracy = 0.50
Random Sampling + Random Forest: Accuracy = 0.50
Random Sampling + SVM: Accuracy = 0.75
Random Sampling + KNN: Accuracy = 0.50
Random Sampling + Decision Tree: Accuracy = 0.50
Applying Systematic Sampling...
Systematic Sampling + Logistic Regression: Accuracy = 0.25


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Systematic Sampling + Random Forest: Accuracy = 0.25
Systematic Sampling + SVM: Accuracy = 0.00
Systematic Sampling + KNN: Accuracy = 0.25
Systematic Sampling + Decision Tree: Accuracy = 0.75
Applying Stratified Sampling...
Stratified Sampling + Logistic Regression: Accuracy = 0.25


  return groups.apply(lambda x: x.sample(int(size / len(groups)), random_state=42))[:size]
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Stratified Sampling + Random Forest: Accuracy = 0.00
Stratified Sampling + SVM: Accuracy = 0.00
Stratified Sampling + KNN: Accuracy = 0.25
Stratified Sampling + Decision Tree: Accuracy = 0.75
Applying SMOTE Sampling...
Error in SMOTE Sampling: The specified ratio required to remove samples from the minority class while trying to generate new samples. Please increase the ratio.
Applying SMOTE-ENN Sampling...
Error in SMOTE-ENN Sampling: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

Summary of Results:
        Sampling Method                Model  Accuracy
0       Random Sampling  Logistic Regression      0.50
1       Random Sampling        Random Forest      0.50
2       Random Sampling                  SVM      0.75
3       Random Sampling                  KNN      0.50
4       Random Sampling        Decision Tree      0.50
5   Systematic Sampling  Logistic Regression      0.25
6   Systematic Sa