<a href="https://colab.research.google.com/github/Ramjas-Langdi/Sampling-Assignment/blob/main/Ramjas_Sampling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, KFold
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import  ClusterCentroids
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

data_df = pd.read_csv('/content/Creditcard_data.csv')
X = data_df.drop('Class', axis=1)
y = data_df['Class']
smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42)

models = {
    'Decision Tree': DecisionTreeClassifier(),
    'XGBoost': XGBClassifier(),
    'KNN': KNeighborsClassifier(),
    'SVM': SVC(),
    'Gradient Boosting': GradientBoostingClassifier(),
}

sampling_techniques = {
    'Simple Random Sampling': 'simple',
    'Systematic Sampling': 'systematic',
    'ClusterCentroids': 'clustercentroids',
    'Cross Validation Sampling': 'cv',
    'Stratified Sampling': 'stratified'

}

results = {}
for model_name, model in models.items():
    model_results = []
    for technique_name, technique in sampling_techniques.items():

        if technique == 'simple':
            X_sampled, y_sampled = X_train, y_train
        elif technique == 'systematic':

            n = 5
            X_sampled, _, y_sampled, _ = train_test_split(X_train, y_train, test_size=1-(1/n))

        elif technique == 'cv':
            kf = KFold(n_splits=5, random_state=42, shuffle=True)
            for _, test_index in kf.split(X_train, y_train):
                X_sampled, y_sampled = X_train.iloc[test_index], y_train.iloc[test_index]

        elif technique == 'clustercentroids':

            cc = ClusterCentroids()
            X_sampled, y_sampled = cc.fit_resample(X_train, y_train)
        elif technique == 'stratified':

            strat_split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
            for train_index, _ in strat_split.split(X_train, y_train):
                X_sampled, y_sampled = X_train.iloc[train_index], y_train.iloc[train_index]



        model.fit(X_sampled, y_sampled)


        y_pred = model.predict(X_test)


        accuracy = accuracy_score(y_test, y_pred)
        model_results.append(accuracy)

    results[model_name] = model_results

results_df = pd.DataFrame(results, index=sampling_techniques.keys())
print(results_df)



                           Decision Tree   XGBoost       KNN       SVM  \
Simple Random Sampling          0.983660  0.990196  0.852941  0.686275   
Systematic Sampling             0.911765  0.964052  0.745098  0.653595   
ClusterCentroids                0.977124  0.986928  0.852941  0.686275   
Cross Validation Sampling       0.944444  0.983660  0.781046  0.650327   
Stratified Sampling             0.983660  0.983660  0.843137  0.679739   

                           Gradient Boosting  
Simple Random Sampling              0.990196  
Systematic Sampling                 0.970588  
ClusterCentroids                    0.983660  
Cross Validation Sampling           0.954248  
Stratified Sampling                 0.986928  
