In [66]:
import numpy as np 
import pandas as pd 
import seaborn as sns 
import os
import matplotlib.pyplot as  plt 
from lazypredict.Supervised import LazyClassifier
from lazypredict.Supervised import LazyRegressor 
from imblearn.over_sampling import  SMOTE
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score 
import joblib

In [67]:
# !pip install lazypredict

In [68]:
# Load the dataset from the CSV file in the working directory and preview the first rows
df = pd.read_csv(filepath_or_buffer='creditcard.csv')
df.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1,0,3,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,150,0
1,0,1,0,0,0,0,0,0,0,0,...,0,-1,0,0,0,0,0,0,3,0
2,1,-1,-1,2,0,-1,2,1,0,-2,...,0,1,1,-1,0,0,0,0,379,0
3,1,-1,0,2,-1,0,1,0,0,-1,...,0,0,0,-1,1,0,0,0,124,0
4,2,-1,1,2,0,0,0,1,0,1,...,0,1,0,0,0,1,0,0,70,0


In [69]:
# Column names, dtypes and non-null counts of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [70]:
# Show the summary statistics rounded to whole numbers.
# The float format is applied through a context manager so it only affects this
# output; setting it globally with pd.set_option would keep rounding every
# DataFrame displayed afterwards (for example the model score table) to integers.
with pd.option_context('display.float_format', lambda x: '%.0f' % x):
    # Inside a `with` block the last expression is not auto-rendered, so display it explicitly
    display(df.describe())

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284807,284807,284807,284807,284807,284807,284807,284807,284807,284807,...,284807,284807,284807,284807,284807,284807,284807,284807,284807,284807
mean,94814,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,88,0
std,47488,2,2,2,1,1,1,1,1,1,...,1,1,1,1,1,0,0,0,250,0
min,0,-56,-73,-48,-6,-114,-26,-44,-73,-13,...,-35,-11,-45,-3,-10,-3,-23,-15,0,0
25%,54202,-1,-1,-1,-1,-1,-1,-1,0,-1,...,0,-1,0,0,0,0,0,0,6,0
50%,84692,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,22,0
75%,139320,1,1,1,1,1,0,1,0,1,...,0,1,0,0,0,0,0,0,77,0
max,172792,2,22,9,17,35,73,121,20,16,...,27,11,23,5,8,4,32,34,25691,1


In [71]:
# Highly imbalanced dataset: count the rows in each class
df['Class'].value_counts()

Class
0    284315
1       492
Name: count, dtype: int64

In [72]:
# (rows, columns) of the loaded frame
df.shape 

(284807, 31)

#### Standard-scaling the dataset


In [73]:
# Standardise the columns listed below and write the scaled values back into df.
# NOTE(review): the scaler is fitted on every row of df, i.e. before any
# train/test split, so held-out rows contribute to the scaling statistics.
column_to_be_scale = ['Time', 'Amount']
scale_column = df[column_to_be_scale]

scaler = StandardScaler()
scaled_data = scaler.fit(scale_column).transform(scale_column)
scaled_df = pd.DataFrame(data=scaled_data, columns=column_to_be_scale)

# Overwrite the original columns with their standardised versions
df[column_to_be_scale] = scaled_df


#### Splitting dataset

In [74]:
# Target is kept as a one-column DataFrame; features are every other column
y = df[['Class']]
x = df.drop(columns='Class')

#### Handling imbalanced data

In [75]:
# Class counts in df prior to oversampling
df['Class'].value_counts()

Class
0    284315
1       492
Name: count, dtype: int64

In [76]:
# Balance the classes by oversampling the minority class with SMOTE.
# A fixed random_state makes the synthetic samples identical on every run.
# NOTE(review): the whole dataset is resampled here. If a train/test split is
# taken from X, Y afterwards, synthetic points interpolated from test rows can
# land in the training set; consider resampling only the training portion.
oversample = SMOTE(random_state=42)
X, Y = oversample.fit_resample(x, y)
Y.value_counts()

Class
0        284315
1        284315
Name: count, dtype: int64

#### Train test split

In [77]:
# Hold out 20% of the rows for testing.
# random_state fixes the shuffle so the reported scores are reproducible;
# stratify keeps the class ratio identical in the train and test parts.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42, stratify=Y
)

In [78]:
# Import the machine learning algorithms
from sklearn.linear_model import LogisticRegression 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier 
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.neighbors import KNeighborsClassifier 
from xgboost import XGBClassifier


In [79]:
# Candidate classifiers to compare.
# - `use_label_encoder` is no longer passed: it is deprecated in recent XGBoost
#   releases and only produces a warning.
# - `eval_metric` is 'logloss' because the target is binary; 'mlogloss' is the
#   multi-class variant.
# - random_state is fixed on the stochastic estimators so scores are reproducible.
xgbc = XGBClassifier(eval_metric='logloss', random_state=42)
lg   = LogisticRegression()
rdf  = RandomForestClassifier(random_state=42)
adb  = AdaBoostClassifier(random_state=42)
knn  = KNeighborsClassifier()
ml_algorithms = [xgbc, lg, rdf, adb, knn]

In [80]:
# Function to train the machine learning algorithms
def train_model(train_data, test_data, models, model_dir=None):
    """Fit each model, score it on the train and test data, and save it to disk.

    Parameters
    ----------
    train_data : sequence
        ``(features, labels)``; used to fit each model and to compute its
        train score.
    test_data : sequence
        ``(features, labels)``; used to compute the test score.
    models : iterable
        Estimators exposing ``fit(X, y)`` and ``score(X, y)``.
    model_dir : str or os.PathLike, optional
        Directory the fitted models are written to. Defaults to a ``models``
        folder inside the current working directory. It is created if missing.

    Returns
    -------
    dict
        Column-oriented history with the keys ``Algorithm``, ``Train_score``,
        ``Test_score`` and ``Difference``. Scores are the ``score`` results
        multiplied by 100; ``Difference`` is train score minus test score.
    """
    train_history = {"Algorithm": [], "Train_score": [], "Test_score": [], "Difference": []}

    x_fit, y_fit = train_data[0], train_data[1]
    x_eval, y_eval = test_data[0], test_data[1]

    # Resolve and create the output directory once, before any model is trained,
    # so a bad path fails early instead of after a long fit.
    if model_dir is None:
        model_dir = os.path.join(os.getcwd(), 'models')
    os.makedirs(model_dir, exist_ok=True)

    for model in models:
        algorithm_name = model.__class__.__name__

        # Training model
        model.fit(x_fit, y_fit)

        # Getting scores
        train_score = model.score(x_fit, y_fit) * 100
        test_score = model.score(x_eval, y_eval) * 100

        train_history['Algorithm'].append(algorithm_name)
        train_history['Train_score'].append(train_score)
        train_history['Test_score'].append(test_score)
        train_history['Difference'].append(train_score - test_score)

        # Saving the model as <model_dir>/<ClassName>.lb
        model_path = os.path.join(model_dir, algorithm_name + '.lb')
        joblib.dump(model, model_path)

    return train_history

# Bundle features and labels as (X, y) pairs, the shape train_model expects
train_data = (x_train, y_train)
test_data = (x_test, y_test)

# Fit every candidate model and tabulate the per-algorithm scores
history_records = train_model(train_data, test_data, ml_algorithms)
train_history = pd.DataFrame(history_records)
train_history



#### Using Deep Learning Algorithms


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [None]:
# Standardise all features for the neural network: the statistics are learned
# from the training rows only and then applied to both splits.
# Note: this rebinds the name `scaler`.
scaler = StandardScaler().fit(x_train)
X_train_transformed = scaler.transform(x_train)
X_test_transformed = scaler.transform(x_test)

In [None]:
# Feed-forward binary classifier: three Dense blocks of shrinking width
# (64 -> 32 -> 16), each followed by batch normalisation and 50% dropout,
# ending in a single sigmoid unit.
model = Sequential([
    # Input layer + first hidden block
    Dense(units=64, activation='relu', input_dim=X_train_transformed.shape[1]),
    BatchNormalization(),
    Dropout(0.5),
    # Second hidden block
    Dense(units=32, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    # Third hidden block
    Dense(units=16, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    # Output layer
    Dense(units=1, activation='sigmoid'),
])

# Compile with Adam at an initial learning rate of 0.001
# (no decay schedule is configured on the optimizer itself)
optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Training callbacks, both watching the validation loss:
# - EarlyStopping: patience of 10 epochs, restoring the best weights
# - ReduceLROnPlateau: factor 0.2 with patience of 5 epochs, floor of 0.0001
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=0.0001,
)

callbacks = [early_stopping, reduce_lr]


In [None]:
# Train the model.
# Only the features were standardised, so the labels are taken directly from
# y_train and reshaped to (n, 1) to line up with the single sigmoid output unit.
y_train_array = np.asarray(y_train).reshape(-1, 1)
history = model.fit(
    X_train_transformed,
    y_train_array,
    validation_split=0.2,
    epochs=20,
    batch_size=32,
    callbacks=callbacks,
    verbose=1,
)


In [None]:
# Predict probabilities for the test set and threshold them at 0.5 to get class labels
test_probabilities = model.predict(X_test_transformed)
y_pred = (test_probabilities > 0.5).astype("int32")

# Compare the predicted labels with the true test labels
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


#### Hyperparameter tuning

In [None]:
# model_and_parameter ={

# "Logistic_Regression" :{
#                         "model" : LogisticRegression(),
#                         "param" : {'penalty':['l1', 'l2', 'elasticnet'],
#                                     'solver' : ['newton-cg', 'lbfgs', 'liblinear']}
#                        },

# "Decision_Tree" :{
#                         "model" : DecisionTreeClassifier(),
#                         "param" : {'criterion' : ["gini", "entropy","log_loss"],
#                                   'splitter' : ["best", "random"],
#                                   'max_depth':range(4,16,1),
#                                   'min_samples_leaf':range(2,8,1),
#                                   'min_samples_split':range(2,8,1)}                 
#                  },

# "SVC" : {
#                         "model" : SVC(),
#                         "param" : {'C' :[1.0],
#                                    'kernel' : ['linear', 'poly', 'rbf'],
#                                    'gamma': ['scale','auto']}
                                  
#        },

# "KNeighborsClassifier" : {
#                         "model" : KNeighborsClassifier(),
#                         "param" : {'n_neighbors' : [3,5,7,9],
#                                   'algorithm' : ['auto', 'ball_tree', 'kd_tree', 'brute'],
#                                   'metric':['euclidean','minkowski']}
                                   
#                         },

# "GaussianNB" :  {
#                     "model" : GaussianNB(),
#                     "param" : {}
#                  },


# "RandomForestClassifier" :{
#                     "model" : RandomForestClassifier(),
#                     "param" : {'criterion' : ["gini", "entropy"],
#                             'n_estimators' : [80,100,120,135,150],
#                             'max_depth':range(4,16,1),
#                             'min_samples_leaf':range(1,8,1),
#                             'min_samples_split':range(2,8,1)
#                               }
#                       },

#  "AdaBoostClassifier" :{
#                     "model" : AdaBoostClassifier(),
#                     "param" : {'n_estimators' : [10,50,100],
#                                 'learning_rate':[1.0,0.02,0.04,0.4],}
#                       },

# "GradientBoostingClassifier" :{
#                     "model" : GradientBoostingClassifier(),
#                     "param" : {'n_estimators' : [10,50,100],
#                               'learning_rate':[1.0,0.02,0.04,0.4],
#                               'criterion' : ['friedman_mse', 'squared_error', 'mse'], 
#                               'min_samples_split':range(3,7,1),
#                               'min_samples_leaf':range(1,7,1),
#                               'max_depth':range(3,6,1)}
#                       },

# "XGBClassifier" :{
#                     "model" : XGBClassifier(),
#                     "param" : {}
# }

# }