In [None]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d mlg-ulb/creditcardfraud
!unzip creditcardfraud.zip

mkdir: cannot create directory ‘/root/.kaggle’: File exists
Dataset URL: https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud
License(s): DbCL-1.0
Downloading creditcardfraud.zip to /content
  0% 0.00/66.0M [00:00<?, ?B/s]
100% 66.0M/66.0M [00:00<00:00, 1.21GB/s]
Archive:  creditcardfraud.zip
  inflating: creditcard.csv          


In [None]:
import pandas as pd
import numpy as np

# Data Preprocessing

In [None]:
# Read the extracted CSV into the DataFrame that the rest of the notebook calls `dataset`.
CSV_PATH = "creditcard.csv"
dataset = pd.read_csv(CSV_PATH)

In [None]:
# Total transaction Amount for each distinct value of the Time column.
amount_by_time = dataset.groupby('Time')['Amount']
amount_by_time.sum()

Unnamed: 0_level_0,Amount
Time,Unnamed: 1_level_1
0.0,152.31
1.0,502.16
2.0,73.66
4.0,4.99
7.0,134.00
...,...
172785.0,2.69
172786.0,0.77
172787.0,24.79
172788.0,77.88


In [None]:
# True when at least one cell anywhere in the frame is missing.
missing_mask = dataset.isna()
missing_mask.to_numpy().any()

np.False_

In [None]:
# Number of missing values in each column.
dataset.isnull().sum()

Unnamed: 0,0
Time,0
V1,0
V2,0
V3,0
V4,0
V5,0
V6,0
V7,0
V8,0
V9,0


In [None]:
# Preview the first five rows.
dataset.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [None]:
# Row count for each value of the Class label.
class_column = dataset['Class']
class_column.value_counts()

Unnamed: 0_level_0,count
Class,Unnamed: 1_level_1
0,284315
1,492


In [None]:
# Every column except the last forms the feature matrix; the last column is the target.
feature_frame = dataset.iloc[:, :-1]
target_series = dataset.iloc[:, -1]
X = feature_frame.to_numpy()
y = target_series.to_numpy()

In [None]:
## Illustration only - do not execute: mean imputation with SimpleImputer
from sklearn.impute import SimpleImputer
impute = SimpleImputer(strategy='mean', missing_values=np.nan)
# Impute columns 1 and 2 of X and write the result back into the same slice.
imputed_columns = slice(1, 3)
X[:, imputed_columns] = impute.fit_transform(X[:, imputed_columns])

In [None]:
## Illustration only - do not execute: one-hot encoding through a ColumnTransformer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# Column 0 is one-hot encoded; every other column is passed through unchanged.
encoder_step = ('encoder', OneHotEncoder(), [0])
ct = ColumnTransformer(transformers=[encoder_step], remainder='passthrough')
encoded = ct.fit_transform(X)
X = np.array(encoded)

In [None]:
## Illustration only - do not execute: label-encoding the target
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# Learn the label set from y, then map y onto integer codes.
le.fit(y)
y = le.transform(y)

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows for testing.
# stratify=y keeps the class proportions of y identical in both splits; with only
# 492 positive rows out of 284,807, an unstratified split leaves the number of
# positives in the test set to chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1, stratify=y
)

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
from imblearn.over_sampling import SMOTE
from sklearn.utils import shuffle
# Oversample the training split with SMOTE, then shuffle features and labels together.
smote = SMOTE(random_state=1)
resampled_pair = smote.fit_resample(X_train, y_train)
X_resample_train, y_resample_train = shuffle(*resampled_pair, random_state=1)

# SVC

In [None]:
from sklearn.svm import SVC
# RBF-kernel support vector classifier, fitted on the resampled training data.
svc_settings = {'kernel': 'rbf', 'class_weight': 'balanced', 'random_state': 1}
classifier = SVC(**svc_settings)
classifier.fit(X_resample_train, y_resample_train)

In [None]:
# Predicted class labels for the held-out test features.
y_pred = classifier.predict(X=X_test)

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Print the confusion matrix of the SVC predictions; the accuracy is the cell's displayed value.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred)

[[70207   884]
 [   21    90]]


0.9872896828740766

In [None]:
# Per-class precision, recall and F1 for the SVC predictions.
class_labels = ['not frauds', 'frauds']
svc_report = classification_report(y_test, y_pred, target_names=class_labels)
print(svc_report)

              precision    recall  f1-score   support

  not frauds       1.00      0.99      0.99     71091
      frauds       0.09      0.81      0.17       111

    accuracy                           0.99     71202
   macro avg       0.55      0.90      0.58     71202
weighted avg       1.00      0.99      0.99     71202



# K-Fold Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score
from imblearn.pipeline import Pipeline
# Cross-validate on the original (scaled, not yet resampled) training split and let
# the pipeline apply SMOTE inside each fold. Running CV on data that was already
# oversampled puts synthetic points interpolated from validation-fold rows into the
# training folds, which inflates the score.
# cross_val_score clones the pipeline, so the fitted `classifier` is left untouched.
# Note: accuracy is now measured on un-resampled (imbalanced) validation folds.
cv_pipeline = Pipeline(steps=[('smote', SMOTE(random_state=1)), ('svc', classifier)])
kcross = cross_val_score(cv_pipeline, X=X_train, y=y_train, cv=10, n_jobs=-1)
print("accuracy is: {:.2f}%".format(kcross.mean()*100))
print("std is: {:.2f}%".format(kcross.std()*100))

KeyboardInterrupt: 

# Parameter Selection Using GridSearch

In [None]:
from sklearn.model_selection import GridSearchCV
# Every value in a parameter grid must be a list of candidates, including
# single-choice entries such as the kernel.
# gamma is only searched with the RBF kernel; the linear kernel does not use it.
parameters = [{'C': [0.25, 0.5, 0.75, 1], 'kernel': ['linear']},
              {'C': [0.25, 0.5, 0.75, 1], 'kernel': ['rbf'],
               'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]
grid = GridSearchCV(estimator=classifier, param_grid=parameters, scoring='accuracy', cv=10)
grid.fit(X_resample_train, y_resample_train)
# Best mean cross-validated accuracy and the parameter combination that achieved it.
accuracy = grid.best_score_
estimate = grid.best_params_
print("best accuracy: {:.2f}%".format(accuracy*100))
print(f"best params: {estimate}")

# Kernel PCA

In [None]:
## Illustration only - do not execute: unsupervised dimensionality reduction with KernelPCA
from sklearn.decomposition import KernelPCA
# Project onto two components; the projection is fitted on the training data
# and then applied to the test data.
N_KPCA_COMPONENTS = 2
kpca = KernelPCA(n_components=N_KPCA_COMPONENTS)
X_resample_train = kpca.fit_transform(X_resample_train)
X_test = kpca.transform(X_test)

# LDA

In [None]:
## Illustration only - do not execute: supervised dimensionality reduction with LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
# LDA can produce at most (n_classes - 1) components. The target here has two
# classes, so 1 is the only valid value; n_components=2 raises a ValueError on fit.
lda = LDA(n_components=1)
X_resample_train = lda.fit_transform(X_resample_train, y_resample_train)
X_test = lda.transform(X_test)

# Balance The Dataset and use ANN for prediction

In [None]:
# Class == 1 marks a fraudulent transaction (the 492-row minority); Class == 0 marks
# a legitimate one. The two names were previously swapped, which made `frauds` the
# majority class and broke the equal-size sampling that follows.
frauds = dataset.query('Class == 1')
not_frauds = dataset.query('Class == 0')

In [None]:
# Keep every row of `frauds` and add an equally sized random sample of `not_frauds`.
sampled_not_frauds = not_frauds.sample(n=len(frauds), random_state=1)
balanced_dataset = pd.concat([frauds, sampled_not_frauds])

In [None]:
# Row count for each Class value in the balanced frame.
balanced_classes = balanced_dataset['Class']
balanced_classes.value_counts()

In [None]:
# Shuffle the rows: sampling 100% of the frame without replacement returns every
# row in random order. A DataFrame is not callable, so `.sample` is required here.
balanced_dataset = balanced_dataset.sample(frac=1, random_state=1)

In [None]:
# Features are all columns but the last; the target is the last column of the balanced frame.
balanced_features = balanced_dataset.iloc[:, :-1]
balanced_target = balanced_dataset.iloc[:, -1]
Xb = balanced_features.to_numpy()
yb = balanced_target.to_numpy()

In [None]:
# Hold out 25% of the balanced rows for testing.
balanced_splits = train_test_split(Xb, yb, test_size=0.25, random_state=1)
Xb_train, Xb_test, yb_train, yb_test = balanced_splits

In [None]:
# Separate scaler for the balanced data: fitted on its training split only,
# then applied to both of its splits.
scb = StandardScaler()
scb.fit(Xb_train)
Xb_train = scb.transform(Xb_train)
Xb_test = scb.transform(Xb_test)

In [None]:
import tensorflow as tf

In [None]:
# Display the installed TensorFlow version (last expression of the cell, so the notebook shows it).
tf.__version__

In [None]:
# Empty sequential model; layers are appended in the following cells.
# Sequential is a model class in tf.keras.models (not a layer), and the previous
# spelling "Squential" does not exist.
ann = tf.keras.models.Sequential()

In [None]:
# Hidden layer: 6 fully connected units with ReLU activation.
hidden_layer = tf.keras.layers.Dense(units=6, activation='relu')
ann.add(hidden_layer)

In [None]:
# Output layer: a single sigmoid unit.
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')
ann.add(output_layer)

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
ann.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Stop training once val_loss has not improved for 5 consecutive epochs and roll the
# model back to its best weights. The Keras class is named EarlyStopping (CamelCase);
# `Early_stopping` does not exist.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True
)

In [None]:
# Train for up to 40 epochs in batches of 32, holding out the last 20% of the
# training rows for validation. `callbacks` is documented as a list of callback
# instances, so the single callback is wrapped in a list.
ann.fit(
    Xb_train,
    yb_train,
    validation_split=0.2,
    callbacks=[early_stopping],
    batch_size=32,
    epochs=40,
)

In [None]:
# The network was trained on features standardized by `scb`, whereas X_test was
# standardized by `sc`. Undo `sc` and re-apply `scb` so the inputs are on the
# scale the network saw during training; row order still matches y_test.
# NOTE(review): rows of X_test may also appear in the balanced training data, since
# both are drawn from `dataset` - confirm this is the intended evaluation set.
X_test_for_ann = scb.transform(sc.inverse_transform(X_test))
yb_pred = ann.predict(X_test_for_ann)

In [None]:
# Turn the sigmoid outputs into boolean class predictions using a 0.5 cut-off.
DECISION_THRESHOLD = 0.5
yb_pred = yb_pred > DECISION_THRESHOLD

In [None]:
# Evaluate the ANN predictions against the held-out labels.
cmb = confusion_matrix(y_test, yb_pred)
print(cmb)
# The report must use the ANN predictions (yb_pred); y_pred holds the SVC output
# from the earlier section.
print(classification_report(y_test, yb_pred, target_names = ['not frauds', 'frauds']))