In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the Kaggle credit-card transactions CSV into the working frame, then
# display its (rows, columns) dimensions as the cell output.
csv_path = '/kaggle/input/creditcardfraud/creditcard.csv'
df = pd.read_csv(csv_path)
df.shape

In [None]:
# List the column labels of the loaded frame.
df.columns

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Per-column summary statistics for the frame.
df.describe()

In [None]:
# Single boolean: True if at least one cell anywhere in the frame is null.
df.isnull().values.any()

In [None]:
# Print the frame's structural summary (columns, non-null counts, dtypes).
df.info()

In [None]:
# Keep only the fraudulent transactions (Class == 1) and plot each one's
# amount against its time stamp.
df_fraud = df[df['Class'] == 1]

plt.figure(figsize=(16, 8))
plt.scatter(df_fraud['Time'], df_fraud['Amount'])
plt.title('Scatter plot amount fraud')
plt.xlabel('Time')
plt.ylabel('Amount')
# Fixed axis windows: any point outside these ranges is not shown.
plt.xlim([0, 175000])
plt.ylim([0, 2500])
plt.show()

In [None]:
# Count the rows of each class and report how unbalanced the dataset is.
number_fraud = df[df['Class'] == 1].shape[0]
number_no_fraud = df[df['Class'] == 0].shape[0]
print(f'There are only {number_fraud} frauds in the original dataset, even though there are {number_no_fraud} no frauds in the dataset.')

In [None]:
# Pairwise correlation matrix of the frame's columns; plotted as a heatmap in
# the next cell.
df_corr = df.corr()

In [None]:
import seaborn

# Apply the theme BEFORE drawing. seaborn.set() updates global matplotlib
# settings, which only affect elements created afterwards; calling it after
# heatmap() left this figure partly unstyled on a first run. The settings
# remain in effect for every later figure in the notebook.
seaborn.set(font_scale=2, style='white')

plt.figure(figsize=(16, 8))
seaborn.heatmap(df_corr, cmap="YlGnBu")  # one coloured cell per column pair
plt.title('Heatmap correlation')
plt.show()

In [None]:
# Split the frame by label: Class == 1 rows go to Fraud, Class == 0 to Normal.
Fraud, Normal = (df[df['Class'] == label] for label in (1, 0))

In [None]:
# Side-by-side summary statistics of Amount and Time for the Normal rows.
pd.concat([Normal[column].describe() for column in ('Amount', 'Time')], axis=1)

In [None]:
# Side-by-side summary statistics of Amount and Time for the Fraud rows.
pd.concat([Fraud[column].describe() for column in ('Amount', 'Time')], axis=1)

In [None]:
# Print the (rows, columns) shape of each class subset.
for label, subset in (('Fraud Shape:\t', Fraud), ('Normal Shape:\t', Normal)):
    print(label, subset.shape)

**Sample Selection**

In [None]:
from sklearn.preprocessing import StandardScaler, RobustScaler

std_scaler = StandardScaler()  # created here but not used in this cell
rob_scaler = RobustScaler()

# Robust-scale each raw column into a new *_scale column. The one scaler is
# refit for every column, so after the loop it holds the fit for 'Time' only.
# NOTE(review): the scaler is fitted on the full frame, i.e. before any
# train/test split is made later in the notebook.
for raw_name, scaled_name in (('Amount', 'amount_scale'), ('Time', 'time_scale')):
    column_2d = df[raw_name].values.reshape(-1, 1)  # single column as a 2-D array
    df[scaled_name] = rob_scaler.fit_transform(column_2d)

# The raw columns are replaced by their scaled versions. Because the drop is
# in place, re-running this cell on the same frame fails (columns are gone).
df.drop(columns=['Time', 'Amount'], inplace=True)

In [None]:
# Move the two scaled columns to the front of the frame (positions 0 and 1):
# remember them, drop them, then re-insert each at its target position.
scaled_columns = {name: df[name] for name in ('amount_scale', 'time_scale')}
df.drop(columns=list(scaled_columns), inplace=True)
for position, (name, values) in enumerate(scaled_columns.items()):
    df.insert(position, name, values)

df.head()

In [None]:
# Shuffle every row. The shuffle is seeded so that the subset of normal rows
# picked below (and therefore every downstream metric) is reproducible on a
# fresh Restart & Run All.
df = df.sample(frac=1, random_state=42)

fraud = df.loc[df['Class'] == 1]
# Undersample the majority class: keep exactly as many normal rows as there
# are fraud rows instead of relying on a hard-coded count.
normal = df.loc[df['Class'] == 0][:len(fraud)]

# Despite the name, this is the class-balanced frame (fraud + sampled normal).
normal_distributed_data = pd.concat([fraud, normal])

# Shuffle again so the two classes are interleaved.
sample_data = normal_distributed_data.sample(frac=1, random_state=42)

sample_data.head()

In [None]:
# Dimensions (rows, columns) of the undersampled, shuffled frame.
sample_data.shape

**SVM Model Building**

In [None]:
# Separate the undersampled frame into the feature matrix (every column except
# the label) and the label vector.
target = 'Class'
X = sample_data.drop(columns=target)
y = sample_data[target]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the sampled rows for testing; the split is seeded.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.svm import SVC
# Base support-vector classifier with library-default settings; it is the
# estimator handed to the grid search in a later cell.
svm_model = SVC()

In [None]:
# Hyperparameter grid for the SVC search: C takes the integers 1 through 9 and
# the kernel is either linear or RBF.
svm_params = dict(
    C=np.arange(1, 10),
    kernel=["linear", "rbf"],
)

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over svm_params with 7-fold cross-validation, using all
# available cores and verbose progress output.
grid_search = GridSearchCV(
    svm_model,
    svm_params,
    cv=7,
    n_jobs=-1,
    verbose=7,
)
# fit() returns the fitted search object itself.
svm_cv_model = grid_search.fit(X_train, y_train)

In [None]:
# Best cross-validated score found by the grid search. No `scoring` argument
# was passed to GridSearchCV, so this is the estimator's default score.
svm_cv_model.best_score_

In [None]:
# Hyperparameter combination that achieved the best grid-search score; reused
# below to build the final model.
best_params = svm_cv_model.best_params_
print(best_params)

In [None]:
# Retrain an SVC on the training split with the winning C / kernel, this time
# with probability estimates enabled.
final_svc = SVC(
    C=best_params['C'],
    kernel=best_params['kernel'],
    probability=True,
)
svm = final_svc.fit(X_train, y_train)  # fit() returns the fitted estimator

In [None]:
# Predicted class labels for the held-out test features.
y_pred_svm = svm.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
# Share of test rows whose predicted label matches the true label.
accuracy_score(y_test, y_pred_svm)

In [None]:
from sklearn.model_selection import cross_val_score

# Cross-validate on the TRAINING split. cross_val_score refits a clone of the
# estimator on every fold, so running it on X_test / y_test would train models
# on the held-out data and leave no untouched set for the final evaluation.
# The cell output is the mean score over the 21 folds.
cross_val_score(svm, X_train, y_train, cv=21).mean()

In [None]:
from sklearn.metrics import classification_report

# Text report of per-class metrics for the test-set predictions.
report = classification_report(y_test, y_pred_svm)
print(report)

In [None]:
from sklearn.metrics import confusion_matrix

# confusion_matrix(y_true, y_pred): rows are the true classes, columns are the
# predicted classes.
cm = confusion_matrix(y_test, y_pred_svm)

sns.heatmap(cm, annot=True, fmt="d", cbar=False)  # annotate cells with integer counts
plt.title('SVC Confusion Matrix')
# Label both axes so the off-diagonal cells (false positives vs. false
# negatives) can be told apart without knowing sklearn's layout convention.
plt.xlabel('Predicted class')
plt.ylabel('True class')
plt.savefig('svc_con_mat')  # written to the working directory
plt.show()