APPROACH
---

* Import libraries
*  Load Dataset
* Analyze the Data
*  Perform EDA
*  Applying the models
*  Comparing Accuracies

**IMPORTING LIBRARIES**

---

In [None]:
!pip install -q gradio

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 
from imblearn.combine import SMOTETomek
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
from scipy.stats import chi2_contingency 
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
import warnings
# Blanket filter: silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# Lift pandas' display truncation so every column and every row is rendered.
# NOTE(review): with max_rows=None, displaying a large frame prints all of its rows;
# keep using .head()/.describe() style previews on the full dataset.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

**LOADING THE DATASET**

---

In [None]:
# Location of the CSV under the Kaggle input directory.
DATA_PATH = '/kaggle/input/creditcardfraud/creditcard.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# (rows, columns) of the loaded frame.
data.shape

**ANALYZING THE DATA**

---

In [None]:
# Column dtypes, non-null counts and memory usage.
data.info()

In [None]:
# Number of missing values in each column.
data.isnull().sum()

> * We infer that there are no null values

In [None]:
# Number of rows per value of the 'Class' column.
data['Class'].value_counts()

> * We infer that the data is highly imbalanced

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) per numeric column.
data.describe()

In [None]:
def summary(df):
    """Build a one-row-per-column overview of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    pandas.DataFrame
        Indexed by the column names of ``df`` with the columns
        ``dtypes``, ``missing#`` (number of missing values), ``missing%``
        (share of missing values in percent), ``uniques``, ``count``
        (non-null values), ``skew``, ``min``, ``max`` and ``mean``.
        The numeric statistics are NaN for non-numeric columns.
    """
    missing = df.isna().sum()

    # Named `report` rather than `sum` so the builtin is not shadowed.
    report = df.dtypes.to_frame(name='dtypes')
    report['missing#'] = missing                     # a plain count, not scaled by 100
    report['missing%'] = missing * 100 / len(df)
    report['uniques'] = df.nunique()
    report['count'] = df.count()
    # Assigned as a Series so values align on column name; non-numeric columns get NaN
    # instead of raising or causing a length mismatch.
    report['skew'] = df.skew(numeric_only=True)

    # describe() covers numeric columns only; assignment aligns on the column names.
    desc = df.describe().T
    for stat in ('min', 'max', 'mean'):
        report[stat] = desc[stat]
    return report

# Display the per-column summary with a blue background gradient.
summary(data).style.background_gradient(cmap='Blues')

In [None]:
# Skewness of every column before any outlier treatment.
data.skew()

> * The outliers in the data need to be treated (below they are capped at the 1.5×IQR fences rather than dropped)

**PERFORMING EDA**

---

In [None]:
def remove_outlier(col):
    """Return the Tukey fences ``(lower, upper)`` of a column.

    Despite its name, this function does not modify or filter ``col``; it only
    computes the bounds ``Q1 - 1.5*IQR`` and ``Q3 + 1.5*IQR`` that callers use
    to cap the column.

    Parameters
    ----------
    col : pandas.Series
        Numeric column.

    Returns
    -------
    tuple
        ``(lower_range, upper_range)``.
    """
    # quantile() does not need sorted input, so no sort is performed here.
    q1, q3 = col.quantile([0.25, 0.75])
    iqr = q3 - q1
    lower_range = q1 - 1.5 * iqr
    upper_range = q3 + 1.5 * iqr
    return lower_range, upper_range




In [None]:
# Cap V28 at the 1.5*IQR fences from remove_outlier: values are clipped, no rows are dropped.
v28_lower, v28_upper = remove_outlier(data['V28'])
data['V28'] = data['V28'].clip(lower=v28_lower, upper=v28_upper)

In [None]:
# Cap V8 at the 1.5*IQR fences from remove_outlier: values are clipped, no rows are dropped.
v8_lower, v8_upper = remove_outlier(data['V8'])
data['V8'] = data['V8'].clip(lower=v8_lower, upper=v8_upper)

In [None]:
# Cap V2 at the 1.5*IQR fences from remove_outlier: values are clipped, no rows are dropped.
v2_lower, v2_upper = remove_outlier(data['V2'])
data['V2'] = data['V2'].clip(lower=v2_lower, upper=v2_upper)

In [None]:
# Cap V23 at the 1.5*IQR fences from remove_outlier: values are clipped, no rows are dropped.
v23_lower, v23_upper = remove_outlier(data['V23'])
data['V23'] = data['V23'].clip(lower=v23_lower, upper=v23_upper)

In [None]:
# Skewness of every column after capping V28, V8, V2 and V23.
data.skew()

> * The outliers in V28, V8, V2 and V23 have been capped at the 1.5×IQR fences (no rows were removed), so we can proceed

In [None]:
# Display the per-column summary again, now on the capped data.
summary(data).style.background_gradient(cmap='Blues')

> * Now we resample the data with SMOTETomek (SMOTE over-sampling followed by Tomek-link cleaning) to balance the classes

In [None]:
# Class counts before resampling.
data['Class'].value_counts()

In [None]:
# Target is the 'Class' column; the features are every other column.
y = data['Class']
x = data.drop(columns=['Class'])

In [None]:
# Shapes of the feature matrix and target before resampling.
print(x.shape, y.shape)

In [None]:
# SMOTETomek is already imported in the notebook's import cell, so it is not re-imported here.
# A fixed random_state makes the resampled data identical on every Restart & Run All.
# NOTE(review): the resampling runs before train_test_split, so the held-out set is drawn
# from resampled data; consider splitting first and resampling only the training part.
smt = SMOTETomek(random_state=2)
x, y = smt.fit_resample(x, y)

In [None]:
# Shapes of the feature matrix and target after resampling.
print(x.shape, y.shape)

In [None]:
# Bar chart of the number of samples per class in the resampled target.
sns.countplot(x=y)
plt.show()

**APPLYING MODELS**

---

In [None]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
# NOTE(review): x and y were already resampled with SMOTETomek before this split, so the
# test set is not made of untouched original rows — confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)

DTREE

In [None]:
# Decision tree with default hyper-parameters; the fixed seed makes the fit reproducible.
dt = DecisionTreeClassifier(random_state=2)
dt.fit(x_train, y_train)
# Predictions on both splits, reported further down with acc_report.
predttrain = dt.predict(x_train)
predttest = dt.predict(x_test)

RANDOM FOREST CLASSIFIER

In [None]:
# Random forest with default hyper-parameters; the fixed seed makes the fit reproducible.
rf = RandomForestClassifier(random_state=2)
rf.fit(x_train, y_train)
# Predictions on both splits, reported further down with acc_report.
pred_rf_train = rf.predict(x_train)
pred_rf_test = rf.predict(x_test)

GRADIENT BOOSTING

In [None]:
# Gradient boosting with default hyper-parameters; the fixed seed makes the fit reproducible.
gb = GradientBoostingClassifier(random_state=2)
gb.fit(x_train, y_train)
# Predictions on both splits, reported further down with acc_report.
predgbtrain = gb.predict(x_train)
predgbtest = gb.predict(x_test)

**COMPARING ACCURACIES**

---

> * Accuracy report function

In [None]:
def acc_report(actual,predicted):
    """Print accuracy, confusion matrix and classification report for a set of predictions.

    Parameters
    ----------
    actual : array-like
        True labels.
    predicted : array-like
        Labels predicted by a model for the same samples.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    # All three metrics are computed before anything is printed.
    accuracy, *tables = (
        accuracy_score(actual, predicted),
        confusion_matrix(actual, predicted),
        classification_report(actual, predicted),
    )
    print('the accuracy of tha model is ', accuracy)
    for table in tables:
        print(table)

DTREE

In [None]:
# Decision tree: the first report is for the training split, the second for the test split.
acc_report(y_train, predttrain)
acc_report(y_test, predttest)

RANDOM FOREST


In [None]:
# Random forest: the first report is for the training split, the second for the test split.
acc_report(y_train, pred_rf_train)
acc_report(y_test, pred_rf_test)

GRADIENT BOOSTING

In [None]:
# Gradient boosting: the first report is for the training split, the second for the test split.
acc_report(y_train, predgbtrain)
acc_report(y_test, predgbtest)