In [1]:
import pandas as pd

In [2]:
# Read the transactions CSV from the notebook's working directory.
data = pd.read_csv(filepath_or_buffer="creditcard.csv")

In [None]:
# Preview the first five rows (default display may truncate wide frames).
data.head(n=5)

In [4]:
# Lift the column display limit so wide frames are shown without truncation.
pd.set_option("display.max_columns", None)

In [None]:
# Preview again now that the column display limit has been removed.
data.head(n=5)

In [None]:
# Show the last five rows of the frame.
data.tail(n=5)

In [None]:
# (rows, columns) of the frame as loaded.
data.shape

In [None]:
# Report the frame dimensions in words: shape is (rows, columns).
print(f"Number of columns: {data.shape[1]}")
print(f"Number of rows: {data.shape[0]}")

In [None]:
# Column dtypes, non-null counts and memory usage.
data.info()

In [None]:
# Missing values per column (isna is the same check as isnull).
data.isna().sum()

In [12]:
from sklearn.preprocessing import StandardScaler

In [13]:
# Standardise the Amount column in place; `sc` keeps the fitted parameters so
# the same transform can be re-applied to new transactions later.
# NOTE(review): the scaler is fitted on every row before any train/test split,
# so held-out rows influence the scaling - consider fitting on the training
# split only. Re-running this cell also rescales already-scaled values.
sc = StandardScaler()
data['Amount'] = sc.fit_transform(data[['Amount']])

In [None]:
# Check the Amount column after standardisation.
data.head(n=5)

In [15]:
# Remove the Time column; it is not used as a model feature below.
data = data.drop(columns=['Time'])

In [None]:
# Confirm the Time column is gone.
data.head(n=5)

In [None]:
# True if at least one row is an exact repeat of an earlier row.
data.duplicated().any()

In [18]:
# Keep the first occurrence of each fully identical row and discard the rest.
# The original index labels are retained (no reset).
data = data.drop_duplicates(keep='first')

In [None]:
# (rows, columns) after removing the Time column and duplicate rows.
data.shape

In [None]:
# Rows per Class label - shows how the target is distributed.
data.loc[:, 'Class'].value_counts()

In [24]:
import seaborn as sns
import matplotlib.pyplot as plt
# Apply the ggplot look to every figure drawn after this cell.
plt.style.use(style='ggplot')

In [None]:
# Bar chart of rows per Class label.
# The column is named through the `x` keyword: in current seaborn releases
# `data` is the only positional parameter, so a Series passed positionally is
# taken as `data` rather than as the variable to count.
sns.countplot(x='Class', data=data)
plt.show()

In [26]:
# Feature matrix: every column except the label. Target: the Class column.
X = data.drop(columns='Class')
y = data.loc[:, 'Class']

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
# 80/20 hold-out split with a fixed seed.
# stratify=y keeps the Class proportions equal in both parts; with a rare
# positive class a purely random split can leave the test set with very few
# positives, which makes precision/recall unstable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [29]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [None]:
# Fit each candidate model on the current training split and print its
# hold-out accuracy, precision, recall and F1.
classifier = {
    # Default max_iter is 100; give the solver more room so it does not stop
    # early with a ConvergenceWarning on this many rows.
    "Logistic Regression": LogisticRegression(max_iter=1000),
    # Seeded so the tree (and therefore the printed metrics) is reproducible.
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42)
}

for name, clf in classifier.items():
    print(f"\n=========={name}===========")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"\n Accuracy: {accuracy_score(y_test, y_pred)}")
    print(f"\n Precision: {precision_score(y_test, y_pred)}")
    print(f"\n Recall: {recall_score(y_test, y_pred)}")
    print(f"\n F1 Score: {f1_score(y_test, y_pred)}")

In [31]:
# Undersampling: down-sample the normal class and combine it with all fraud rows to get a balanced dataset.

In [32]:
# Partition the rows by label: Class 0 -> normal, Class 1 -> fraud.
normal = data.loc[data['Class'].eq(0)]
fraud = data.loc[data['Class'].eq(1)]

In [None]:
# (rows, columns) of the Class == 0 subset.
normal.shape

In [None]:
# (rows, columns) of the Class == 1 subset.
fraud.shape

In [35]:
# Draw as many normal rows as there are fraud rows so the two classes end up
# the same size. The count is taken from `fraud` instead of being hard-coded,
# so it stays correct if the data or the de-duplication step changes, and the
# seed makes the drawn subset (and every metric below) reproducible.
normal_sample = normal.sample(n=len(fraud), random_state=42)

In [None]:
# (rows, columns) of the down-sampled normal subset.
normal_sample.shape

In [37]:
# Stack the sampled normal rows on top of all fraud rows and renumber the
# index from 0.
new_data = pd.concat(objs=[normal_sample, fraud], axis=0, ignore_index=True)

In [None]:
# Preview the combined frame.
new_data.head(n=5)

In [None]:
# Rows per Class label in the combined frame.
new_data.loc[:, 'Class'].value_counts()

In [40]:
# Rebuild features/target from the under-sampled frame (rebinds X and y).
X = new_data.drop(columns='Class')
y = new_data.loc[:, 'Class']

In [41]:
# 80/20 hold-out split of the under-sampled data with a fixed seed.
# stratify=y keeps the class proportions identical in train and test, which
# matters on a dataset this small.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Fit each candidate model on the current training split and print its
# hold-out accuracy, precision, recall and F1.
classifier = {
    # Default max_iter is 100; a higher cap avoids stopping before convergence.
    "Logistic Regression": LogisticRegression(max_iter=1000),
    # Seeded so the tree (and therefore the printed metrics) is reproducible.
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42)
}

for name, clf in classifier.items():
    print(f"\n=========={name}===========")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"\n Accuracy: {accuracy_score(y_test, y_pred)}")
    print(f"\n Precision: {precision_score(y_test, y_pred)}")
    print(f"\n Recall: {recall_score(y_test, y_pred)}")
    print(f"\n F1 Score: {f1_score(y_test, y_pred)}")

In [None]:
# Oversampling: use SMOTE to synthesise extra minority-class samples so both classes are balanced.

In [49]:
# Go back to the full de-duplicated frame for the oversampling experiment
# (rebinds X and y, replacing the under-sampled versions).
X = data.drop(columns='Class')
y = data.loc[:, 'Class']

In [None]:
# (rows, feature columns) of the full feature matrix.
X.shape

In [None]:
# Length of the target vector; should match the row count of X.
y.shape

In [52]:
from imblearn.over_sampling import SMOTE

In [53]:
X_res, y_res = SMOTE().fit_resample(X,y)

In [None]:
y_res.value_counts()

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size = 0.2, random_state = 42)

In [None]:
# Fit each candidate model on the current training split and print its
# hold-out accuracy, precision, recall and F1.
classifier = {
    # Default max_iter is 100; a higher cap avoids stopping before convergence.
    "Logistic Regression": LogisticRegression(max_iter=1000),
    # Seeded so the tree (and therefore the printed metrics) is reproducible.
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42)
}

for name, clf in classifier.items():
    print(f"\n=========={name}===========")
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(f"\n Accuracy: {accuracy_score(y_test, y_pred)}")
    print(f"\n Precision: {precision_score(y_test, y_pred)}")
    print(f"\n Recall: {recall_score(y_test, y_pred)}")
    print(f"\n F1 Score: {f1_score(y_test, y_pred)}")

In [None]:
# Final model: a decision tree fitted on the SMOTE-resampled data.
# Seeded so that re-running the notebook produces the same tree, and hence
# the same persisted model file.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(X_res, y_res)

In [58]:
import joblib

In [None]:
# Serialise the fitted tree to disk, next to the notebook.
joblib.dump(value=dtc, filename="credit_card_model.pkl")

In [60]:
# Read the persisted model back from disk.
# joblib files are pickle-based: only load files you created or trust.
model = joblib.load(filename="credit_card_model.pkl")

In [None]:
# Score one transaction. The values are given in the column order of X, with
# the Amount supplied as a raw (unscaled) figure.
sample = pd.DataFrame(
    [[-1.3598071336738,-0.0727811733098497,2.53634673796914,1.37815522427443,-0.338320769942518,0.462387777762292,0.239598554061257,0.0986979012610507,0.363786969611213,0.0907941719789316,-0.551599533260813,-0.617800855762348,-0.991389847235408,-0.311169353699879,1.46817697209427,-0.470400525259478,0.207971241929242,0.0257905801985591,0.403992960255733,0.251412098239705,-0.018306777944153,0.277837575558899,-0.110473910188767,0.0669280749146731,0.128539358273528,-0.189114843888824,0.133558376740387,-0.0210530534538215,149.62]],
    columns=X.columns,
)
# The model was trained on the standardised Amount, so the raw figure must go
# through the same fitted scaler first; feeding 149.62 directly would place
# the row far outside the range the tree was trained on.
# NOTE(review): a separate serving app would also need `sc` persisted
# alongside the model.
sample['Amount'] = sc.transform(sample[['Amount']]).ravel()
# Passing a DataFrame with the training column names also avoids
# scikit-learn's missing-feature-names warning.
pred = model.predict(sample)

In [None]:
# Predicted Class label for the single row scored above.
pred[0]

In [None]:
# Translate the predicted label into a readable verdict:
# 0 is reported as normal, any other label as fraud.
if pred[0] == 0:
    print("Normal Transaction")
else:
    print("Fraud Transaction")

In [None]:
# Homework: serve the saved model through a Flask or Streamlit app.