## Data Loading

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib
import seaborn as sns
import sklearn
import imblearn
import matplotlib.pyplot as plt
import time
import sklearn.metrics as m
import xgboost as xgb
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split 
# Settings
# Lift pandas' display caps so neither columns nor rows are truncated when a
# frame is rendered (passing None removes the limit).
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)



In [None]:
# Directory that holds the competition CSV files. Kept in one place so the
# notebook can be pointed at a different location by editing a single line.
DATA_DIR = '/content/drive/MyDrive/ieee-fraud-detection'

# Transaction and identity tables for the train and test splits, plus the
# sample submission file.
train_transaction = pd.read_csv(f'{DATA_DIR}/train_transaction.csv')
train_identity = pd.read_csv(f'{DATA_DIR}/train_identity.csv')
test_transaction = pd.read_csv(f'{DATA_DIR}/test_transaction.csv')
test_identity = pd.read_csv(f'{DATA_DIR}/test_identity.csv')
sample_submission = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

### Merging multiple dataframes

In [None]:
# Left-join the identity table onto the transaction table by TransactionID, so
# every transaction row is kept even when it has no matching identity row.
train_df = pd.merge(
    train_transaction,
    train_identity,
    on="TransactionID",
    how="left",
)

test_df = pd.merge(
    test_transaction,
    test_identity,
    on="TransactionID",
    how="left",
)

### Renaming test dataframe columns (replacing `-` with `_`)

In [None]:
# Replace every "-" in the test column names with "_".
test_df = test_df.rename(columns=lambda name: name.replace("-", "_"))

### Data preprocessing

In [None]:
# Sentinel written in place of every missing value in both frames.
NA_FILL = -999
train_df = train_df.fillna(value=NA_FILL)
test_df = test_df.fillna(value=NA_FILL)

In [None]:
# Label Encoding
# Fit ONE encoder per categorical column on the combined train + test values
# and apply it to both frames, so a given category always maps to the same
# integer code. Encoders fitted separately per frame number their categories
# independently, which makes the codes in test_df disagree with the ones the
# models were trained on. Only category names are shared here, never labels.
for f in train_df.columns:
    shared = f in test_df.columns  # e.g. the target column exists only in train_df
    if train_df[f].dtype == 'object' or (shared and test_df[f].dtype == 'object'):
        # Cast to str so real categories and the -999 fill value (mixed
        # str/int after fillna) are handled uniformly by the encoder.
        train_values = list(train_df[f].astype(str).values)
        test_values = list(test_df[f].astype(str).values) if shared else []
        lbl = LabelEncoder()
        lbl.fit(train_values + test_values)
        train_df[f] = lbl.transform(train_values)
        if shared:
            test_df[f] = lbl.transform(test_values)

In [None]:
# Integer-encode whichever object-typed columns are present in the test frame.
object_columns = [col for col in test_df.columns if test_df[col].dtype == 'object']
for f in object_columns:
    raw_values = list(test_df[f].values)
    lbl = LabelEncoder()
    test_df[f] = lbl.fit_transform(raw_values)

### Splitting into target and features

In [None]:
# Target vector: the isFraud column of the training frame.
y = train_df["isFraud"]
# Feature matrix: everything except the target and the TransactionID key.
X = train_df.drop(columns=["isFraud", "TransactionID"])
# The test frame has its TransactionID key removed as well.
test_df = test_df.drop(columns=["TransactionID"])

### Splitting into training and validation sets

In [None]:
# Hold out 30% of the rows, stratified on the target so both parts keep the
# same class proportions; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=0,
)

### Applying machine learning algorithms

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Seeded like the other classifiers in this notebook (random_state=0) so that
# the fitted tree, and every score derived from it, is the same on each run.
model = DecisionTreeClassifier(random_state=0)
model.fit(X_train, y_train)


DecisionTreeClassifier()

In [None]:
# Accuracy of the decision tree on the held-out split.
# NOTE(review): if one class dominates the target, accuracy alone can look high
# for a model that rarely predicts the minority class; confirm with per-class metrics.
m.accuracy_score(y_test, model.predict(X_test))

0.9677696119935426

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seeded random forest: fit on the training part, then report accuracy on the
# held-out part as the cell's output.
model2 = RandomForestClassifier(random_state=0).fit(X_train, y_train)
model2.score(X_test, y_test)

0.9794143213555955

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The features are unscaled and carry the -999 fill value, and this notebook
# silences all warnings, so a solver that stops at its iteration limit would go
# unnoticed. Standardising the inputs and allowing more iterations gives the
# solver a fair chance to converge. The pipeline exposes the same
# fit / predict / score methods as the bare estimator.
# NOTE(review): raise max_iter further if the solver still does not converge.
model3 = make_pipeline(
    StandardScaler(),
    LogisticRegression(random_state=0, max_iter=1000),
)
model3.fit(X_train, y_train)
model3.score(X_test, y_test)

0.9649416917849201

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with 100 estimators and a fixed seed; the cell's output is the
# accuracy on the held-out part.
ada_params = {"n_estimators": 100, "random_state": 0}
model4 = AdaBoostClassifier(**ada_params)
model4.fit(X_train, y_train)
model4.score(X_test, y_test)

0.9726803716372585

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting over 100 depth-1 trees with learning rate 1.0 and a fixed
# seed; the cell's output is the accuracy on the held-out part.
model5 = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
).fit(X_train, y_train)
model5.score(X_test, y_test)

0.9650094264006954

### Confusion matrix

In [None]:

from sklearn.metrics import classification_report, confusion_matrix

# Evaluate the decision tree (`model`) on the held-out split, the same data its
# accuracy score is computed on. Predicting on X_train would only measure how
# well the model reproduces rows it was fitted on.
y_pred = model.predict(X_test)
y_true = y_test
cm = confusion_matrix(y_true, y_pred)

# Heatmap of the raw counts: rows are true labels, columns are predicted labels.
f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(cm, annot=True, linewidths=0.5, linecolor="Black", fmt=".0f", ax=ax)
ax.set(title="Decision tree - held-out split", xlabel="Predicted label", ylabel="True label")
plt.show()

# Per-class precision / recall / F1 for the same predictions.
print(classification_report(y_true, y_pred))

In [None]:

from sklearn.metrics import classification_report, confusion_matrix

# Evaluate the random forest (`model2`) on the held-out split, the same data its
# accuracy score is computed on. Predicting on X_train would only measure how
# well the model reproduces rows it was fitted on.
y_pred = model2.predict(X_test)
y_true = y_test
cm = confusion_matrix(y_true, y_pred)

# Heatmap of the raw counts: rows are true labels, columns are predicted labels.
f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(cm, annot=True, linewidths=0.5, linecolor="Black", fmt=".0f", ax=ax)
ax.set(title="Random forest - held-out split", xlabel="Predicted label", ylabel="True label")
plt.show()

# Per-class precision / recall / F1 for the same predictions.
print(classification_report(y_true, y_pred))

In [None]:

from sklearn.metrics import classification_report, confusion_matrix

# Evaluate the logistic regression (`model3`) on the held-out split, the same
# data its accuracy score is computed on. Predicting on X_train would only
# measure how well the model reproduces rows it was fitted on.
y_pred = model3.predict(X_test)
y_true = y_test
cm = confusion_matrix(y_true, y_pred)

# Heatmap of the raw counts: rows are true labels, columns are predicted labels.
f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(cm, annot=True, linewidths=0.5, linecolor="Black", fmt=".0f", ax=ax)
ax.set(title="Logistic regression - held-out split", xlabel="Predicted label", ylabel="True label")
plt.show()

# Per-class precision / recall / F1 for the same predictions.
print(classification_report(y_true, y_pred))

In [None]:

from sklearn.metrics import classification_report, confusion_matrix

# Evaluate AdaBoost (`model4`) on the held-out split, the same data its
# accuracy score is computed on. Predicting on X_train would only measure how
# well the model reproduces rows it was fitted on.
y_pred = model4.predict(X_test)
y_true = y_test
cm = confusion_matrix(y_true, y_pred)

# Heatmap of the raw counts: rows are true labels, columns are predicted labels.
f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(cm, annot=True, linewidths=0.5, linecolor="Black", fmt=".0f", ax=ax)
ax.set(title="AdaBoost - held-out split", xlabel="Predicted label", ylabel="True label")
plt.show()

# Per-class precision / recall / F1 for the same predictions.
print(classification_report(y_true, y_pred))

In [None]:

from sklearn.metrics import classification_report, confusion_matrix

# Evaluate gradient boosting (`model5`) on the held-out split, the same data its
# accuracy score is computed on. Predicting on X_train would only measure how
# well the model reproduces rows it was fitted on.
y_pred = model5.predict(X_test)
y_true = y_test
cm = confusion_matrix(y_true, y_pred)

# Heatmap of the raw counts: rows are true labels, columns are predicted labels.
f, ax = plt.subplots(figsize=(5, 5))
sns.heatmap(cm, annot=True, linewidths=0.5, linecolor="Black", fmt=".0f", ax=ax)
ax.set(title="Gradient boosting - held-out split", xlabel="Predicted label", ylabel="True label")
plt.show()

# Per-class precision / recall / F1 for the same predictions.
print(classification_report(y_true, y_pred))