In [20]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [21]:
# Load your dataset (replace 'data.csv' with your dataset's filename)
data = pd.read_csv('/content/data.csv')
print(data)

       step      type     amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0         1   PAYMENT    9839.64  C1231006815       170136.0       160296.36   
1         1   PAYMENT    1864.28  C1666544295        21249.0        19384.72   
2         1  TRANSFER     181.00  C1305486145          181.0            0.00   
3         1  CASH_OUT     181.00   C840083671          181.0            0.00   
4         1   PAYMENT   11668.14  C2048537720        41554.0        29885.86   
...     ...       ...        ...          ...            ...             ...   
83556    10   PAYMENT    7705.70  C1834114901        96490.0        88784.30   
83557    10  CASH_OUT  319045.01  C1964329082        56471.0            0.00   
83558    10   CASH_IN  249169.96  C1421944154         3481.0       252650.96   
83559    10  CASH_OUT  244279.64   C722886752        29968.0            0.00   
83560    10  CASH_OUT  145014.63    C60491101            NaN             NaN   

          nameDest  oldbalanceDest  new

In [22]:
# Drop rows with missing values (NaN)
data = data.dropna()
# Check the column names in your dataset
print(data.columns)
print(data)

Index(['step', 'type', 'amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig',
       'nameDest', 'oldbalanceDest', 'newbalanceDest', 'isFraud',
       'isFlaggedFraud'],
      dtype='object')
       step      type     amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0         1   PAYMENT    9839.64  C1231006815       170136.0       160296.36   
1         1   PAYMENT    1864.28  C1666544295        21249.0        19384.72   
2         1  TRANSFER     181.00  C1305486145          181.0            0.00   
3         1  CASH_OUT     181.00   C840083671          181.0            0.00   
4         1   PAYMENT   11668.14  C2048537720        41554.0        29885.86   
...     ...       ...        ...          ...            ...             ...   
83555    10  CASH_OUT   14895.17   C214279684        51759.0        36863.83   
83556    10   PAYMENT    7705.70  C1834114901        96490.0        88784.30   
83557    10  CASH_OUT  319045.01  C1964329082        56471.0            0.00   
83558 

In [23]:
# Select relevant features and target variable
X = data[['step', 'type', 'amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']]
y = data['isFraud']

In [24]:
# Encode categorical feature 'type' using one-hot encoding
X = pd.get_dummies(data=X, columns=['type'], drop_first=True)

In [25]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [26]:
# Implement and train the Decision Tree Classifier
dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(X_train, y_train)
DecisionTreeClassifier()

In [28]:
# Predict with Decision Tree Classifier
dt_predictions = dt_classifier.predict(X_test)

In [29]:
# Evaluate Decision Tree Classifier performance
dt_accuracy = accuracy_score(y_test, dt_predictions)
dt_precision = precision_score(y_test, dt_predictions)
dt_recall = recall_score(y_test, dt_predictions)
dt_f1 = f1_score(y_test, dt_predictions)

In [30]:
# Implement and train the Naïve Bayes Classifier (Gaussian NB for continuous data)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

In [31]:

# Predict with Naïve Bayes Classifier
nb_predictions = nb_classifier.predict(X_test)

In [32]:
# Evaluate Naïve Bayes Classifier performance
nb_accuracy = accuracy_score(y_test, nb_predictions)
nb_precision = precision_score(y_test, nb_predictions)
nb_recall = recall_score(y_test, nb_predictions)
nb_f1 = f1_score(y_test, nb_predictions)


In [33]:
# Compare the performance of Decision Tree
print("Decision Tree Classifier:")
print(f"Accuracy: {dt_accuracy:.2f}")
print(f"Precision: {dt_precision:.2f}")
print(f"Recall: {dt_recall:.2f}")
print(f"F1 Score: {dt_f1:.2f}")

Decision Tree Classifier:
Accuracy: 1.00
Precision: 0.54
Recall: 0.79
F1 Score: 0.64


In [34]:
# Compare the performance of Naïve Bayes
print("\nNaïve Bayes Classifier:")
print(f"Accuracy: {nb_accuracy:.2f}")
print(f"Precision: {nb_precision:.2f}")
print(f"Recall: {nb_recall:.2f}")
print(f"F1 Score: {nb_f1:.2f}")


Naïve Bayes Classifier:
Accuracy: 0.98
Precision: 0.01
Recall: 0.08
F1 Score: 0.01
