In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score


In [11]:
# Read the CSV stored under ./data into the `data` DataFrame used by the cells below.
data = pd.read_csv(filepath_or_buffer="./data/BitcoinHeistData.csv")

In [15]:
# Remove the columns that are not used as model features.
# `data` is rebound to the result, so this cell may be executed more than once
# against an already-trimmed frame; errors="ignore" skips columns that are
# already gone instead of raising KeyError on a re-run.
data = data.drop(columns=['year', 'day', 'address'], errors='ignore')

In [16]:
# Show the first five rows of `data` as a quick sanity check of its columns.
data.head(n=5)

Unnamed: 0,length,weight,count,looped,neighbors,income,label
0,18,0.008333,1,0,2,100050000.0,princetonCerber
1,44,0.000244,1,0,1,100000000.0,princetonLocky
2,0,1.0,1,0,2,200000000.0,princetonCerber
3,72,0.003906,1,0,2,71200000.0,princetonCerber
4,144,0.072848,456,0,1,200000000.0,princetonLocky


In [20]:
# Target is the 'label' column; every other remaining column is a predictor.
y = data['label']
X = data.drop(columns='label')

# Hold out 25% of the rows for evaluation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Fit a decision tree on the training split (fit returns the estimator itself),
# predict the held-out rows, and report plain accuracy on them.
dtc = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dtc = dtc.predict(X_test)
acc_dtc = accuracy_score(y_test, y_pred_dtc)
print("Accuracy of Decision Tree Classifier:", acc_dtc)


Accuracy of Decision Tree Classifier: 0.9770768334076182
|--- income <= 420100592.00
|   |--- income <= 399982560.00
|   |   |--- income <= 74987520.00
|   |   |   |--- neighbors <= 1.50
|   |   |   |   |--- income <= 49950032.00
|   |   |   |   |   |--- income <= 30019791.00
|   |   |   |   |   |   |--- income <= 30019755.00
|   |   |   |   |   |   |   |--- weight <= 0.60
|   |   |   |   |   |   |   |   |--- length <= 3.00
|   |   |   |   |   |   |   |   |   |--- income <= 30014839.00
|   |   |   |   |   |   |   |   |   |   |--- count <= 1.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |   |--- count >  1.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: white
|   |   |   |   |   |   |   |   |   |--- income >  30014839.00
|   |   |   |   |   |   |   |   |   |   |--- income <= 30015042.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: paduaCryptoWall
|   |   |   |   |   |   |   |   |   |   |--- inco

In [22]:
# Render the fitted tree's split rules as indented text, labelling each split
# with the column names of X rather than positional feature indices.
tree_rules = export_text(dtc, feature_names=X.columns.tolist())
print(tree_rules)



|--- income <= 420100592.00
|   |--- income <= 399982560.00
|   |   |--- income <= 74987520.00
|   |   |   |--- neighbors <= 1.50
|   |   |   |   |--- income <= 49950032.00
|   |   |   |   |   |--- income <= 30019791.00
|   |   |   |   |   |   |--- income <= 30019755.00
|   |   |   |   |   |   |   |--- weight <= 0.60
|   |   |   |   |   |   |   |   |--- length <= 3.00
|   |   |   |   |   |   |   |   |   |--- income <= 30014839.00
|   |   |   |   |   |   |   |   |   |   |--- count <= 1.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 10
|   |   |   |   |   |   |   |   |   |   |--- count >  1.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: white
|   |   |   |   |   |   |   |   |   |--- income >  30014839.00
|   |   |   |   |   |   |   |   |   |   |--- income <= 30015042.00
|   |   |   |   |   |   |   |   |   |   |   |--- class: paduaCryptoWall
|   |   |   |   |   |   |   |   |   |   |--- income >  30015042.00
|   |   |   |   |   |   |   |   |   |  

In [None]:
# Score the decision-tree predictions on the held-out split.
acc = accuracy_score(y_test, y_pred_dtc)

# Precision, recall and F1 all use the same 'weighted' averaging, so compute
# them in one pass over the three scorer functions.
prec, rec, f1 = (
    scorer(y_test, y_pred_dtc, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# Report the confusion matrix first, then the scalar summaries.
print("Confusion matrix:\n", confusion_matrix(y_test, y_pred_dtc))
print("Accuracy:", acc)
print("Precision:", prec)
print("Recall:", rec)
print("F1 Score:", f1)