In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.metrics import roc_auc_score, matthews_corrcoef

#Loading the dataset
df = pd.read_csv("threats.csv")

#Spliting the data into features (X) and the target variable (y)
X = df.drop("Threat", axis=1)
y = df["Threat"]

#Spliting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Call the Decision Tree model
model = DecisionTreeClassifier(random_state=42)

#Train the model
model.fit(X_train, y_train)

#Make predictions on the test set
y_pred = model.predict(X_test)

#Evaluate the model using accuracy, confusion matrix, and classification report 
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

#printing the accuracy confusion matrix, and classification report
print("Accuracy:",accuracy)
print(f"Confusion Matrix:\n{conf_matrix}")
print(f"Classification Report:\n{classification_rep}")


#Evaluate the performance using various metrics
auc = roc_auc_score(y_test, y_pred)
mcc = matthews_corrcoef(y_test, y_pred)


#printing the AUC and the Matthews Correlation Coefficient
print("AUC:", auc)
print("Matthews Correlation Coefficient:", mcc)


Accuracy: 0.995745
Confusion Matrix: [[199149    473]
 [   378      0]]
Classification Report:               precision    recall  f1-score   support

           0       1.00      1.00      1.00    199622
           1       0.00      0.00      0.00       378

    accuracy                           1.00    200000
   macro avg       0.50      0.50      0.50    200000
weighted avg       1.00      1.00      1.00    200000

AUC: 0.49881526084299327
Matthews Correlation Coefficient: -0.002118709591644109
