# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, accuracy_score

# ==========================================
# Step 1: Load Data
# ==========================================
# OPTION A: If you have the CSV (uncomment below)
# df = pd.read_csv('banknote_authentication.csv') 

# OPTION B: Dummy data (Run this if you don't have a file)
# Dummy banknote measurements, written one record per row so each sample
# can be read across; they are pivoted into per-column lists below.
column_names = ['Variance', 'Skewness', 'Curtosis', 'Entropy', 'Class']
records = [
    # Variance, Skewness, Curtosis, Entropy, Class (0 = Authentic, 1 = Fake)
    (3.6, 8.6, -2.8, -0.4, 0),
    (4.5, 8.1, -2.5, -1.2, 0),
    (-1.3, 7.4, 0.5, -0.2, 1),
    (3.4, 6.5, -2.1, -0.5, 0),
    (-2.5, 4.3, 1.2, 0.8, 1),
    (5.1, 2.1, -0.5, -1.1, 0),
    (-3.2, 9.1, 2.3, 0.5, 1),
    (1.2, 3.4, 4.2, -0.1, 1),
    (4.0, 7.5, -3.1, -1.5, 0),
    (-1.5, 5.2, 1.1, 0.3, 1),
]

# Transpose the rows into {column name: list of values}, the layout that
# the DataFrame constructor receives.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*records))
}
df = pd.DataFrame(data)

# ==========================================
# Step 2: Split Data
# ==========================================
X = df.drop(columns='Class')  # Feature matrix: every column except 'Class'
y = df['Class']               # Target variable

# Stratify on the target so the train and test folds keep the class
# proportions of `y`. With a test fold this small (20% of the rows), an
# unstratified shuffle can put a single class in the test set, which
# makes ROC AUC undefined and the confusion matrix degenerate.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# ==========================================
# Step 3: Train Models
# ==========================================
# Model 1: Logistic Regression
log_model = LogisticRegression()
log_model.fit(X_train, y_train)

# Model 2: Decision Tree
# Seed the tree so repeated runs produce the same fitted model; the seed
# matches the one used for the train/test split.
tree_model = DecisionTreeClassifier(random_state=42)
tree_model.fit(X_train, y_train)

# ==========================================
# Step 4: Evaluate (Confusion Matrix)
# ==========================================
# We predict classes (0 or 1) for Confusion Matrix
y_pred_log = log_model.predict(X_test)
y_pred_tree = tree_model.predict(X_test)

# Pass the label set learned during training so each matrix keeps the
# same layout (one row/column per class) even when a class is missing
# from both the test fold and the predictions.
class_labels = log_model.classes_

print("--- Confusion Matrices ---")
print(
    "Logistic Regression:\n",
    confusion_matrix(y_test, y_pred_log, labels=class_labels),
)
print(
    "Decision Tree:\n",
    confusion_matrix(y_test, y_pred_tree, labels=class_labels),
)

# ==========================================
# Step 5: Evaluate (ROC & AUC)
# ==========================================
# PRO TIP: For ROC, we need Probabilities (.predict_proba), not just class labels
def positive_class_scores(model, features):
    """Return column 1 of ``model.predict_proba(features)``."""
    probabilities = model.predict_proba(features)
    return probabilities[:, 1]


log_probs = positive_class_scores(log_model, X_test)
tree_probs = positive_class_scores(tree_model, X_test)

# AUC of each model's scores against the held-out labels
log_auc, tree_auc = (
    roc_auc_score(y_test, log_probs),
    roc_auc_score(y_test, tree_probs),
)

print("\n--- AUC Scores ---")
print(f"Logistic Regression AUC: {log_auc:.2f}")
print(f"Decision Tree AUC: {tree_auc:.2f}")

# ==========================================
# Step 6: Visualization (ROC Curve)
# ==========================================


# [Image of ROC Curve]

# ROC points (false/true positive rates) for each model; thresholds unused.
fpr_log, tpr_log, _ = roc_curve(y_test, log_probs)
fpr_tree, tpr_tree, _ = roc_curve(y_test, tree_probs)

# Draw on the current axes through the Axes methods.
axes = plt.gca()
roc_series = (
    (fpr_log, tpr_log, f'Logistic (AUC={log_auc:.2f})'),
    (fpr_tree, tpr_tree, f'Tree (AUC={tree_auc:.2f})'),
)
for false_pos, true_pos, legend_text in roc_series:
    axes.plot(false_pos, true_pos, label=legend_text)

# Diagonal dashed reference line (random guess)
axes.plot([0, 1], [0, 1], 'k--')

axes.set_xlabel('False Positive Rate')
axes.set_ylabel('True Positive Rate')
axes.set_title('ROC Curve Comparison')
axes.legend()
plt.show()