In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier

# Read the banknote-authentication CSV and give its five columns readable labels.
# read_csv uses the first line of the file as the header row; those labels are
# overwritten right below.
# NOTE(review): if the CSV has no header line, its first record is consumed as
# the header and dropped from the data — confirm against the raw file.
df = pd.read_csv('data_banknote_authentication.csv')
df.columns = ['variance', 'skewness', 'curtosis', 'entropy', 'class']

# 'class' is the target; every other column is a predictor.
y = df['class']
X = df.drop(columns='class')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows (not strictly necessary for the
# tree-based models, but kept so every model sees the same inputs).
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [2]:
# Decision Tree: fit on the scaled training features, then predict the test set.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_dt = dt_classifier.predict(X_test_scaled)

# Score the test-set predictions with each metric, in the order
# accuracy, precision, recall, F1.
accuracy_dt, precision_dt, recall_dt, f1_dt = [
    score(y_test, y_pred_dt)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
]

# One print call, one report line per argument.
print(
    "Decision Tree Classifier Metrics:",
    f"Accuracy: {accuracy_dt:.4f}",
    f"Precision: {precision_dt:.4f}",
    f"Recall: {recall_dt:.4f}",
    f"F1-score: {f1_dt:.4f}",
    sep="\n",
)


Decision Tree Classifier Metrics:
Accuracy: 0.9818
Precision: 1.0000
Recall: 0.9606
F1-score: 0.9799


In [3]:
# Logistic Regression: fit on the scaled training features, then predict the test set.
lr_classifier = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)
y_pred_lr = lr_classifier.predict(X_test_scaled)

# Score the test-set predictions with each metric, in the order
# accuracy, precision, recall, F1.
accuracy_lr, precision_lr, recall_lr, f1_lr = [
    score(y_test, y_pred_lr)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
]

# One print call, one report line per argument (the header starts with a blank line).
print(
    "\nLogistic Regression Classifier Metrics:",
    f"Accuracy: {accuracy_lr:.4f}",
    f"Precision: {precision_lr:.4f}",
    f"Recall: {recall_lr:.4f}",
    f"F1-score: {f1_lr:.4f}",
    sep="\n",
)



Logistic Regression Classifier Metrics:
Accuracy: 0.9782
Precision: 0.9690
Recall: 0.9843
F1-score: 0.9766


In [4]:
# Random Forest (100 trees): fit on the scaled training features, then predict the test set.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)
y_pred_rf = rf_classifier.predict(X_test_scaled)

# Score the test-set predictions with each metric, in the order
# accuracy, precision, recall, F1.
accuracy_rf, precision_rf, recall_rf, f1_rf = [
    score(y_test, y_pred_rf)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
]

# One print call, one report line per argument (the header starts with a blank line).
print(
    "\nRandom Forest Classifier Metrics:",
    f"Accuracy: {accuracy_rf:.4f}",
    f"Precision: {precision_rf:.4f}",
    f"Recall: {recall_rf:.4f}",
    f"F1-score: {f1_rf:.4f}",
    sep="\n",
)



Random Forest Classifier Metrics:
Accuracy: 0.9927
Precision: 1.0000
Recall: 0.9843
F1-score: 0.9921


In [5]:
# AdaBoost (100 estimators): fit on the scaled training features, then predict the test set.
ada_classifier = AdaBoostClassifier(n_estimators=100, random_state=42).fit(
    X_train_scaled, y_train
)
y_pred_ada = ada_classifier.predict(X_test_scaled)

# Score the test-set predictions with each metric, in the order
# accuracy, precision, recall, F1.
accuracy_ada, precision_ada, recall_ada, f1_ada = [
    score(y_test, y_pred_ada)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
]

# One print call, one report line per argument (the header starts with a blank line).
print(
    "\nAdaBoost Classifier Metrics:",
    f"Accuracy: {accuracy_ada:.4f}",
    f"Precision: {precision_ada:.4f}",
    f"Recall: {recall_ada:.4f}",
    f"F1-score: {f1_ada:.4f}",
    sep="\n",
)



AdaBoost Classifier Metrics:
Accuracy: 0.9964
Precision: 1.0000
Recall: 0.9921
F1-score: 0.9960


In [6]:
# Side-by-side comparison table: one row per metric, one column per model.
# The values are laid out row-wise, so each inner list holds a single metric
# across the four models in column order.
metrics_df = pd.DataFrame(
    [
        [accuracy_dt, accuracy_lr, accuracy_rf, accuracy_ada],
        [precision_dt, precision_lr, precision_rf, precision_ada],
        [recall_dt, recall_lr, recall_rf, recall_ada],
        [f1_dt, f1_lr, f1_rf, f1_ada],
    ],
    index=['Accuracy', 'Precision', 'Recall', 'F1-score'],
    columns=['Decision Tree', 'Logistic Regression', 'Random Forest', 'AdaBoost'],
)

# Heading (preceded by a blank line) followed by the plain-text table.
print("\nComparison of Model Performances:", metrics_df, sep="\n")



Comparison of Model Performances:
           Decision Tree  Logistic Regression  Random Forest  AdaBoost
Accuracy        0.981818             0.978182       0.992727  0.996364
Precision       1.000000             0.968992       1.000000  1.000000
Recall          0.960630             0.984252       0.984252  0.992126
F1-score        0.979920             0.976562       0.992063  0.996047
