In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Function to import Dataset
# Default location of the UCI balance-scale dataset (comma-separated, no header row).
BALANCE_SCALE_URL = (
    'https://archive.ics.uci.edu/ml/machine-learning-databases/'
    'balance-scale/balance-scale.data'
)


def importdata(source=BALANCE_SCALE_URL):
    """Load the balance-scale dataset and print a short summary of it.

    Args:
        source: Anything ``pd.read_csv`` accepts (URL, local path or
            file-like object). Defaults to the UCI-hosted copy, so calling
            ``importdata()`` with no arguments behaves as before. Passing a
            local path or buffer lets the rest of the pipeline run offline.

    Returns:
        pandas.DataFrame: The raw table. The file is read with
        ``header=None``, so columns are labelled with integers starting at 0.
    """
    balance_data = pd.read_csv(source, sep=',', header=None)

    # Quick sanity summary so the reader can confirm what was loaded.
    print("Dataset Length: ", len(balance_data))
    print("Dataset Shape: ", balance_data.shape)
    print("First 5 rows of Dataset:\n", balance_data.head())

    return balance_data
  
# Function to split the dataset
def splitdataset(balance_data):
    """Separate features from the target and carve out a stratified hold-out set.

    Args:
        balance_data: DataFrame whose column 0 holds the class label and
            whose columns 1-4 hold the features.

    Returns:
        tuple: ``(X, Y, X_train, X_test, y_train, y_test)`` where ``X`` and
        ``Y`` are the full feature matrix and label vector, and the remaining
        four items are the 70/30 train/test partitions.
    """
    table = balance_data.values
    features = table[:, 1:5]  # columns 1..4
    labels = table[:, 0]      # column 0

    # Fixed seed for reproducibility; stratify on the labels so both
    # partitions keep the class proportions of the full dataset.
    features_train, features_test, labels_train, labels_test = train_test_split(
        features,
        labels,
        test_size=0.3,
        random_state=100,
        stratify=labels,
    )

    return features, labels, features_train, features_test, labels_train, labels_test
  
# Function to perform training with Gini Index
def train_using_gini(X_train, y_train):
    """Fit a shallow decision tree that chooses splits by Gini impurity.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels aligned with ``X_train``.

    Returns:
        DecisionTreeClassifier: The fitted classifier.
    """
    tree_params = {
        "criterion": "gini",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    gini_tree = DecisionTreeClassifier(**tree_params)
    gini_tree.fit(X_train, y_train)
    return gini_tree
      
# Function to perform training with Entropy
def train_using_entropy(X_train, y_train):
    """Fit a shallow decision tree that chooses splits by entropy.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels aligned with ``X_train``.

    Returns:
        DecisionTreeClassifier: The fitted classifier.
    """
    tree_params = {
        "criterion": "entropy",
        "random_state": 100,
        "max_depth": 3,
        "min_samples_leaf": 5,
    }
    entropy_tree = DecisionTreeClassifier(**tree_params)
    entropy_tree.fit(X_train, y_train)
    return entropy_tree
  
# Function to make predictions
def prediction(X_test, clf_object):
    """Run a fitted classifier on ``X_test``, echo the labels, and return them.

    Args:
        X_test: Feature rows to classify.
        clf_object: Any object exposing a ``predict(X)`` method.

    Returns:
        Whatever ``clf_object.predict`` returns for ``X_test``.
    """
    predicted_labels = clf_object.predict(X_test)
    print("Predicted values:\n", predicted_labels)
    return predicted_labels
      
# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
    """Print the confusion matrix, accuracy and per-class report for predictions.

    Args:
        y_test: True labels for the evaluated samples.
        y_pred: Predicted labels, aligned with ``y_test``.

    Returns:
        None. All results are written to stdout.
    """
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Accuracy:", accuracy_score(y_test, y_pred) * 100, "%")

    # zero_division=0 suppresses the undefined-metric warning while scoring a
    # class the model never predicts as 0. Using 1 here would report a perfect
    # precision for such a class and inflate the averaged figures.
    print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))
  
# Main function to run the classifier
def main():
    """Load the data, train both tree variants, and report test-set results.

    The Gini and entropy classifiers are both fitted on the same training
    partition before either is evaluated; results are then printed for Gini
    first and entropy second.
    """
    dataset = importdata()
    _, _, X_train, X_test, y_train, y_test = splitdataset(dataset)

    # Train both models up front (Gini first), pairing each with its banner.
    fitted_models = (
        ("\nResults Using Gini Index:", train_using_gini(X_train, y_train)),
        ("\nResults Using Entropy:", train_using_entropy(X_train, y_train)),
    )

    # Evaluate each model on the held-out partition.
    for banner, model in fitted_models:
        print(banner)
        predicted = prediction(X_test, model)
        cal_accuracy(y_test, predicted)
  
# Entry point: run the full pipeline only when this code is executed directly
# (i.e. __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__": 
    main()


Dataset Length:  625
Dataset Shape:  (625, 5)
First 5 rows of Dataset:
    0  1  2  3  4
0  B  1  1  1  1
1  R  1  1  1  2
2  R  1  1  1  3
3  R  1  1  1  4
4  R  1  1  1  5

Results Using Gini Index:
Predicted values:
 ['L' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'L'
 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L'
 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L'
 'L' 'R' 'L' 'L' 'R' 'R' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'L'
 'L' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'R' 'L'
 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'L' 'L' 'R'
 'L' 'R' 'L' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'L'
 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'L'
 'R' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'L'
 'L' 'L' 'R' 'R' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'R' 'R' 'L'
 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'L']
Confusion Matrix