In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix

In [17]:
def import_data():
    balance_data = pd.read_csv(
'https://archive.ics.uci.edu/ml/machine-learning-'+
'databases/balance-scale/balance-scale.data',
    sep= ',', header = None)
    print(balance_data.head())
    return balance_data

In [18]:
def splitdataset(balance_data):
    X = balance_data.values[:,1:5]
    y = balance_data.values[:,0]
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3,random_state = 100)
    return X,y,X_train,X_test,y_train,y_test

In [19]:
def train_using_gini(X_train,y_train,X_test):
    clf_gini = DecisionTreeClassifier(criterion='gini',random_state=100,max_depth=3,min_samples_leaf=5)
    clf_gini.fit(X_train,y_train)
    clf_gini_pred = clf_gini.predict(X_test)
    return clf_gini_pred

In [20]:
def train_using_entropy(X_train,y_train,X_test):
    clf_entropy = DecisionTreeClassifier(criterion='entropy',random_state=100,max_depth=3,min_samples_leaf=5)
    clf_entropy.fit(X_train,y_train)
    clf_entropy_pred = clf_entropy.predict(X_test)
    return clf_entropy_pred

In [22]:
if __name__ == "__main__":
    data = import_data()
    X,y,X_train,X_test,y_train,y_test = splitdataset(data)
    clf_gini_pred = train_using_gini(X_train,y_train,X_test)
    clf_entropy_pred = train_using_entropy(X_train,y_train,X_test)
    print("Accuracy of Gini Decision Tree : ", accuracy_score(clf_gini_pred,y_test))
    print("Confusion Matrix of Gini Decision Tree : \n", confusion_matrix(clf_gini_pred,y_test))
    print("Accuracy of Entropy Decision Tree : ", accuracy_score(clf_entropy_pred,y_test))
    print("Confusion Matrix of Entropy Decision Tree : \n", confusion_matrix(clf_entropy_pred,y_test))

   0  1  2  3  4
0  B  1  1  1  1
1  R  1  1  1  2
2  R  1  1  1  3
3  R  1  1  1  4
4  R  1  1  1  5
Accuracy of Gini Decision Tree :  0.7340425531914894
Confusion Matrix of Gini Decision Tree : 
 [[ 0  0  0]
 [ 6 67 19]
 [ 7 18 71]]
Accuracy of Entropy Decision Tree :  0.7074468085106383
Confusion Matrix of Entropy Decision Tree : 
 [[ 0  0  0]
 [ 6 63 20]
 [ 7 22 70]]
