# Decision Tree CLASSIFICATION using Gini and Entropy

#### IMPORTING LIBRARIES AND DATASET

In [1]:
import numpy as np
import pandas as pd



In [2]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

#### Defining function for:
- loading dataset (importdata())
- splitting dataset to train and test set (split_dataset())
- training DT using gini (train_using_gini())
- training DT using entropy (train_using_entropy())
- prediction on test set (predictions())
- calculation of accuracy_score, confusion_matrix, classification_report (cal_accuracy())

Main function has steps of training model, prediting on test set and calculating accuracy, etc

In [3]:
def importdata():
    balance_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/balance-scale/balance-scale.data', sep=',')
    
    print("Dataset length:", len(balance_data))
    print("Dataset shape:", balance_data.shape)
    
    print("Dataset:\n", balance_data.head())
    return balance_data

In [16]:
def split_dataset(balance_data):
    X = balance_data.iloc[:, 1:5]
    y = balance_data.iloc[:, 0]
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    
    return X, y, X_train, X_test, y_train, y_test

In [5]:
def train_using_gini(X_train, y_train):
    clf_gini = DecisionTreeClassifier(criterion='gini', random_state=42, max_depth=3, min_samples_leaf=5)
    
    clf_gini.fit(X_train, y_train)
    return clf_gini

def train_using_entropy(X_train, y_train):
    clf_entropy = DecisionTreeClassifier(criterion='entropy', random_state=42, max_depth=3, min_samples_leaf=5)
    
    clf_entropy.fit(X_train, y_train)
    return clf_entropy

In [7]:
def predictions(X_test, clf_object):
    y_pred = clf_object.predict(X_test)
    print("Predicted values:")
    print(y_pred)
    
    return y_pred

In [10]:
def cal_accuracy(y_test, y_pred):
    
    print("Confusion matrix:", confusion_matrix(y_test, y_pred))
    print("Accuracy:", accuracy_score(y_test, y_pred)*100)
    print("Report:", classification_report(y_test, y_pred))

In [18]:
def main():
    data = importdata()
    X, y, X_train, X_test, y_train, y_test = split_dataset(data)
    clf_gini = train_using_gini(X_train, y_train)
    clf_entropy = train_using_entropy(X_train, y_train)
    
    print("Results using Gini Index:")
    y_pred_gini = predictions(X_test, clf_gini)
    cal_accuracy(y_test, y_pred_gini)
    
    print("Results using Entropy:")
    y_pred_entropy = predictions(X_test, clf_entropy)
    cal_accuracy(y_test, y_pred_entropy)

### Calling main 

In [19]:
if __name__=="__main__":
    main()

Dataset length: 624
Dataset shape: (624, 5)
Dataset:
    B  1  1.1  1.2  1.3
0  R  1    1    1    2
1  R  1    1    1    3
2  R  1    1    1    4
3  R  1    1    1    5
4  R  1    1    2    1
Results using Gini Index:
Predicted values:
['L' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'R' 'R' 'L'
 'R' 'L' 'R' 'R' 'L' 'L' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R'
 'L' 'L' 'R' 'L' 'L' 'R' 'R' 'L' 'L' 'R' 'R' 'R' 'R' 'R' 'L' 'R' 'R' 'R'
 'L' 'L' 'R' 'L' 'R' 'L' 'L' 'L' 'R' 'L' 'L' 'L' 'R' 'R' 'R' 'R' 'R' 'L'
 'R' 'R' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'R' 'R' 'L' 'L' 'L' 'L' 'L'
 'L' 'L' 'L' 'L' 'R' 'R' 'R' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'R' 'R' 'L' 'R'
 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'L' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'R' 'R'
 'L' 'R' 'L' 'R' 'L' 'R' 'L' 'L' 'L' 'L' 'R' 'R' 'R' 'L' 'R' 'L' 'R' 'R'
 'R' 'R' 'R' 'L' 'R' 'R' 'L' 'R' 'R' 'L' 'L' 'R' 'L' 'L' 'R' 'L' 'L' 'R'
 'R' 'L' 'L' 'R' 'R' 'L' 'R' 'L' 'L' 'R' 'R' 'R' 'L' 'L' 'R' 'L' 'R' 'R'
 'R' 'R' 'L' 'R' 'L' 'R' 'R' 'R']


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
