**Importing Libraries**

In [None]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report
eps = np.finfo(float).eps
from numpy import log2 as log
import pprint

**Importing the dataset**

In [None]:
# Load the raw table and keep only the four columns this notebook works with.
dataset=pd.read_csv('engine.csv')
dataset=pd.DataFrame(dataset,columns=["Engine","Turbo","Weight","Fast"])

# Per-column count of rows that have a small engine. The labels in the file are
# lower-case ("small"), so the comparison must be lower-case too; the mask also
# gets its own name instead of shadowing the builtin `filter`.
small_engine_mask = dataset["Engine"]=="small"
small_engine_counts = dataset.where(small_engine_mask).count()

# Encode an independent copy so the raw, human-readable `dataset` stays intact
# and pandas does not warn about assigning into a slice of another frame.
dataset_encoded=dataset.copy()
le=LabelEncoder()

# The encoder is re-fitted for every column, so after the loop `le` only holds
# the mapping of the last column processed.
for i in dataset_encoded:
    dataset_encoded[i]=le.fit_transform(dataset_encoded[i])

print(dataset)

   Engine Turbo   Weight Fast
0   small   yes  average  yes
1  medium    no    heavy  yes
2   large    no  average  yes
3  medium    no    light   no
4   large   yes    heavy   no
5   large    no    heavy   no
6  medium   yes    light  yes
7   large    no  average  yes
8  medium    no    heavy   no
9  medium    no    heavy   no


**Encoded Dataset**

In [None]:
# Show the label-encoded table (every column replaced by integer codes).
print(dataset_encoded)

   Engine  Turbo  Weight  Fast
0       2      1       0     1
1       1      0       1     1
2       0      0       0     1
3       1      0       2     0
4       0      1       1     0
5       0      0       1     0
6       1      1       2     1
7       0      0       0     1
8       1      0       1     0
9       1      0       1     0


**Calculating Entropy of Dataset**

In [None]:
# Shannon entropy (in bits) of the target column `Fast`.
entropy_node = 0  #Initialize Entropy

# Counts and class values are both taken from the encoded frame so the lookup
# below is a genuine label lookup (integer code -> count) rather than an
# accidental positional one into a string-indexed Series.
class_counts = dataset_encoded.Fast.value_counts()
values = dataset_encoded.Fast.unique()
for value in values:
    fraction = class_counts[value]/len(dataset_encoded.Fast)
    entropy_node += -fraction*np.log2(fraction)

In [None]:
# Display the entropy of the target column computed in the previous cell.
entropy_node

1.0

**Calculating entropy of each attribute**

In [None]:
def ent(df,attribute,target=None):
    """Return the conditional entropy H(target | attribute) of ``df`` in bits.

    The rows of ``df`` are grouped by the value of ``attribute``; the entropy
    of the target labels inside each group is weighted by the share of rows
    that fall into that group, and the weighted entropies are summed.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to analyse. Only this frame is read (no notebook globals).
    attribute : str
        Name of the column to condition on.
    target : str, optional
        Name of the class column. Defaults to the last column of ``df``.

    Returns
    -------
    float
        Weighted entropy, always >= 0. ``0.0`` for an empty frame.
    """
    if target is None:
        target = df.columns[-1]

    total = len(df)
    if total == 0:
        return 0.0

    entropy_attribute = 0.0
    for _, labels in df.groupby(attribute)[target]:
        if len(labels) == 0:
            continue
        probs = labels.value_counts(normalize=True)
        # Drop zero-probability classes so 0*log2(0) never produces NaN.
        probs = probs[probs > 0]
        entropy_each_feature = float(-(probs * np.log2(probs)).sum())
        entropy_attribute += (len(labels) / total) * entropy_each_feature

    return entropy_attribute

In [None]:
# Conditional entropy of the target for every feature column
# (all columns except the last one, which holds the class label).
a_entropy = dict(
    (column, ent(dataset_encoded, column))
    for column in dataset_encoded.keys()[:-1]
)
a_entropy

{'Engine': 0.8854752972273338,
 'Turbo': 0.9651484454403222,
 'Weight': 0.5609640474436807}

**Calculating Information Gain**

In [None]:
def ig(e_dataset,e_attr):
    """Information gain: dataset entropy minus the attribute's entropy."""
    gain = e_dataset - e_attr
    return gain

In [None]:
# Information gain of each feature relative to the entropy of the whole dataset.
IG = {
    feature: ig(entropy_node, feature_entropy)
    for feature, feature_entropy in a_entropy.items()
}
IG

{'Engine': 0.11452470277266624,
 'Turbo': 0.03485155455967781,
 'Weight': 0.43903595255631933}

**Extracting Feature with Maximum Information Gain**

In [None]:
def _label_entropy(labels):
    """Shannon entropy (bits) of the values in a pandas Series."""
    probs = labels.value_counts(normalize=True)
    probs = probs[probs > 0]  # avoid 0*log2(0) -> NaN
    return float(-(probs * np.log2(probs)).sum())


def _conditional_entropy(df, attribute):
    """Entropy of the last column of ``df`` after splitting on ``attribute``."""
    target = df.keys()[-1]
    total = len(df)
    return sum(
        (len(labels) / total) * _label_entropy(labels)
        for _, labels in df.groupby(attribute)[target]
        if len(labels)
    )


def find_winner(df):
    """Return the name of the feature with the highest information gain.

    The last column of ``df`` is treated as the class label and every other
    column as a candidate feature. The gain of a feature is the entropy of the
    class column minus the class entropy that remains after splitting on that
    feature. Ties go to the left-most column (``np.argmax`` behaviour).

    Everything is computed from ``df`` itself, so the function also works on
    the subtables produced while growing a tree.

    Raises
    ------
    ValueError
        If ``df`` has no feature columns to choose from.
    """
    features = df.keys()[:-1]
    if len(features) == 0:
        raise ValueError("find_winner needs at least one feature column")

    base_entropy = _label_entropy(df[df.keys()[-1]])
    IG = [base_entropy - _conditional_entropy(df, key) for key in features]
    return features[int(np.argmax(IG))]

In [None]:
# Root split: the feature of the encoded table with the highest information gain.
find_winner(dataset_encoded)

'Weight'

In [None]:
def get_subtable(df, node,value):
  """Rows of ``df`` whose ``node`` column equals ``value``, re-indexed from 0."""
  matches = df[node] == value
  selected = df.loc[matches]
  return selected.reset_index(drop=True)

**Building Decision Tree**

In [None]:
def buildTree(df,tree=None):
    """Recursively grow an ID3-style decision tree as nested dictionaries.

    The last column of ``df`` is the class label. At every level the attribute
    returned by ``find_winner(df)`` becomes the node, and one branch is created
    per distinct value of that attribute:

    * a branch whose rows all share one class label becomes a leaf holding
      that label;
    * any other branch is expanded by a recursive call on its subtable.

    If the chosen attribute takes a single value in ``df`` while the class
    labels still differ, splitting on it would reproduce ``df`` unchanged and
    recurse forever. In that case the most frequent class label is returned as
    a leaf instead (ties go to the label that sorts first).

    Parameters
    ----------
    df : pandas.DataFrame
        Feature columns followed by the class column.
    tree : dict, optional
        Existing dictionary to add the node to; a new one is created when
        omitted.

    Returns
    -------
    dict or label
        ``{node: {value: subtree_or_label, ...}}``, or a bare class label in
        the unsplittable case described above.
    """
    Class = df.keys()[-1]

    #Get attribute with maximum information gain
    node = find_winner(df)

    #Get distinct value of that attribute
    attValue = np.unique(df[node])

    # Stop when the best attribute cannot separate the rows any further.
    clValue,counts = np.unique(df[Class],return_counts=True)
    if len(attValue) < 2 and len(counts) > 1:
        return clValue[np.argmax(counts)]

    #Create an empty dictionary to create tree
    if tree is None:
        tree={}
    # setdefault keeps an existing entry and avoids a KeyError when a caller
    # passes in a dictionary that does not contain this node yet.
    tree.setdefault(node,{})

    #We make loop to construct a tree by calling this function recursively.
    for value in attValue:

        subtable = get_subtable(df,node,value)
        # Purity is judged on the class column, not on a feature column.
        clValue,counts = np.unique(subtable[Class],return_counts=True)

        if len(counts)==1:#Checking purity of subset
            tree[node][value] = clValue[0]
        else:
            tree[node][value] = buildTree(subtable) #Calling the function recursively

    return tree

In [None]:
# Grow the tree from the raw (string-labelled) table so the printed branches
# show the original category names rather than integer codes.
t=buildTree(dataset)
pprint.pprint(t)

{'Weight': {'average': {'Engine': {'large': 'large', 'small': 'small'}},
            'heavy': {'Engine': {'large': 'large', 'medium': 'medium'}},
            'light': 'medium'}}


**Implementation of CART Algorithm**

In [None]:
#Feature Set
X=dataset_encoded.iloc[:,0:3].values
#Label Set
y=dataset_encoded.iloc[:,-1].values

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=1,random_state=2)

model=DecisionTreeClassifier(criterion='gini')
model.fit(X_train,y_train)


DecisionTreeClassifier()

**Score for CART Algorithm (accuracy on the training set)**

In [None]:
# Mean accuracy on the rows the model was fitted on (X_train), i.e. a training
# score, not an estimate of performance on unseen data.
model.score(X_train,y_train)

0.8888888888888888

**Prediction**

In [None]:
# Classify one encoded sample (Engine=2, Turbo=1, Weight=0) and report the result.
print(
    "Speed of car is fast"
    if model.predict([[2,1,0]])==1
    else "Speed of car is not fast"
)

Speed of car is fast


In [None]:
# Classify one encoded sample (Engine=1, Turbo=0, Weight=2) and report the result.
print(
    "Speed of car is fast"
    if model.predict([[1,0,2]])==1
    else "Speed of car is not fast"
)

Speed of car is not fast
