In [1]:
import numpy as np
import pandas as pd
import pprint
# Machine epsilon for float64: the gap between 1.0 and the next representable float.
eps = np.finfo(float).eps
# `log` in this notebook is the base-2 logarithm, so entropies come out in bits.
from numpy import log2 as log

In [2]:
# Read the labelled CSV and keep only the four feature columns plus the target.
# "label" must stay last: the tree helpers below treat the last column as the class.
training_data = pd.read_csv("..//Data/Labelled.csv")[
    ["Operator", "Network Type", "In Out Travelling", "State Name", "label"]
]

In [3]:
# Show the selected columns; pandas abbreviates a long frame to its first and last rows.
training_data

Unnamed: 0,Operator,Network Type,In Out Travelling,State Name,label
0,Airtel,2G,Indoor,Andhra Pradesh,Average
1,Airtel,2G,Indoor,Assam,Very Good
2,Airtel,2G,Indoor,Bihar,Average
3,Airtel,2G,Indoor,Delhi,Poor
4,Airtel,2G,Indoor,Gujarat,Excellent
...,...,...,...,...,...
987,Vodafone,4G,Travelling,Tamil Nadu,Excellent
988,Vodafone,4G,Travelling,Telangana,Excellent
989,Vodafone,4G,Travelling,Uttar Pradesh,Average
990,Vodafone,4G,Travelling,Uttarakhand,Inadequate


In [4]:
def find_entropy(df):
    """Return the Shannon entropy, in bits, of the target column of ``df``.

    The target is whatever column comes last in the frame, so the function
    does not depend on the column being called ``label``.
    """
    target = df.keys()[-1]
    labels = df[target]
    class_counts = labels.value_counts()
    n_rows = len(labels)

    def term(label):
        # Contribution -p*log2(p) of a single class.
        p = class_counts[label] / n_rows
        return -p * np.log2(p)

    # Classes are visited in order of first appearance.
    return sum(term(label) for label in labels.unique())
  

def find_entropy_attribute(df, attribute):
    """Return the conditional entropy, in bits, of the target given ``attribute``.

    The frame is partitioned by the distinct values of ``attribute``; the
    entropy of the target (the last column of ``df``) is computed inside each
    partition and the results are averaged, weighted by partition size.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose last column is the target.
    attribute : column label
        Column to condition on.

    Returns
    -------
    float
        Weighted average entropy; exactly ``0.0`` when every partition is
        pure, and ``0.0`` for an empty frame.
    """
    target = df.keys()[-1]
    n_rows = len(df)
    if n_rows == 0:
        return 0.0

    weighted_entropy = 0.0
    # One pass over the groups instead of re-masking the frame for every
    # (attribute value, class) pair. Rows whose attribute is NaN fall in no
    # group, so they add nothing to the sum but still count in ``n_rows``.
    for _, labels in df.groupby(attribute, sort=False)[target]:
        group_size = len(labels)
        if group_size == 0:
            # Unobserved categories of a categorical column yield empty groups.
            continue
        counts = labels.value_counts().to_numpy()
        # Drop zero counts so log2 is only evaluated on positive
        # probabilities; no epsilon fudge is needed and pure groups give 0.
        probabilities = counts[counts > 0] / group_size
        group_entropy = -np.sum(probabilities * np.log2(probabilities))
        weighted_entropy += (group_size / n_rows) * group_entropy
    return float(weighted_entropy)


def find_winner(df):
    """Return the attribute of ``df`` with the highest information gain.

    Every column except the last is a candidate attribute; the last column is
    the target. Information gain is the entropy of the target minus the
    conditional entropy of the target given the attribute.

    Returns
    -------
    column label
        The winning attribute. On ties the earliest column wins, because
        ``np.argmax`` returns the first maximum.

    Raises
    ------
    ValueError
        If ``df`` has no attribute columns besides the target.
    """
    attributes = df.keys()[:-1]
    if len(attributes) == 0:
        raise ValueError("find_winner needs at least one attribute column besides the target")

    # The target entropy does not depend on the attribute, so compute it once.
    base_entropy = find_entropy(df)
    gains = [base_entropy - find_entropy_attribute(df, key) for key in attributes]
    return attributes[np.argmax(gains)]
  
def get_subtable(df, node, value):
    """Return the rows of ``df`` where column ``node`` equals ``value``, renumbered from 0."""
    matches = df[node] == value
    return df.loc[matches].reset_index(drop=True)


def _splittable_attributes(df):
    """List the attribute columns (all but the last) that take more than one value in ``df``."""
    return [name for name in df.keys()[:-1] if df[name].nunique() > 1]


def buildTree(df, tree=None):
    """Build a decision tree for ``df`` as nested dictionaries.

    The last column of ``df`` is the target; every other column is an
    attribute. The result has the shape
    ``{attribute: {attribute_value: subtree_or_leaf_label, ...}}``.

    Parameters
    ----------
    df : pandas.DataFrame
        Training table with the target in the last column.
    tree : dict, optional
        Existing dictionary to fill in; a new one is created when omitted.

    Returns
    -------
    dict
        The (sub)tree rooted at the attribute chosen for ``df``.

    Notes
    -----
    A branch becomes a leaf when its rows all share one label. It also becomes
    a leaf when none of its attributes varies any more (rows identical in
    every attribute but with different labels); the most frequent label is
    used, and on a tie the label that sorts first.
    """
    column = df.keys()[-1]   # target column, whatever it is called

    # Only attributes that still vary can shrink the table. Choosing among
    # them keeps a constant column from winning a zero-gain tie, which would
    # recurse on the same rows forever.
    candidates = _splittable_attributes(df)
    if candidates:
        node = find_winner(df[candidates + [column]])
    else:
        # Nothing varies: fall back to the plain choice so the result is
        # still a one-level dict.
        node = find_winner(df)

    if tree is None:
        tree = {}
    # setdefault (rather than assigning only for a fresh dict) so a
    # caller-supplied dict that lacks this node does not raise KeyError.
    tree.setdefault(node, {})

    for value in np.unique(df[node]):
        subtable = get_subtable(df, node, value)
        clValue, counts = np.unique(subtable[column], return_counts=True)

        if len(counts) == 1:
            # Pure subset: the single remaining label is the leaf.
            tree[node][value] = clValue[0]
        elif not _splittable_attributes(subtable):
            # Impure but no attribute left to split on: majority vote.
            tree[node][value] = clValue[np.argmax(counts)]
        else:
            tree[node][value] = buildTree(subtable)

    return tree
  

In [5]:
# Fit the tree on the whole labelled frame; the result is a nested dict keyed by attribute, then by attribute value.
tree = buildTree(training_data)

In [6]:
# Pretty-print the nested dict so the attribute -> value -> subtree/leaf structure is readable.
pprint.pprint(tree)

{'State Name': {'Andaman and Nicobar Islands': {'Network Type': {'2G': {'In Out Travelling': {'Indoor': 'Excellent',
                                                                                              'Outdoor': {'Operator': {'BSNL': 'Very '
                                                                                                                               'Good',
                                                                                                                       'Vodafone': 'Excellent'}},
                                                                                              'Travelling': 'Very '
                                                                                                            'Good'}},
                                                                 '3G': 'Inadequate',
                                                                 '4G': {'In Out Travelling': {'Indoor': 'Inadequate',
                              

                'Chandigarh': {'Operator': {'Airtel': {'In Out Travelling': {'Indoor': {'Network Type': {'3G': 'Average',
                                                                                                         '4G': 'Inadequate'}},
                                                                             'Outdoor': {'Network Type': {'3G': 'Excellent',
                                                                                                          '4G': 'Very '
                                                                                                                'Good'}},
                                                                             'Travelling': {'Network Type': {'3G': 'Poor',
                                                                                                             '4G': 'Inadequate'}}}},
                                            'Idea': {'In Out Travelling': {'Indoor': 'Excellent',
                              

                'Karnataka': {'Operator': {'Airtel': {'Network Type': {'2G': 'Poor',
                                                                       '3G': {'In Out Travelling': {'Indoor': 'Very '
                                                                                                              'Good',
                                                                                                    'Outdoor': 'Poor',
                                                                                                    'Travelling': 'Average'}},
                                                                       '4G': {'In Out Travelling': {'Indoor': 'Very '
                                                                                                              'Good',
                                                                                                    'Outdoor': 'Very '
                                                                              

                                            'Idea': {'Network Type': {'2G': {'In Out Travelling': {'Indoor': 'Poor',
                                                                                                   'Travelling': 'Excellent'}},
                                                                      '3G': 'Inadequate',
                                                                      '4G': {'In Out Travelling': {'Indoor': 'Poor',
                                                                                                   'Outdoor': 'Excellent',
                                                                                                   'Travelling': 'Average'}}}},
                                            'Other': {'Network Type': {'2G': {'In Out Travelling': {'Indoor': 'Inadequate',
                                                                                                    'Outdoor': 'Poor',
                                                      

                                             'Vodafone': {'In Out Travelling': {'Indoor': 'Inadequate',
                                                                                'Outdoor': {'Network Type': {'3G': 'Excellent',
                                                                                                             '4G': 'Inadequate'}},
                                                                                'Travelling': {'Network Type': {'3G': 'Very '
                                                                                                                      'Good',
                                                                                                                '4G': 'Inadequate'}}}}}},
                'West Bengal': {'Operator': {'Airtel': {'Network Type': {'2G': 'Very '
                                                                               'Good',
                                                                         

In [7]:
_MISSING = object()   # sentinel: distinguishes "no default given" from default=None


def predict(inst, tree, default=_MISSING):
    """Return the label the decision tree assigns to ``inst``.

    Parameters
    ----------
    inst : mapping-like
        Anything indexable by attribute name (for example a dict or a pandas
        Series holding one row).
    tree : dict
        Nested dict of the form ``{attribute: {value: subtree_or_label}}``.
    default : object, optional
        Returned when ``inst`` holds an attribute value that has no branch in
        the tree. When omitted, such a value raises ``KeyError``.

    Returns
    -------
    object
        The leaf label reached by following the branches of ``tree``.

    Raises
    ------
    KeyError
        If an attribute value of ``inst`` has no branch and no ``default``
        was supplied.
    ValueError
        If an empty dict is met where a tree node is expected.
    """
    subtree = tree
    # Descend one level per iteration until a non-dict (a leaf label) is reached.
    while isinstance(subtree, dict):
        if not subtree:
            raise ValueError("cannot predict from an empty tree")
        # Each node dict holds a single attribute key.
        node = next(iter(subtree))
        value = inst[node]
        branches = subtree[node]
        if value not in branches:
            if default is _MISSING:
                raise KeyError(
                    "no branch for {!r}={!r} in the decision tree".format(node, value)
                )
            return default
        subtree = branches[value]
    return subtree

In [8]:
# Take the row with index label 900 as a sample instance. It comes from the training frame
# itself, so this is a smoke test of predict(), not a held-out evaluation.
inst = training_data.loc[900]
inst

Operator               Vodafone
Network Type                 3G
In Out Travelling    Travelling
State Name                  Goa
label                 Excellent
Name: 900, dtype: object

In [9]:
# Walk the tree with the sample row; the result can be compared with the row's own "label" value.
prediction = predict(inst, tree)
prediction

'Excellent'