In [None]:
import pandas as pd
import numpy as np
# Relative folder holding the input CSV. Keep the trailing slash: the file path
# is later built by plain string concatenation (INPUT_FOLDER + FILE_NAME).
INPUT_FOLDER = 'data/'

In [None]:
# Name of the CSV file to read from INPUT_FOLDER.
FILE_NAME = 'sample.csv'
# The path is formed by string concatenation, so INPUT_FOLDER must already end
# with a path separator.
data = pd.read_csv(INPUT_FOLDER+FILE_NAME)
# Last expression of the cell: preview the first rows of the loaded frame.
data.head()

In [None]:
# Show the dtype pandas assigned to each column of the loaded frame.
data.dtypes

In [None]:
# Show the number of distinct values in each column.
data.nunique()

In [None]:
def preprocess_data(data):
  """One-hot encode a dataframe and drop a fixed list of redundant dummy columns.

  Steps, in order:
    1. Drop the 'ID' column if it is present.
    2. Expand the frame with ``pd.get_dummies``.
    3. Drop the hard-coded "excess" dummy columns listed below (presumably one
       reference level per categorical variable -- confirm against the data).

  Excess columns that do not exist in the encoded frame are skipped and
  reported via ``print``; the ones that do exist are still dropped.

  Parameters
  ----------
  data : pandas.DataFrame
      Frame to encode. It is not modified; every step returns a new frame.

  Returns
  -------
  pandas.DataFrame
      The encoded frame without the 'ID' column and without the excess dummies.
  """
  # Drop ID
  if 'ID' in data.columns:
    data = data.drop('ID', axis=1)

  # Apply dummy values in dataframe
  data = pd.get_dummies(data)

  # Drop excess columns
  excess_columns = ['Age_>35', 'Income_Medium', 'Gender_Male', 'Marital Status_Single', 'Buys_No']

  # DataFrame.drop is all-or-nothing by default: one missing label raises
  # KeyError and nothing is dropped. Report the missing labels explicitly and
  # drop whatever is present instead of swallowing the error.
  missing_columns = [name for name in excess_columns if name not in data.columns]
  if missing_columns:
    print('Excess columns not found, skipped: {}'.format(missing_columns))
  data = data.drop(excess_columns, axis=1, errors='ignore')

  return data

In [None]:
# Build a model
from sklearn.tree import DecisionTreeClassifier

def fit_tree(X, y):
  """Fit a decision tree classifier using the entropy split criterion.

  Parameters
  ----------
  X : array-like
      Feature matrix passed straight to ``DecisionTreeClassifier.fit``.
  y : array-like
      Target labels passed straight to ``DecisionTreeClassifier.fit``.

  Returns
  -------
  DecisionTreeClassifier
      The fitted estimator.
  """
  dtree = DecisionTreeClassifier(criterion="entropy")
  # Fit on the `y` argument. Reading a notebook-level `Y` here would raise
  # NameError on a fresh kernel or silently train on stale labels.
  dtree.fit(X, y)
  return dtree

In [None]:
# Visualize the tree

# `sklearn.externals.six` has been removed from scikit-learn; the standard
# library StringIO is the equivalent text buffer on Python 3.
from io import StringIO

from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus
import matplotlib.pyplot as plt

def visualize_tree(trained_decision_tree):
  """Render a fitted decision tree as a PNG image for notebook display.

  The tree is exported to Graphviz DOT text in an in-memory buffer, parsed with
  pydotplus, and rendered to PNG bytes wrapped in an ``IPython.display.Image``.

  Parameters
  ----------
  trained_decision_tree
      The fitted tree handed to ``sklearn.tree.export_graphviz``.

  Returns
  -------
  IPython.display.Image or bool
      The rendered image, or ``False`` if any step raised an exception (the
      exception is printed, not re-raised).
  """
  try:
    # In-memory text buffer that receives the DOT source.
    dot_data = StringIO()

    export_graphviz(trained_decision_tree, out_file=dot_data,
                    filled=True, rounded=True,
                    special_characters=True)

    graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
    return Image(graph.create_png())
  except Exception as e:
    # Best effort: rendering problems are reported but must not stop the notebook.
    print(e)
    return False

In [None]:
# Main: preprocess the data, split features/label, fit the tree and display it.

# NOTE(review): this rebinds the notebook-level `data`, so the frame loaded
# earlier is replaced by its preprocessed version after this cell runs.
data = preprocess_data(data)

# Every column except the last is used as a feature; the last column is the label.
# NOTE(review): presumably the last column is the remaining 'Buys_*' dummy -- confirm.
X = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values

dtree = fit_tree(X, Y)

# Last expression of the cell, so its return value is what the notebook shows.
visualize_tree(dtree)