## Decision Tree 
### Company Data

In [1]:
# A cloth manufacturing company is interested to know about the segment or attributes causes high sale. 
# Approach - A decision tree can be built with target variable Sale (we will first convert it in categorical variable) & all other variable will be independent in the analysis.  


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets  
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.metrics import classification_report
from sklearn import preprocessing

In [4]:
company = pd.read_csv('Company_Data.csv')
company.head()

Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,ShelveLoc,Age,Education,Urban,US
0,9.5,138,73,11,276,120,Bad,42,17,Yes,Yes
1,11.22,111,48,16,260,83,Good,65,10,Yes,Yes
2,10.06,113,35,10,269,80,Medium,59,12,Yes,Yes
3,7.4,117,100,4,466,97,Medium,55,14,Yes,Yes
4,4.15,141,64,3,340,128,Bad,38,13,Yes,No


In [5]:
#Creating dummy vairables
company=pd.get_dummies(company,columns=['ShelveLoc','Urban','US'], drop_first=True)

In [6]:
cut_labels=['Low','Medium','High']
cut_bins=[-1,5.66,12,17]
company['sales']=pd.cut(company['Sales'],labels=cut_labels,bins=cut_bins)

In [7]:
company.head()

Unnamed: 0,Sales,CompPrice,Income,Advertising,Population,Price,Age,Education,ShelveLoc_Good,ShelveLoc_Medium,Urban_Yes,US_Yes,sales
0,9.5,138,73,11,276,120,42,17,0,0,1,1,Medium
1,11.22,111,48,16,260,83,65,10,1,0,1,1,Medium
2,10.06,113,35,10,269,80,59,12,0,1,1,1,Medium
3,7.4,117,100,4,466,97,55,14,0,1,1,1,Medium
4,4.15,141,64,3,340,128,38,13,0,0,1,0,Low


In [8]:
X = company.iloc[:,1:12]
Y = company.iloc[:,-1]

In [9]:
# Splitting data into train & test
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.3, random_state=0)

In [10]:
model = DecisionTreeClassifier(criterion = 'entropy')
model.fit(Xtrain,Ytrain)

DecisionTreeClassifier(criterion='entropy')

In [11]:
pred_train = model.predict(Xtrain)
pred_train

array(['Low', 'Low', 'Medium', 'High', 'Medium', 'Medium', 'Low',
       'Medium', 'Medium', 'Medium', 'Medium', 'Low', 'Medium', 'Medium',
       'Medium', 'Medium', 'Medium', 'Medium', 'Low', 'High', 'Medium',
       'Medium', 'High', 'Medium', 'Medium', 'Low', 'Low', 'Medium',
       'Low', 'Low', 'Low', 'Medium', 'Medium', 'Medium', 'Low', 'Low',
       'Medium', 'Medium', 'Low', 'Medium', 'Medium', 'Medium', 'Medium',
       'Medium', 'Low', 'Low', 'Medium', 'Medium', 'Medium', 'Medium',
       'Medium', 'Medium', 'Low', 'Medium', 'Low', 'Medium', 'Low',
       'High', 'Medium', 'Medium', 'Low', 'High', 'Medium', 'Low',
       'Medium', 'Low', 'Medium', 'Medium', 'Medium', 'Low', 'Low', 'Low',
       'High', 'Low', 'Medium', 'Low', 'Medium', 'High', 'High', 'Medium',
       'Medium', 'Medium', 'Medium', 'Medium', 'Medium', 'Medium', 'Low',
       'Medium', 'Medium', 'Low', 'Medium', 'Medium', 'Medium', 'Low',
       'High', 'Low', 'Medium', 'Medium', 'Medium', 'Medium', 'Medium',


In [12]:
#Predicting on test data
preds = model.predict(Xtest)

In [13]:
preds

array(['Medium', 'Medium', 'Medium', 'Low', 'Low', 'Low', 'Medium',
       'Medium', 'Medium', 'Low', 'Medium', 'Medium', 'Low', 'High',
       'Medium', 'Medium', 'High', 'Medium', 'Low', 'Medium', 'Low',
       'Medium', 'Medium', 'Medium', 'Low', 'Medium', 'Low', 'Low',
       'Medium', 'Medium', 'Medium', 'Medium', 'Medium', 'Medium', 'Low',
       'Medium', 'Medium', 'Medium', 'Medium', 'Medium', 'Medium', 'High',
       'Low', 'Medium', 'Low', 'Medium', 'Medium', 'Medium', 'Medium',
       'Medium', 'Low', 'Medium', 'Medium', 'Medium', 'Low', 'Medium',
       'Medium', 'Medium', 'Medium', 'Low', 'Medium', 'High', 'Medium',
       'Low', 'Low', 'Medium', 'Medium', 'Medium', 'Medium', 'Medium',
       'Medium', 'Medium', 'High', 'Medium', 'Medium', 'Medium', 'Medium',
       'High', 'Medium', 'Medium', 'Low', 'Medium', 'Low', 'Medium',
       'High', 'Medium', 'Medium', 'Medium', 'High', 'High', 'Medium',
       'Medium', 'Medium', 'Low', 'Medium', 'Low', 'Medium', 'Low',
       'M

In [14]:
#train Accuracy
np.mean(pred_train==Ytrain)

1.0

In [15]:
# test Accuracy 
np.mean(preds==Ytest)

0.6833333333333333