# Decision Tree Classification

## Importing the libraries

In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [26]:
dataset = pd.read_csv('telecom_churn.csv')
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

In [27]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])

In [28]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 3] = le.fit_transform(X[:, 3])

## Splitting the dataset into the Training set and Test set

In [29]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [30]:
print(X_train)

[[67 415 0 ... 9 2.59 1]
 [101 415 0 ... 5 2.89 1]
 [91 510 1 ... 3 2.67 0]
 ...
 [93 510 1 ... 2 3.0 1]
 [91 415 0 ... 3 2.67 3]
 [130 408 1 ... 2 4.56 5]]


In [31]:
print(y_train)

[False False False ...  True False  True]


In [32]:
print(X_test)

[[92 415 1 ... 5 2.51 0]
 [112 510 0 ... 3 2.7 2]
 [69 510 0 ... 4 3.38 3]
 ...
 [89 415 0 ... 1 3.32 3]
 [54 408 0 ... 7 2.62 2]
 [70 415 0 ... 4 2.78 1]]


In [33]:
print(y_test)

[False False False False  True  True  True False False False False False
 False False  True False False False False False  True False False False
 False False False False False False False False False False  True False
  True False False  True False  True  True  True False False False False
 False False  True False False False False False False False False  True
  True False False False False False False False  True False False False
 False False False False False False  True False False False False False
 False False  True False  True False False False False False False False
 False False False False False False  True False False False False False
  True False False False False False False False False False False False
  True False False False False False False False False False False False
 False False False False False False  True False False False  True False
 False False False False False False False False False False False False
 False False  True False False False  True False Fa

## Feature Scaling

In [34]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [35]:
print(X_train)

[[-0.84138829 -0.53000127 -0.32217788 ...  1.85266441 -0.23301487
  -0.42370879]
 [ 0.01041622 -0.53000127 -0.32217788 ...  0.21567745  0.16272454
  -0.42370879]
 [-0.24011452  1.69868521  3.10387541 ... -0.60281603 -0.12748436
  -1.18492249]
 ...
 [-0.19000837  1.69868521  3.10387541 ... -1.01206277  0.30782899
  -0.42370879]
 [-0.24011452 -0.53000127 -0.32217788 ... -0.60281603 -0.12748436
   1.09871861]
 [ 0.73695537 -0.69422028  3.10387541 ... -1.01206277  2.36567391
   2.62114601]]


In [36]:
print(X_test)

[[-0.21506144 -0.53000127  3.10387541 ...  0.21567745 -0.33854538
  -1.18492249]
 [ 0.28600004  1.69868521 -0.32217788 ... -0.60281603 -0.08791042
   0.33750491]
 [-0.79128214  1.69868521 -0.32217788 ... -0.19356929  0.80909891
   1.09871861]
 ...
 [-0.29022066 -0.53000127 -0.32217788 ... -1.42130951  0.72995102
   1.09871861]
 [-1.16707825 -0.69422028 -0.32217788 ...  1.03417093 -0.19344093
   0.33750491]
 [-0.76622907 -0.53000127 -0.32217788 ... -0.19356929  0.01762009
  -0.42370879]]


## Training the Decision Tree Classification model on the Training set

In [37]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=0, splitter='best')

## Predicting a new result

## Predicting the Test set results

In [38]:
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[False False]
 [False False]
 [False False]
 ...
 [False False]
 [False False]
 [False False]]


## Making the Confusion Matrix

In [39]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

[[669  50]
 [ 22  93]]


0.9136690647482014

## Visualising the Training set results

In [40]:
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_train), y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Decision Tree Classification (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

ValueError: ignored

## Visualising the Test set results

In [None]:
from matplotlib.colors import ListedColormap
X_set, y_set = sc.inverse_transform(X_test), y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 10, stop = X_set[:, 0].max() + 10, step = 0.25),
                     np.arange(start = X_set[:, 1].min() - 1000, stop = X_set[:, 1].max() + 1000, step = 0.25))
plt.contourf(X1, X2, classifier.predict(sc.transform(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Decision Tree Classification (Test set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()