# Decision Tree Classification

## Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [None]:
# Load the patient table from the CSV file in the working directory.
dataset = pd.read_csv('df_patients_v20220321 - df_patients_v20220321.csv')

# Feature matrix: every column except DocID, PtID, P8 and P9, as a NumPy array.
excluded_columns = ['DocID', 'PtID', 'P8', 'P9']
X = dataset.drop(columns=excluded_columns).values

# Target vector: the column at position 9 of the original table.
# NOTE(review): positional index - confirm it still selects the label column
# if the CSV layout ever changes.
y = dataset.iloc[:, 9].values

## Encoding categorical data


### Encoding the Independent Variables

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the column at position 0 of X. ColumnTransformer puts the
# encoded columns first and appends the untouched ('passthrough') columns
# after them, so column positions in X shift once this cell has run.
one_hot_step = ('encoder', OneHotEncoder(), [0])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [None]:
# One-hot encode the column at position 2 of the current X; encoded columns
# come first in the result and the remaining columns are passed through.
# NOTE(review): the index refers to X as it is when this cell runs, not to
# the original table - confirm it targets the intended column.
one_hot_step = ('encoder', OneHotEncoder(), [2])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [None]:
# One-hot encode the column at position 4 of the current X; encoded columns
# come first in the result and the remaining columns are passed through.
# NOTE(review): the index refers to X as it is when this cell runs, not to
# the original table - confirm it targets the intended column.
one_hot_step = ('encoder', OneHotEncoder(), [4])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [None]:
# One-hot encode the column at position 11 of the current X; encoded columns
# come first in the result and the remaining columns are passed through.
# NOTE(review): the index refers to X as it is when this cell runs, not to
# the original table - confirm it targets the intended column.
one_hot_step = ('encoder', OneHotEncoder(), [11])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [None]:
# One-hot encode the last column of the current X (index -1). The encoded
# columns are placed first in the result, so the column that was last is no
# longer at the end of X after this cell.
one_hot_step = ('encoder', OneHotEncoder(), [-1])
ct = ColumnTransformer(transformers=[one_hot_step], remainder='passthrough')
X = np.array(ct.fit_transform(X))

## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the rows as a test set; the fixed random_state makes the
# shuffle, and therefore the split, reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Inspect the training feature matrix as produced by the split.
print(X_train)

[[0.0 0.0 1.0 ... 2 25 7]
 [1.0 0.0 0.0 ... 1 34 13]
 [0.0 0.0 1.0 ... 2 24 5]
 ...
 [0.0 0.0 1.0 ... 1 25 6]
 [1.0 0.0 0.0 ... 1 26 13]
 [0.0 0.0 1.0 ... 1 18 4]]


In [None]:
# Inspect the training labels.
print(y_train)

['A' 'D' 'A' ... 'D' 'D' 'D']


In [None]:
# Inspect the test feature matrix as produced by the split.
print(X_test)

[[0.0 1.0 0.0 ... 1 20 7]
 [0.0 1.0 0.0 ... 1 21 7]
 [0.0 1.0 0.0 ... 1 22 7]
 ...
 [0.0 0.0 0.0 ... 2 20 6]
 [0.0 0.0 1.0 ... 1 26 9]
 [0.0 0.0 0.0 ... 3 28 12]]


In [None]:
# Inspect the test labels.
print(y_test)

['A' 'A' 'A' 'D' 'D' 'D' 'D' 'D' 'D' 'D' 'D' 'A' 'A' 'D' 'D' 'A' 'D' 'D'
 'A' 'A' 'A' 'D' 'D' 'A' 'D' 'A' 'D' 'D' 'A' 'A' 'A' 'A' 'D' 'D' 'D' 'A'
 'D' 'A' 'D' 'A' 'A' 'A' 'A' 'A' 'D' 'A' 'A' 'A' 'A' 'D' 'A' 'A' 'D' 'D'
 'D' 'A' 'D' 'D' 'A' 'D' 'D' 'A' 'D' 'D' 'D' 'A' 'D' 'D' 'A' 'A' 'D' 'D'
 'A' 'D' 'A' 'D' 'A' 'D' 'A' 'A' 'A' 'A' 'D' 'A' 'A' 'A' 'D' 'A' 'A' 'D'
 'D' 'D' 'D' 'D' 'A' 'D' 'D' 'A' 'A' 'D' 'A' 'D' 'A' 'D' 'A' 'D' 'A' 'A'
 'D' 'D' 'D' 'A' 'D' 'D' 'A' 'A' 'A' 'D' 'D' 'A' 'A' 'A' 'D' 'A' 'D' 'D'
 'D' 'A' 'D' 'A' 'A' 'D' 'D' 'A' 'D' 'D' 'D' 'A' 'A' 'D' 'D' 'A' 'A' 'D'
 'D' 'D' 'A' 'D' 'D' 'A' 'A' 'D' 'A' 'A' 'A' 'A' 'A' 'D' 'A' 'D' 'D' 'A'
 'D' 'A' 'A' 'A' 'D' 'D' 'A' 'A' 'D' 'D' 'A' 'A' 'A' 'D' 'D' 'D' 'D' 'A'
 'D' 'A' 'D' 'D' 'A' 'D' 'D' 'A' 'A' 'A' 'D' 'A' 'A' 'D' 'D' 'D' 'D' 'D'
 'D' 'D' 'D' 'A' 'A' 'A' 'A' 'A' 'A' 'A' 'D' 'D' 'A' 'A' 'A' 'A' 'D' 'D'
 'D' 'A' 'A' 'A' 'D' 'D' 'A' 'A' 'A' 'D' 'A' 'D' 'D' 'A' 'D' 'A' 'A' 'A'
 'D' 'D' 'A' 'D' 'A' 'D' 'D' 'A' 'A' 'A' 'D' 'A' 'D

## Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise only the last three columns of the feature matrices and leave
# every other column as it is. The scaler is fitted on the training rows
# alone; the test rows reuse those fitted statistics.
sc = StandardScaler()

X_trainTransformed = sc.fit_transform(X_train[:, -3:])
X_train = np.hstack((X_train[:, :-3], X_trainTransformed))

X_testTransformed = sc.transform(X_test[:, -3:])
X_test = np.hstack((X_test[:, :-3], X_testTransformed))

In [None]:
# Inspect the training feature matrix again at this point in the notebook.
print(X_train)

[[0.0 0.0 1.0 ... 0.08291977940922753 0.456406770035258
  -0.19541820104301294]
 [1.0 0.0 0.0 ... -1.0493641049374645 2.639807747760975
  2.0129701256071595]
 [0.0 0.0 1.0 ... 0.08291977940922753 0.21380666139906718
  -0.931547643259737]
 ...
 [0.0 0.0 1.0 ... -1.0493641049374645 0.456406770035258
  -0.563482922151375]
 [1.0 0.0 0.0 ... -1.0493641049374645 0.6990068786714487
  2.0129701256071595]
 [0.0 0.0 1.0 ... -1.0493641049374645 -1.2417939904180775
  -1.2996123643680992]]


In [None]:
# Inspect the test feature matrix again at this point in the notebook.
print(X_test)

[[0.0 1.0 0.0 ... -1.0493641049374645 -0.756593773145696
  -0.19541820104301294]
 [0.0 1.0 0.0 ... -1.0493641049374645 -0.5139936645095051
  -0.19541820104301294]
 [0.0 1.0 0.0 ... -1.0493641049374645 -0.2713935558733144
  -0.19541820104301294]
 ...
 [0.0 0.0 0.0 ... 0.08291977940922753 -0.756593773145696
  -0.563482922151375]
 [0.0 0.0 1.0 ... -1.0493641049374645 0.6990068786714487
  0.5407112411737112]
 [0.0 0.0 0.0 ... 1.2152036637559194 1.1842070959438302
  1.6449054044987974]]


## Training the Decision Tree Classification model on the Training set

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Gini-impurity decision tree limited to depth 4 with at least 4 samples per
# leaf; random_state is fixed so repeated fits give the same tree.
classifier = DecisionTreeClassifier(
    criterion='gini',
    max_depth=4,
    min_samples_leaf=4,
    random_state=0,
)
classifier.fit(X_train, y_train)

DecisionTreeClassifier(max_depth=4, min_samples_leaf=4, random_state=0)

## Predicting the Test set results

In [None]:
# Predict the test set and print predictions next to the true labels:
# first column is the predicted class, second column is the actual class.
y_pred = classifier.predict(X_test)
print(np.column_stack((y_pred, y_test)))

[['A' 'A']
 ['A' 'A']
 ['A' 'A']
 ...
 ['A' 'A']
 ['D' 'D']
 ['D' 'D']]


## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Confusion matrix: rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Left as the cell's final expression so the notebook displays the accuracy.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[257  30]
 [134 108]]


0.6899810964083176