# Importing Libraries

In [1]:
import sys
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import sklearn.tree as tree

# Importing Dataset

In [2]:
# Load the drug dataset from the comma-separated file next to this notebook.
df = pd.read_csv("1- drug200.csv", sep=",")

# Show the first rows as a quick sanity check of the parsed columns.
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY


In [3]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
df.shape

(200, 6)

# Pre-processing

In [4]:
# Every column except the last becomes the feature matrix X; the last column
# is the target y. Both are extracted as NumPy arrays.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Encoding Data

In [5]:
from sklearn.preprocessing import LabelEncoder

# Positions of the categorical feature columns; the same position addresses
# the column in both `df` and `X`, because X was taken from df's leading columns.
categorical_columns = {"Sex": 1, "BP": 2, "Cholesterol": 3}

# Use one encoder per column and keep them all, so each column's
# category <-> integer mapping remains available afterwards (e.g. via
# `label_encoders["BP"].classes_` or `.inverse_transform`). Re-fitting a single
# shared encoder would discard the mappings of the earlier columns.
label_encoders = {}
for column_name, column_index in categorical_columns.items():
    label_encoder = LabelEncoder()
    # Fit on the raw column from `df` rather than on X, so that re-running this
    # cell fits on the original category values instead of on integers written
    # into X by a previous run.
    X[:, column_index] = label_encoder.fit_transform(df.iloc[:, column_index])
    label_encoders[column_name] = label_encoder

In [6]:
# Use the "Drug" column (as a pandas Series) as the target.
y = df["Drug"]

# Preview the first five target labels.
y[:5]

0    drugY
1    drugC
2    drugC
3    drugX
4    drugY
Name: Drug, dtype: object

# Splitting Data into Training & Test Sets

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.3, random_state=123)
X_train, X_test, y_train, y_test = split_parts

In [8]:
# Report the dimensions of the training features and training labels,
# one per line.
print(
    f"Shape of X training set: {X_train.shape}",
    f"Size of Y training set: {y_train.shape}",
    sep="\n",
)

Shape of X training set: (140, 5)
Size of Y training set: (140,)


# Decision Tree Model

In [9]:
# NOTE: despite its name, `regressor` holds a classifier; the name is kept
# because later cells refer to it.
# The tree uses criterion="entropy" and is capped at depth 4.
# random_state is fixed so that repeated fits on the same training data build
# the same tree; it reuses the seed value passed to train_test_split.
regressor = DecisionTreeClassifier(criterion="entropy", max_depth=4, random_state=123)
regressor.fit(X_train, y_train)

# Prediction

In [10]:
# Predict a drug label for every row of the held-out test set.
predTree = regressor.predict(X_test)

# Print the same five positions (10 through 14) of the predictions and of the
# true labels so they can be compared side by side.
preview_rows = slice(10, 15)
print(predTree[preview_rows])
print(y_test[preview_rows])

['drugC' 'drugY' 'drugX' 'drugY' 'drugB']
82     drugC
185    drugY
182    drugX
189    drugY
108    drugB
Name: Drug, dtype: object


# Evaluation

In [11]:
from sklearn import metrics
import matplotlib.pyplot as plt

# Score the test-set predictions against the true labels.
accuracy = metrics.accuracy_score(y_test, predTree)
print("DecisionTrees's Accuracy: ", accuracy)

Matplotlib is building the font cache; this may take a moment.


DecisionTrees's Accuracy:  0.9833333333333333


# Visualization

In [12]:
# Optional one-time environment setup, intentionally left commented out so it
# does not run on every execution of the notebook. Uncomment to install:
# !conda install -c conda-forge pydotplus -y
# !conda install -c conda-forge python-graphviz -y

Channels:
 - conda-forge
 - defaults
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: e:\Courses\IBM-Machine-Learning-with-Python-Course\.conda

  added / updated specs:
    - pydotplus


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2024.2.2           |     pyhd8ed1ab_0         157 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         157 KB

The following packages will be SUPERSEDED by a higher-priority channel:

  certifi            pkgs/main/win-64::certifi-2024.2.2-py~ --> conda-forge/noarch::certifi-2024.2.2-pyhd8ed1ab_0 



Downloading and Extracting Packages: ...working... done
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing t

In [13]:
from sklearn.tree import export_graphviz
export_graphviz(regressor, out_file='1-tree.dot', filled=True, feature_names=['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K'])
!dot -Tpng 1-tree.dot -o 1-tree.png
