# Decision Tree

In [1]:
import pandas as pd 
from sklearn.tree import DecisionTreeClassifier

from sklearn.preprocessing import LabelEncoder

# Load Data

In [2]:
df = pd.read_csv("./datasets/immun.csv")
df.head()

Unnamed: 0,age,sickness,city,immun
0,old,yes,Aachen,low
1,old,no,Aachen,low
2,old,no,Aachen,low
3,mid,yes,Berlin,low
4,mid,yes,Berlin,low


# Split Data into Input and Output

In [4]:
inputs = df.drop("immun", axis = "columns")
inputs.head()


target = df[["immun"]]
target.head()


Unnamed: 0,immun
0,low
1,low
2,low
3,low
4,low


# Encoding

Convert Strings to numbers

Age: 
old : 0
mid: 1
new: 2

In [5]:
le_age = LabelEncoder()
le_sickness = LabelEncoder()
le_city = LabelEncoder()
le_immun = LabelEncoder()


inputs["age"] = le_age.fit_transform(inputs["age"])
inputs["sickness"] = le_sickness.fit_transform(inputs["sickness"])
inputs["city"] = le_city.fit_transform(inputs["city"])

target["immun"] = le_immun.fit_transform(target["immun"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target["immun"] = le_immun.fit_transform(target["immun"])


In [6]:
inputs.head()

Unnamed: 0,age,sickness,city
0,2,1,0
1,2,0,0
2,2,0,0
3,0,1,1
4,0,1,1


# See the Numbers(Classes) for each encoder

In [7]:
encoders = [le_age, le_sickness, le_city, le_immun] 

for encoder in encoders:

    encoding = {}

    for i in list(encoder.classes_):
        encoding[i] = encoder.transform([i])[0]

    print(encoding)


{'mid': 0, 'new': 1, 'old': 2}
{'no': 0, 'yes': 1}
{'Aachen': 0, 'Berlin': 1, 'Frankfurt': 2}
{'high': 0, 'low': 1}


# Build and Train the model

In [8]:
model = DecisionTreeClassifier()

model.fit(inputs, target)

# Evaluate and Score

In [9]:
model.score(inputs, target)

1.0

# Manual Prediction

In [10]:

age = 0 
sickness = 1 
city  = 1

model.predict([[ age, sickness, city]])



array([1])

# Read Tree Information

In [15]:
print(model.classes_) # [0 1]
print(model.feature_names_in_) # ['age' 'sickness' 'city']
print(model.n_outputs_) # 1
print(model.get_depth()) # 3
print(model.get_n_leaves()) # 4

[0 1]
['age' 'sickness' 'city']
1
3
4
