# Árboles de decisión
- Un árbol de decisión es un modelo de predicción utilizado en diversos ámbitos que van desde la inteligencia artificial hasta la economía.
- Dado un conjuntos de datos, se fabrican diagramas de construcciones lógicas, que sirven para representar y categorizar una serie de condiciones que ocurren de forma sucesiva, para la resolición de un problema.
- Los árboles de decisión se pueden clasificar en tres tipos:
    - Árboles de decisión binarios: se utilizan para clasificar datos binarios.
    - Árboles de decisión múltiples: se utilizan para clasificar datos que pueden tomar múltiples valores.
    - Árboles de decisión no lineales: se utilizan para clasificar datos que no siguen una línea lineal.

    ![image.png](attachment:image.png)

# Árbol de decisión - clasificación

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
vinos = pd.read_csv('vino.csv')
vinos.head()

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline,Wine Type
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,One
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,One
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,One
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,One
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,One


In [5]:
vinos['Wine Type'].unique()

array(['One', 'Two', 'Three'], dtype=object)

In [6]:
vinos['Wine Type'].value_counts()

Wine Type
Two      71
One      59
Three    48
Name: count, dtype: int64

In [8]:
X = vinos.drop(['Wine Type'] , axis=1)
y = vinos['Wine Type']

In [9]:
from sklearn.model_selection import train_test_split

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [13]:
X_train

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
114,12.08,1.39,2.50,22.5,84.0,2.56,2.29,0.43,1.04,2.90,0.93,3.19,385.0
109,11.61,1.35,2.70,20.0,94.0,2.74,2.92,0.29,2.49,2.65,0.96,3.26,680.0
134,12.51,1.24,2.25,17.5,85.0,2.00,0.58,0.60,1.25,5.45,0.75,1.51,650.0
131,12.88,2.99,2.40,20.0,104.0,1.30,1.22,0.24,0.83,5.40,0.74,1.42,530.0
162,12.85,3.27,2.58,22.0,106.0,1.65,0.60,0.60,0.96,5.58,0.87,2.11,570.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,13.86,1.35,2.27,16.0,98.0,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045.0
148,13.32,3.24,2.38,21.5,92.0,1.93,0.76,0.45,1.25,8.42,0.55,1.62,650.0
46,14.38,3.59,2.28,16.0,102.0,3.25,3.17,0.27,2.19,4.90,1.04,3.44,1065.0
169,13.40,4.60,2.86,25.0,112.0,1.98,0.96,0.27,1.11,8.50,0.67,1.92,630.0


In [14]:
from sklearn.tree import DecisionTreeClassifier

In [15]:
arbol = DecisionTreeClassifier()

In [16]:
arbol.fit(X_train, y_train)

In [18]:
predicciones = arbol.predict(X_test)
predicciones

array(['Three', 'One', 'Two', 'Three', 'Two', 'One', 'One', 'Two',
       'Three', 'Two', 'One', 'One', 'Three', 'One', 'Three', 'One',
       'Three', 'One', 'Two', 'Three', 'One', 'Two', 'Two', 'Two',
       'Three', 'One', 'Three', 'One', 'One', 'Three', 'Two', 'Two',
       'One', 'Three', 'Three', 'Two', 'Two', 'One', 'One', 'One', 'One',
       'Three', 'Two', 'Three', 'Two', 'Two', 'Two', 'Three', 'Two',
       'Three', 'Three', 'One', 'Three', 'Two'], dtype=object)

In [19]:
y_test

139    Three
13       One
110      Two
138    Three
112      Two
5        One
53       One
79       Two
150    Three
108      Two
12       One
71       Two
176    Three
6        One
149    Three
25       One
177    Three
15       One
101      Two
133    Three
40       One
64       Two
27       One
100      Two
159    Three
47       One
70       Two
42       One
21       One
145    Three
128      Two
116      Two
2        One
144    Three
152    Three
76       Two
123      Two
32       One
30       One
55       One
57       One
173    Three
92       Two
146    Three
113      Two
60       Two
161    Three
142    Three
102      Two
118      Two
171    Three
158    Three
156    Three
80       Two
Name: Wine Type, dtype: object

In [21]:
from sklearn.metrics import confusion_matrix, classification_report

In [22]:
print(classification_report(y_test, predicciones))

              precision    recall  f1-score   support

         One       0.89      0.94      0.91        17
       Three       0.89      0.89      0.89        18
         Two       0.89      0.84      0.86        19

    accuracy                           0.89        54
   macro avg       0.89      0.89      0.89        54
weighted avg       0.89      0.89      0.89        54



In [23]:
print(confusion_matrix(y_test,predicciones))

[[16  0  1]
 [ 1 16  1]
 [ 1  2 16]]
