In [54]:
#importando biblioteca(s)
import pandas as pd

# Load the dataset from the Excel workbook, then list its column names
# to confirm the file was read as expected.
arquivo_dados = 'DataSetAnimaisCaracteristicas.xlsx'
dsap = pd.read_excel(arquivo_dados)
dsap.columns


Index(['animal_name', 'hair', 'feathers', 'eggs', 'milk', 'airborne',
       'aquatic', 'predator', 'toothed', 'backbone', 'breathes', 'venomous',
       'fins', 'legs', 'tail', 'domestic', 'catsize', 'class_type'],
      dtype='object')

In [55]:
# Preview the first five rows as a table to sanity-check the loaded data.
dsap.head(n=5)

Unnamed: 0,animal_name,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,class_type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1


In [56]:
# Inspect the data type pandas stored for each column of the DataFrame.
dsap.dtypes

animal_name    object
hair            int64
feathers        int64
eggs            int64
milk            int64
airborne        int64
aquatic         int64
predator        int64
toothed         int64
backbone        int64
breathes        int64
venomous        int64
fins            int64
legs            int64
tail            int64
domestic        int64
catsize         int64
class_type      int64
dtype: object

In [57]:
# Remove the animal_name column, which holds each animal's name.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# keeps this cell idempotent: re-running it after the column is already gone
# no longer raises KeyError.
dsap = dsap.drop(columns='animal_name', errors='ignore')

In [58]:
# Data modelling: split the frame into X (the predictor features used for
# training) and y (the target variable), then preview the first rows of each.
colunas_preditoras = [
    'hair', 'feathers', 'eggs', 'milk', 'airborne',
    'aquatic', 'predator', 'toothed', 'backbone', 'breathes', 'venomous',
    'fins', 'legs', 'tail', 'domestic', 'catsize',
]
X = dsap[colunas_preditoras]
y = dsap['class_type']
for amostra in (X, y):
    print(amostra.head())

   hair  feathers  eggs  milk  airborne  aquatic  predator  toothed  backbone  \
0     1         0     0     1         0        0         1        1         1   
1     1         0     0     1         0        0         0        1         1   
2     0         0     1     0         0        1         1        1         1   
3     1         0     0     1         0        0         1        1         1   
4     1         0     0     1         0        0         1        1         1   

   breathes  venomous  fins  legs  tail  domestic  catsize  
0         1         0     0     4     0         0        1  
1         1         0     0     4     1         0        1  
2         0         0     1     0     1         0        0  
3         1         0     0     4     0         0        1  
4         1         0     0     4     1         0        1  
0    1
1    1
2    4
3    1
4    1
Name: class_type, dtype: int64


In [59]:
# Prepare and train the model that predicts each animal's class.
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state=50)

from sklearn import tree

# Seed the tree as well as the split: scikit-learn permutes the features at
# every split, so when several splits tie on the criterion an unseeded tree
# can differ from run to run, and so can the metrics reported below.
modelopreditor = tree.DecisionTreeClassifier(criterion = 'entropy', max_depth = 7,
                                             random_state = 50)
modelopreditor = modelopreditor.fit(X_train, y_train)

# Predicted class_type for every held-out row.
predictions = modelopreditor.predict(X_test)



In [60]:
# Show the first rows of the held-out features and of their true labels.
for parte_teste in (X_test, y_test):
    print(parte_teste.head())

    hair  feathers  eggs  milk  airborne  aquatic  predator  toothed  \
18     0         0     1     0         0        1         1        1   
73     0         0     1     0         0        1         0        1   
54     1         0     0     1         0        0         1        1   
39     1         0     1     0         1        0         0        0   
17     1         0     0     1         0        0         0        1   

    backbone  breathes  venomous  fins  legs  tail  domestic  catsize  
18         1         0         0     1     0     1         0        1  
73         1         0         0     1     0     1         0        0  
54         1         1         0     0     4     1         0        0  
39         0         1         1     0     6     0         1        0  
17         1         1         0     0     4     1         0        1  
18    4
73    4
54    1
39    6
17    1
Name: class_type, dtype: int64


In [61]:
# Build a confusion matrix: true classes on the rows, predicted classes on
# the columns, with row/column totals in the 'Todos' margins.
matriz_confusao = pd.crosstab(
    index=y_test,
    columns=predictions,
    rownames=['Real'],
    colnames=['Previsto'],
    margins=True,
    margins_name='Todos',
)
print('\nMatriz de confusão detalhada:\n', matriz_confusao)


Matriz de confusão detalhada:
 Previsto  1  2  3  4  5  6  7  Todos
Real                                
1         5  0  0  0  0  0  0      5
2         0  7  0  0  0  0  0      7
3         0  0  1  0  0  0  0      1
4         0  0  0  8  0  0  0      8
5         0  0  0  0  1  0  0      1
6         0  0  0  0  0  4  1      5
7         0  0  0  0  0  0  4      4
Todos     5  7  1  8  1  4  5     31


In [62]:
# Report the quality (precision / recall / f1 / support) of the trained model.
import sklearn.metrics as metrics

# Display names listed in ascending class_type order (codes 1-7), the same
# order the report uses for its sorted labels.
nomes_classes = ['Mammal', 'bird',
                 'Reptile', 'Fish',
                 'Amphibian', 'Bug',
                 'Invertebrate']
# Pass the label codes explicitly. Without `labels`, the report infers them
# from y_test/predictions and raises ValueError as soon as one class is
# missing from the test split, because the name count no longer matches.
codigos_classes = list(range(1, len(nomes_classes) + 1))

print( 'Relatório sobre a qualidade:\n')
print(metrics.classification_report(y_test, predictions,
                                    labels = codigos_classes,
                                    target_names = nomes_classes))

Relatório sobre a qualidade:

              precision    recall  f1-score   support

      Mammal       1.00      1.00      1.00         5
        bird       1.00      1.00      1.00         7
     Reptile       1.00      1.00      1.00         1
        Fish       1.00      1.00      1.00         8
   Amphibian       1.00      1.00      1.00         1
         Bug       1.00      0.80      0.89         5
Invertebrate       0.80      1.00      0.89         4

    accuracy                           0.97        31
   macro avg       0.97      0.97      0.97        31
weighted avg       0.97      0.97      0.97        31

