# Classificação - Dataset Iris com Rede Neural

- **Dataset**: iris
- **Biblioteca**: tensorflow
- **Seleção de hiperparâmetros**: GridSearchCV
- **Performance do modelo**: cross_val_score

> __Objetivo__: Analisar as features das amostras de variações morfológicas das flores coletadas e classificá-las em uma de três espécies:  
> _Iris setosa_, _Iris virginica_ ou _Iris versicolor_.  
> __Descrição__: O conjunto de dados contém uma amostra de 150 registros com cinco atributos:
- comprimento da sépala;
- largura da sépala;
- comprimento da pétala;
- largura da pétala;
- espécies --> (_class_)

In [None]:
import pandas as pd
import numpy as np
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from yellowbrick.classifier import ConfusionMatrix
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras import backend as k
from tensorflow.keras import utils as np_utils
from keras.models import model_from_json

from scipy import stats
import warnings
from datetime import datetime

import scikeras
from scikeras.wrappers import KerasClassifier

In [2]:
warnings.filterwarnings('ignore')
print(f'Data inicio: {datetime.now()}')

Data inicio: 2025-02-26 17:17:49.195999


In [3]:
# Load the bundled Iris dataset and build a DataFrame with one column per
# measurement plus the integer species code in a trailing 'Class' column.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(Class=iris.target)

# Preview the first rows.
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),Class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
# Per-feature summary statistics of the raw measurements
# (count, min/max, mean, variance, skewness, kurtosis).
stats.describe(iris.data)

DescribeResult(nobs=150, minmax=(array([4.3, 2. , 1. , 0.1]), array([7.9, 4.4, 6.9, 2.5])), mean=array([5.84333333, 3.05733333, 3.758     , 1.19933333]), variance=array([0.68569351, 0.18997942, 3.11627785, 0.58100626]), skewness=array([ 0.31175306,  0.31576711, -0.27212767, -0.10193421]), kurtosis=array([-0.57356795,  0.18097632, -1.39553589, -1.33606741]))

In [5]:
# Tidy the frame: strip the '(cm)' unit suffix from the measurement column
# names and replace the integer 'Class' code with a readable species label
# stored in 'target'.
#
# The cell can be re-run safely: columns are selected by name instead of by
# dtype, and the 'Class' -> 'target' conversion only runs while 'Class' is
# still present.
class_map = {0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}

# Map every measurement column to its unit-free name.
rename_map = {cname: cname.replace('(cm)', "").strip()
              for cname in df.columns if cname not in ('Class', 'target')}
feature_cols = list(rename_map.values())

df = df.rename(columns=rename_map)

if 'Class' in df.columns:
    # assign + drop builds a new frame rather than mutating the old one in place.
    df = df.assign(target=df['Class'].map(class_map)).drop(columns=['Class'])

In [6]:
# Check the renamed feature columns and the new 'target' label column.
df.head()

Unnamed: 0,sepal length,sepal width,petal length,petal width,target
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
# Feature matrix: the first four columns of the frame as a NumPy array.
x = df.iloc[:, 0:4].to_numpy()

# Labels as a 1-D Series. Selecting with double brackets (df[['target']])
# yields a single-column DataFrame, which LabelEncoder has to flatten and
# reports with a DataConversionWarning.
y = df['target']

In [8]:
# Encode the species names as integer class ids. The fitted encoder stays
# available as `lbl_enc`.
lbl_enc = LabelEncoder()
y = lbl_enc.fit_transform(y)
# Display the encoded labels.
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [9]:
# One-hot encode the integer class ids so they match the 3-unit softmax output
# trained with the 'categorical_crossentropy' loss.
# NOTE(review): `y` is overwritten from itself, so this cell presumably has to
# run exactly once after the label-encoding cell — confirm.
y = np_utils.to_categorical(y)

In [10]:
"""
def split_train_test(x:pd.DataFrame, ratio_test):
    df = x.copy()
    shuf_ind = np.random.permutation(len(df))
    set_split = int(len(df) * ratio_test)
    set_test_size = shuf_ind.loc[set_split:]
    set_train_size = shuf_ind.loc[:set_split]

    return df.iloc[set_train_size], df.iloc[set_test_size]
"""

'\ndef split_train_test(x:pd.DataFrame, ratio_test):\n    df = x.copy()\n    shuf_ind = np.random.permutation(len(df))\n    set_split = int(len(df) * ratio_test)\n    set_test_size = shuf_ind.loc[set_split:]\n    set_train_size = shuf_ind.loc[:set_split]\n\n    return df.iloc[set_train_size], df.iloc[set_test_size]\n'

In [11]:
# Hold out 25% of the samples for evaluation; the fixed random_state keeps
# the split identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

In [12]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout and batch shuffling are repeatable after a
# "Restart & Run All".
tf.keras.utils.set_random_seed(0)

# Small fully connected network: 4 input features -> 8 -> 8 -> 3 class
# probabilities. The input shape is declared with an explicit Input layer;
# Keras 3 warns when `input_dim` is passed to the first Dense layer instead.
classifier = Sequential([
    tf.keras.Input(shape=(4,)),
    Dense(units=8, activation='relu', kernel_initializer='normal'),
    Dropout(rate=0.2),
    Dense(units=8, activation='relu', kernel_initializer='normal'),
    Dropout(rate=0.2),
    Dense(units=3, activation='softmax'),
])

classifier.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Keep the History object in a variable instead of echoing its repr as the
# cell output.
history = classifier.fit(x_train, y_train, batch_size=10, epochs=100)

Epoch 1/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3702 - loss: 1.0921
Epoch 2/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5635 - loss: 1.0815 
Epoch 3/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5504 - loss: 1.0768 
Epoch 4/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5976 - loss: 1.0704 
Epoch 5/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6724 - loss: 1.0534 
Epoch 6/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5735 - loss: 1.0506 
Epoch 7/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5999 - loss: 1.0314 
Epoch 8/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6206 - loss: 1.0009 
Epoch 9/100
[1m12/12[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2988880a240>

## 1. Salvar o classificador

In [13]:
# Persist the trained model in two files: the learned weights and the
# architecture serialised as JSON.
classifier.save_weights('classifier_iris.weights.h5')

neural_net_json = classifier.to_json()
with open('classifier_iris.json', mode='w') as json_file:
    json_file.write(neural_net_json)

## 2. Carregar o classificador

In [14]:
# Rebuild the network from its JSON architecture, then restore the trained
# weights. The context manager closes the file even if reading raises.
with open('classifier_iris.json', 'r') as file:
    struct_classifier = file.read()

loaded_classifier = model_from_json(struct_classifier)
loaded_classifier.load_weights('classifier_iris.weights.h5')

In [15]:
# Class probabilities for the held-out samples, one row per sample.
y_prob = classifier.predict(x_test)

# Turn each row into a boolean one-hot vector by marking its most probable
# class. A fixed 0.5 threshold would leave a row all-False whenever no class
# reaches 50%, and such a row is scored as an error regardless of which class
# was ranked first.
y_pred = np.zeros(y_prob.shape, dtype=bool)
y_pred[np.arange(y_prob.shape[0]), np.argmax(y_prob, axis=1)] = True
y_pred

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


array([[False, False,  True],
       [False,  True, False],
       [ True, False, False],
       [False, False,  True],
       [ True, False, False],
       [False, False,  True],
       [ True, False, False],
       [False,  True, False],
       [False,  True, False],
       [False,  True, False],
       [False, False,  True],
       [False,  True, False],
       [False,  True, False],
       [False,  True, False],
       [False,  True, False],
       [ True, False, False],
       [False,  True, False],
       [False,  True, False],
       [ True, False, False],
       [ True, False, False],
       [False, False,  True],
       [False,  True, False],
       [ True, False, False],
       [ True, False, False],
       [False, False,  True],
       [ True, False, False],
       [ True, False, False],
       [False,  True, False],
       [False,  True, False],
       [ True, False, False],
       [False, False,  True],
       [False,  True, False],
       [ True, False, False],
       [Fa

In [16]:
# Share of test samples whose predicted one-hot row matches the true row exactly.
accuracy_score(y_test, y_pred)

0.9736842105263158