In [3]:
# Exercício de Machine Learning: 
# Criar um Modelo de ML, usando RNA Sequencial, p/ o Diagnóstico de doenças em soja.

In [5]:
# Importação das bibliotecas
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

In [7]:
# Upload dos Dados:
dataset = pd.read_csv('../dados/soybean.csv')
dataset.head()

Unnamed: 0,date,plant-stand,precip,temp,hail,crop-hist,area-damaged,severity,seed-tmt,germination,...,sclerotia,fruit-pods,fruit-spots,seed,mold-growth,seed-discolor,seed-size,shriveling,roots,class
0,october,normal,gt-norm,norm,yes,same-lst-yr,low-areas,pot-severe,none,90-100,...,absent,norm,dna,norm,absent,absent,norm,absent,norm,diaporthe-stem-canker
1,august,normal,gt-norm,norm,yes,same-lst-two-yrs,scattered,severe,fungicide,80-89,...,absent,norm,dna,norm,absent,absent,norm,absent,norm,diaporthe-stem-canker
2,july,normal,gt-norm,norm,yes,same-lst-yr,scattered,severe,fungicide,lt-80,...,absent,norm,dna,norm,absent,absent,norm,absent,norm,diaporthe-stem-canker
3,july,normal,gt-norm,norm,yes,same-lst-yr,scattered,severe,none,80-89,...,absent,norm,dna,norm,absent,absent,norm,absent,norm,diaporthe-stem-canker
4,october,normal,gt-norm,norm,yes,same-lst-two-yrs,scattered,pot-severe,none,lt-80,...,absent,norm,dna,norm,absent,absent,norm,absent,norm,diaporthe-stem-canker


In [9]:
dataset.shape

(683, 36)

In [11]:
soybean_attributes = dataset.iloc[:,0:35].values
soybean_attributes

array([['october', 'normal', 'gt-norm', ..., 'norm', 'absent', 'norm'],
       ['august', 'normal', 'gt-norm', ..., 'norm', 'absent', 'norm'],
       ['july', 'normal', 'gt-norm', ..., 'norm', 'absent', 'norm'],
       ...,
       ['june', 'lt-normal', '?', ..., '?', '?', 'rotted'],
       ['april', 'lt-normal', '?', ..., '?', '?', 'rotted'],
       ['june', 'lt-normal', '?', ..., '?', '?', 'rotted']], dtype=object)

In [13]:
soybean_disease = dataset.iloc[:,35].values
soybean_disease

array(['diaporthe-stem-canker', 'diaporthe-stem-canker',
       'diaporthe-stem-canker', 'diaporthe-stem-canker',
       'diaporthe-stem-canker', 'diaporthe-stem-canker',
       'diaporthe-stem-canker', 'diaporthe-stem-canker',
       'diaporthe-stem-canker', 'diaporthe-stem-canker', 'charcoal-rot',
       'charcoal-rot', 'charcoal-rot', 'charcoal-rot', 'charcoal-rot',
       'charcoal-rot', 'charcoal-rot', 'charcoal-rot', 'charcoal-rot',
       'charcoal-rot', 'rhizoctonia-root-rot', 'rhizoctonia-root-rot',
       'rhizoctonia-root-rot', 'rhizoctonia-root-rot',
       'rhizoctonia-root-rot', 'rhizoctonia-root-rot',
       'rhizoctonia-root-rot', 'rhizoctonia-root-rot',
       'rhizoctonia-root-rot', 'rhizoctonia-root-rot', 'phytophthora-rot',
       'phytophthora-rot', 'phytophthora-rot', 'phytophthora-rot',
       'phytophthora-rot', 'phytophthora-rot', 'phytophthora-rot',
       'phytophthora-rot', 'phytophthora-rot', 'phytophthora-rot',
       'phytophthora-rot', 'phytophthora-rot'

In [15]:
label_encoder = LabelEncoder()
soybean_disease = label_encoder.fit_transform(soybean_disease)
for t in range (soybean_attributes.shape[1]):
    soybean_attributes[:,t] = label_encoder.fit_transform(soybean_attributes[:,t]).astype('float32')
soybean_disease

array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  7,  7,  7,  7,  7,  7,  7,
        7,  7,  7, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
       15, 15,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
        6,  6,  6,  6,  6, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 11, 11,
       11, 11, 11, 11, 11, 11, 11, 11,  5,  5,  5,  5,  5,  5,  5,  5,  5,
        5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
        5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
       17, 17, 17, 17, 17, 17, 17, 17, 17, 17,  2,  2,  2,  2,  2,  2,  2,
        2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 14, 14, 14, 14,
       14, 14, 14, 14, 14, 14,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1

In [17]:
soybean_disease_dummy = to_categorical(soybean_disease)
soybean_disease_dummy

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [19]:
train_attributes, test_attributes, train_disease, test_disease = train_test_split(soybean_attributes, soybean_disease_dummy,
                                                                                 test_size = 0.3,
                                                                                 random_state = 0)
print(len(train_attributes), len(test_attributes), len(train_disease), len(test_disease))

478 205 478 205


In [21]:
sc = StandardScaler()
train_attributes = sc.fit_transform(train_attributes)
test_attributes = sc.transform(test_attributes)
test_attributes

array([[-0.17567151,  0.92347455, -0.45057756, ...,  0.51257859,
         0.2585467 , -0.02893995],
       [-1.21235033, -0.7613378 ,  1.99687781, ..., -2.25592498,
        -1.98845918,  1.7002223 ],
       [ 0.34266789, -0.7613378 , -1.67430524, ..., -2.25592498,
        -1.98845918,  1.7002223 ],
       ...,
       [ 1.37934671,  0.92347455, -0.45057756, ...,  0.51257859,
         0.2585467 , -0.02893995],
       [-0.17567151, -0.7613378 , -0.45057756, ...,  0.51257859,
         0.2585467 ,  1.7002223 ],
       [ 0.34266789, -0.7613378 ,  1.99687781, ..., -0.87167319,
         2.50555258, -3.48726446]])

In [23]:
RNA_model = Sequential()
RNA_model.add(Dense(units = soybean_attributes.shape[1] + 1, input_dim = soybean_attributes.shape[1]))
RNA_model.add(Dense(units = soybean_attributes.shape[1]))
RNA_model.add(Dense(units = dataset['class'].nunique(), activation = 'softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1737219259.111215   57424 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 757 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5


In [25]:
RNA_model.summary()

In [27]:
RNA_model.compile(optimizer = 'adam', loss = 'categorical_crossentropy',
                 metrics = ['accuracy'])
RNA_model.fit(train_attributes, train_disease, epochs = 1000,
             validation_data = (test_attributes, test_disease))

Epoch 1/1000


I0000 00:00:1737219264.403598   57495 service.cc:148] XLA service 0x7837f8005bd0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1737219264.403624   57495 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
2025-01-18 13:54:24.429716: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1737219264.539731   57495 cuda_dnn.cc:529] Loaded cuDNN version 90501
2025-01-18 13:54:25.092526: W external/local_xla/xla/service/gpu/nvptx_compiler.cc:930] The NVIDIA driver's CUDA version is 12.4 which is older than the PTX compiler version 12.6.85. Because the driver is older than the PTX compiler version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.


[1m 1/15[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m30s[0m 2s/step - accuracy: 0.0000e+00 - loss: 3.5545

I0000 00:00:1737219265.477456   57495 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 149ms/step - accuracy: 0.0374 - loss: 3.2784 - val_accuracy: 0.2244 - val_loss: 2.4630
Epoch 2/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.3099 - loss: 2.2969 - val_accuracy: 0.4585 - val_loss: 1.9421
Epoch 3/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4395 - loss: 1.8469 - val_accuracy: 0.6195 - val_loss: 1.5797
Epoch 4/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6100 - loss: 1.4560 - val_accuracy: 0.6634 - val_loss: 1.3306
Epoch 5/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6692 - loss: 1.2558 - val_accuracy: 0.7220 - val_loss: 1.1378
Epoch 6/1000
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7194 - loss: 1.0521 - val_accuracy: 0.7512 - val_loss: 0.9873
Epoch 7/1000
[1m15/15[0m [32m━━

<keras.src.callbacks.history.History at 0x7838ef4ab890>

In [28]:
prevision = RNA_model.predict(train_attributes)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


In [31]:
prevision

array([[4.7626459e-19, 1.8226054e-07, 1.4305639e-17, ..., 2.0987928e-16,
        9.6480143e-14, 2.2406534e-19],
       [3.0133004e-27, 4.4532126e-06, 9.9645328e-18, ..., 5.0371468e-30,
        5.4216145e-32, 3.5198844e-26],
       [4.4928978e-08, 2.9116711e-07, 1.4816455e-08, ..., 2.0267885e-08,
        4.5669279e-10, 2.2671455e-10],
       ...,
       [2.2996787e-26, 7.1898285e-08, 9.9999988e-01, ..., 6.1560684e-18,
        7.1296360e-22, 3.8446757e-26],
       [4.1806555e-14, 6.5955970e-07, 3.1037993e-12, ..., 3.2085418e-20,
        1.2114201e-15, 1.8137867e-19],
       [3.8392087e-14, 9.9982762e-01, 3.0286953e-20, ..., 5.6598216e-18,
        5.4572975e-08, 3.3514614e-21]], dtype=float32)