### Atividade de classificação usando redes neurais do aluno **Thiago Ribeiro Aragão**

In [4]:
pip install ucimlrepo



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf

Dataset: nível de obesidade (7 classes de rótulos)
---
Link: https://archive.ics.uci.edu/dataset/544/estimation+of+obesity+levels+based+on+eating+habits+and+physical+condition

In [3]:
"""
Código copiado da página do UCI para import do dataset
"""


from ucimlrepo import fetch_ucirepo

# fetch dataset
estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition = fetch_ucirepo(id=544)

# data (as pandas dataframes)
X = estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.data.features
y = estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.data.targets

# metadata
print(estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.metadata)

# variable information
print(estimation_of_obesity_levels_based_on_eating_habits_and_physical_condition.variables)


{'uci_id': 544, 'name': 'Estimation of Obesity Levels Based On Eating Habits and Physical Condition ', 'repository_url': 'https://archive.ics.uci.edu/dataset/544/estimation+of+obesity+levels+based+on+eating+habits+and+physical+condition', 'data_url': 'https://archive.ics.uci.edu/static/public/544/data.csv', 'abstract': 'This dataset include data for the estimation of obesity levels in individuals from the countries of Mexico, Peru and Colombia, based on their eating habits and physical condition. ', 'area': 'Health and Medicine', 'tasks': ['Classification', 'Regression', 'Clustering'], 'characteristics': ['Multivariate'], 'num_instances': 2111, 'num_features': 16, 'feature_types': ['Integer'], 'demographics': ['Gender', 'Age'], 'target_col': ['NObeyesdad'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2019, 'last_updated': 'Tue Sep 10 2024', 'dataset_doi': '10.24432/C5H31Z', 'creators': [], 'intro_paper': {'ID': 358, 'type': 

In [6]:
X = pd.get_dummies(X, drop_first=True)
y = LabelEncoder().fit_transform(y)

"""
Divisão dos dados e rótulos em teste e treino
"""
X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.7, random_state=42)

"""
Padronização
"""
scaler = StandardScaler()
X_treino = scaler.fit_transform(X_treino)
X_teste = scaler.transform(X_teste)

"""
Rede neural
"""
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_treino.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(len(set(y)), activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

"""
Treino do modelo
"""
r = model.fit(X_treino, y_treino, epochs=75, batch_size=16, validation_split=0.2, verbose=0)

"""
Previsão do modelo
"""
y_prev = model.predict(X_teste)
y_prev_classes = y_prev.argmax(axis=1)

"""
Comparação da previsão com os rótulos do conjunto de teste para definição das métricas dos resultados
"""
accuracy = accuracy_score(y_teste, y_prev_classes)
precision = precision_score(y_teste, y_prev_classes, average='weighted')
recall = recall_score(y_teste, y_prev_classes, average='weighted')
f1 = f1_score(y_teste, y_prev_classes, average='weighted')


"""
Com mais de 50 épocas, todas as métricas começam a convergir para 85% sempre
"""
print("Acurácia: " + str(round(accuracy, 2)*100) + "%")
print("Precisão " + str(round(precision, 2)*100) + "%")
print("Recall " + str(round(recall, 2)*100) + "%")
print("F1 Score " + str(round(f1, 2)*100) + "%")


  y = column_or_1d(y, warn=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Acurácia: 26.0%
Precisão 26.0%
Recall 26.0%
F1 Score 24.0%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Dataset: avaliação de carro (4 classes de rótulos)
---
Link: https://archive.ics.uci.edu/dataset/19/car+evaluation

In [None]:
"""
Código copiado da página do UCI para import do dataset
"""


from ucimlrepo import fetch_ucirepo

# fetch dataset
car_evaluation = fetch_ucirepo(id=19)

# data (as pandas dataframes)
X = car_evaluation.data.features
y = car_evaluation.data.targets

# metadata
print(car_evaluation.metadata)

# variable information
print(car_evaluation.variables)


{'uci_id': 19, 'name': 'Car Evaluation', 'repository_url': 'https://archive.ics.uci.edu/dataset/19/car+evaluation', 'data_url': 'https://archive.ics.uci.edu/static/public/19/data.csv', 'abstract': 'Derived from simple hierarchical decision model, this database may be useful for testing constructive induction and structure discovery methods.', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1728, 'num_features': 6, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1988, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5JP48', 'creators': ['Marko Bohanec'], 'intro_paper': {'ID': 249, 'type': 'NATIVE', 'title': 'Knowledge acquisition and explanation for multi-attribute decision making', 'authors': 'M. Bohanec, V. Rajkovič', 'venue': '8th Intl Workshop on Expert Systems and their Applications, 

In [8]:
X = pd.get_dummies(X, drop_first=True)
y = LabelEncoder().fit_transform(y)

"""
Divisão dos dados e rótulos em teste e treino
"""
X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.7, random_state=42)

"""
Padronização
"""
scaler = StandardScaler()
X_treino = scaler.fit_transform(X_treino)
X_teste = scaler.transform(X_teste)

"""
Rede neural
"""
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_treino.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(set(y)), activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

"""
Treino do modelo
"""
r = model.fit(X_treino, y_treino, epochs=20, batch_size=16, validation_split=0.2, verbose=0)

"""
Previsão do modelo
"""
y_prev = model.predict(X_teste)
y_prev_classes = y_prev.argmax(axis=1)

"""
Comparação da previsão com os rótulos do conjunto de teste para definição das métricas dos resultados
"""
accuracy = accuracy_score(y_teste, y_prev_classes)
precision = precision_score(y_teste, y_prev_classes, average='weighted')
recall = recall_score(y_teste, y_prev_classes, average='weighted')
f1 = f1_score(y_teste, y_prev_classes, average='weighted')


print("Acurácia: " + str(round(accuracy, 2)*100) + "%")
print("Precisão " + str(round(precision, 2)*100) + "%")
print("Recall " + str(round(recall, 2)*100) + "%")
print("F1 Score " + str(round(f1, 2)*100) + "%")


  y = column_or_1d(y, warn=True)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Acurácia: 27.0%
Precisão 24.0%
Recall 27.0%
F1 Score 24.0%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Dataset: tamanhos de lentes de grau para pacientes (3 classes de rótulos)
---
Link: https://archive.ics.uci.edu/dataset/58/lenses

In [None]:
"""
Código copiado da página do UCI para import do dataset
"""


from ucimlrepo import fetch_ucirepo

# fetch dataset
lenses = fetch_ucirepo(id=58)

# data (as pandas dataframes)
X = lenses.data.features
y = lenses.data.targets

# metadata
print(lenses.metadata)

# variable information
print(lenses.variables)


{'uci_id': 58, 'name': 'Lenses', 'repository_url': 'https://archive.ics.uci.edu/dataset/58/lenses', 'data_url': 'https://archive.ics.uci.edu/static/public/58/data.csv', 'abstract': 'Database for fitting contact lenses', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 24, 'num_features': 3, 'feature_types': ['Categorical'], 'demographics': ['Age'], 'target_col': ['class'], 'index_col': ['id'], 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1987, 'last_updated': 'Mon Feb 26 2024', 'dataset_doi': '10.24432/C5K88Z', 'creators': ['J. Cendrowska'], 'intro_paper': None, 'additional_info': {'summary': 'The examples are complete and noise free. The examples highly simplified the problem. The attributes do not fully describe all the factors affecting the decision as to which type, if any, to fit.\r\n\r\n Notes:  \r\n\r\n--This database is complete (all possible combinations of attribute-value pairs are re

In [None]:
X = pd.get_dummies(X, drop_first=True)
y = LabelEncoder().fit_transform(y)

"""
Divisão dos dados e rótulos em teste e treino
"""
X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.7, random_state=42)

"""
Padronização
"""
scaler = StandardScaler()
X_treino = scaler.fit_transform(X_treino)
X_teste = scaler.transform(X_teste)

"""
Rede neural
"""
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_treino.shape[1],)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(set(y)), activation='softmax')
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

"""
Treino do modelo
"""
r = model.fit(X_treino, y_treino, epochs=1000, batch_size=16, validation_split=0.2, verbose=0)

"""
Previsão do modelo
"""
y_prev = model.predict(X_teste)
y_prev_classes = y_prev.argmax(axis=1)

"""
Comparação da previsão com os rótulos do conjunto de teste para definição das métricas dos resultados
"""
accuracy = accuracy_score(y_teste, y_prev_classes)
precision = precision_score(y_teste, y_prev_classes, average='weighted')
recall = recall_score(y_teste, y_prev_classes, average='weighted')
f1 = f1_score(y_teste, y_prev_classes, average='weighted')


"""
Este é um caso de dataset que não é bom para previsão. Todas suas métricas foram a baixo do esperado. Foi testado diferentes quantidades de camadas, com diferentes quantidades de perceptrons, quantidades maiores de épocas com diferentes tamanhos de batches. Porém, todas suas métricas não mudaram muito do resultado.
"""
print("Acurácia: " + str(round(accuracy, 2)*100) + "%")
print("Precisão " + str(round(precision, 2)*100) + "%")
print("Recall " + str(round(recall, 2)*100) + "%")
print("F1 Score " + str(round(f1, 2)*100) + "%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Acurácia: 59.0%
Precisão 35.0%
Recall 59.0%
F1 Score 44.0%


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
