# Classificando com Técnicas Clássicas

## Imports

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
import xgboost as xgb
from tensorflow.keras.datasets import cifar10
import time


## Carregando dataset

In [2]:
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [3]:
# Redimensionando as imagens
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Normalizando os dados
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# Convertendo os rótulos para o formato adequado
y_train = y_train.ravel()
y_test = y_test.ravel()

In [4]:
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

## SVM

(Muito tempo para rodar)

In [None]:
svm_model = svm.SVC(random_state=42, probability=False)
start_time = time.time()
svm_model.fit(x_train, y_train)
end_time = time.time()
svm_training_time = end_time - start_time
print(f"Tempo de treinamento da SVM (sem PCA): {svm_training_time:.2f} segundos")
y_pred_svm = svm_model.predict(x_test)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print(f"Acurácia da SVM (sem PCA): {svm_accuracy:.4f}")
print("\nRelatório de Classificação (SVM sem PCA):\n", classification_report(y_test, y_pred_svm))
print("\nMatriz de Confusão (SVM sem PCA):\n", confusion_matrix(y_test, y_pred_svm))

## Decomposição do dataset

In [5]:
n_components = 100
pca = PCA(n_components=n_components, whiten=True, random_state=42)
pca.fit(x_train)
x_train_pca = pca.transform(x_train)
x_test_pca = pca.transform(x_test)
x_val_pca = pca.transform(x_val)

In [13]:
svm_model_pca = svm.SVC(kernel='linear', C=1.0, random_state=42, probability=False)
start_time = time.time()
svm_model_pca.fit(x_train_pca, y_train)
end_time = time.time()
svm_pca_training_time = end_time - start_time
print(f"Tempo de treinamento da SVM (com PCA): {svm_pca_training_time:.2f} segundos")

Tempo de treinamento da SVM (com PCA): 1049.83 segundos


In [None]:
y_pred_svm_pca = svm_model_pca.predict(x_test_pca) # Predições com dados de teste transformados
svm_pca_accuracy = accuracy_score(y_test, y_pred_svm_pca)
print(f"Acurácia da SVM (com PCA): {svm_pca_accuracy:.4f}")
print("\nRelatório de Classificação (SVM com PCA):\n", classification_report(y_test, y_pred_svm_pca))


Acurácia da SVM (com PCA): 0.4071

Relatório de Classificação (SVM com PCA):
               precision    recall  f1-score   support

           0       0.46      0.49      0.47      1000
           1       0.46      0.49      0.48      1000
           2       0.30      0.28      0.29      1000
           3       0.31      0.30      0.30      1000
           4       0.36      0.29      0.32      1000
           5       0.35      0.32      0.34      1000
           6       0.40      0.49      0.44      1000
           7       0.46      0.42      0.44      1000
           8       0.50      0.51      0.50      1000
           9       0.45      0.47      0.46      1000

    accuracy                           0.41     10000
   macro avg       0.40      0.41      0.40     10000
weighted avg       0.40      0.41      0.40     10000



## Random Forest

In [6]:
rf_model = RandomForestClassifier(max_depth=10, random_state=42)
start_time = time.time()
rf_model.fit(x_train, y_train)
end_time = time.time()
rf_training_time = end_time - start_time
print(f"Tempo de treinamento da Random Forest: {rf_training_time:.2f} segundos")

Tempo de treinamento da Random Forest: 159.59 segundos


In [12]:
y_pred_rf = rf_model.predict(x_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Acurácia da Random Forest: {rf_accuracy:.4f}")
print("\nRelatório de Classificação (Random Forest):\n", classification_report(y_test, y_pred_rf))


Acurácia da Random Forest: 0.4248

Relatório de Classificação (Random Forest):
               precision    recall  f1-score   support

           0       0.50      0.51      0.50      1000
           1       0.47      0.50      0.49      1000
           2       0.39      0.16      0.23      1000
           3       0.35      0.17      0.22      1000
           4       0.32      0.44      0.37      1000
           5       0.39      0.38      0.39      1000
           6       0.39      0.55      0.45      1000
           7       0.44      0.41      0.43      1000
           8       0.52      0.58      0.55      1000
           9       0.45      0.55      0.49      1000

    accuracy                           0.42     10000
   macro avg       0.42      0.42      0.41     10000
weighted avg       0.42      0.42      0.41     10000



## XGBoost

In [10]:
xgb_model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=10,
    n_estimators=100,
    max_depth=3,
    learning_rate=0.01,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    use_label_encoder=False,
    eval_metric='mlogloss'
)
start_time = time.time()

# Fit the model on the full dataset, using the validation set for early stopping
xgb_model.fit(x_train, y_train, eval_set=[(x_val, y_val)])
end_time = time.time()
xgb_training_time = end_time - start_time
print(f"Tempo de treinamento do XGBoost: {xgb_training_time:.2f} segundos")

Parameters: { "use_label_encoder" } are not used.



[0]	validation_0-mlogloss:2.29878
[1]	validation_0-mlogloss:2.29501
[2]	validation_0-mlogloss:2.29124
[3]	validation_0-mlogloss:2.28762
[4]	validation_0-mlogloss:2.28391
[5]	validation_0-mlogloss:2.28032
[6]	validation_0-mlogloss:2.27685
[7]	validation_0-mlogloss:2.27327
[8]	validation_0-mlogloss:2.26973
[9]	validation_0-mlogloss:2.26631
[10]	validation_0-mlogloss:2.26292
[11]	validation_0-mlogloss:2.25963
[12]	validation_0-mlogloss:2.25632
[13]	validation_0-mlogloss:2.25312
[14]	validation_0-mlogloss:2.24992
[15]	validation_0-mlogloss:2.24673
[16]	validation_0-mlogloss:2.24356
[17]	validation_0-mlogloss:2.24046
[18]	validation_0-mlogloss:2.23740
[19]	validation_0-mlogloss:2.23435
[20]	validation_0-mlogloss:2.23143
[21]	validation_0-mlogloss:2.22838
[22]	validation_0-mlogloss:2.22544
[23]	validation_0-mlogloss:2.22247
[24]	validation_0-mlogloss:2.21951
[25]	validation_0-mlogloss:2.21664
[26]	validation_0-mlogloss:2.21386
[27]	validation_0-mlogloss:2.21102
[28]	validation_0-mlogloss:2.2

In [11]:
y_pred_xgb = xgb_model.predict(x_test)
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)
print(f"Acurácia do XGBoost: {xgb_accuracy:.4f}")
print("\nRelatório de Classificação (XGBoost):\n", classification_report(y_test, y_pred_xgb))

Acurácia do XGBoost: 0.3657

Relatório de Classificação (XGBoost):
               precision    recall  f1-score   support

           0       0.44      0.49      0.46      1000
           1       0.40      0.44      0.42      1000
           2       0.31      0.14      0.19      1000
           3       0.27      0.10      0.15      1000
           4       0.25      0.36      0.30      1000
           5       0.36      0.35      0.35      1000
           6       0.32      0.49      0.39      1000
           7       0.41      0.28      0.34      1000
           8       0.45      0.51      0.48      1000
           9       0.41      0.50      0.45      1000

    accuracy                           0.37     10000
   macro avg       0.36      0.37      0.35     10000
weighted avg       0.36      0.37      0.35     10000

