In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, mutual_info_classif

%matplotlib inline

In [30]:
### --------- Carregar as features GLCM extraídas na variável data --------- ###

data = pd.read_csv('Diretório/arquivo.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 311 entries, 0 to 310
Data columns (total 28 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       311 non-null    int64  
 1   Imagem           311 non-null    int64  
 2   contrast_0       311 non-null    float64
 3   contrast_1       311 non-null    float64
 4   contrast_2       311 non-null    float64
 5   contrast_3       311 non-null    float64
 6   dissimilarity_0  311 non-null    float64
 7   dissimilarity_1  311 non-null    float64
 8   dissimilarity_2  311 non-null    float64
 9   dissimilarity_3  311 non-null    float64
 10  homogeneity_0    311 non-null    float64
 11  homogeneity_1    311 non-null    float64
 12  homogeneity_2    311 non-null    float64
 13  homogeneity_3    311 non-null    float64
 14  energy_0         311 non-null    float64
 15  energy_1         311 non-null    float64
 16  energy_2         311 non-null    float64
 17  energy_3        

In [31]:
### --------- Separação entre features e defeitos --------- ###

# Obs: Atentar para a seleção de colunas em X e y

X = data.iloc[:, 2:26]
y = data.iloc[:, 27]

### --------- Seleção das melhores features --------- ###



f_classif = SelectKBest(score_func=f_classif, k=18)

fit = f_classif.fit(X, y)
features = fit.transform(X)


cols = fit.get_support(indices=True)
X = X.iloc[:, cols]
X

Unnamed: 0,contrast_0,contrast_1,contrast_2,contrast_3,dissimilarity_1,homogeneity_1,energy_0,energy_1,energy_2,energy_3,correlation_0,correlation_1,correlation_2,correlation_3,ASM_0,ASM_1,ASM_2,ASM_3
0,1.733085,4.756037,5.575028,5.836713,0.967616,0.701751,0.180111,0.158893,0.151588,0.151478,0.994627,0.985254,0.982730,0.981919,0.032440,0.025247,0.022979,0.022946
1,0.657616,0.889384,1.296878,1.303918,0.433006,0.812905,0.228901,0.207440,0.197448,0.196779,0.989945,0.986440,0.980076,0.979964,0.052396,0.043032,0.038986,0.038722
2,0.586977,1.384693,1.626863,1.702045,0.538195,0.779995,0.224632,0.203925,0.193063,0.192240,0.987204,0.969815,0.964563,0.962925,0.050459,0.041585,0.037273,0.036956
3,0.388601,0.689687,0.887331,0.869944,0.377190,0.832456,0.242860,0.223690,0.211802,0.212310,0.991842,0.985569,0.981352,0.981718,0.058981,0.050037,0.044860,0.045076
4,1.477447,0.899424,2.089232,1.919298,0.478667,0.793281,0.163768,0.183390,0.153360,0.153990,0.992712,0.995563,0.989701,0.990539,0.026820,0.033632,0.023519,0.023713
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
306,3.561685,0.459490,3.597779,3.904436,0.319521,0.850419,0.214329,0.232253,0.202688,0.200725,0.921192,0.989821,0.920329,0.913543,0.045937,0.053941,0.041082,0.040291
307,1.753506,0.545824,2.017830,1.979691,0.357971,0.834932,0.210051,0.215046,0.192881,0.193114,0.953060,0.985367,0.945911,0.946933,0.044122,0.046245,0.037203,0.037293
308,0.530768,0.269557,0.630876,0.638259,0.245236,0.879531,0.208247,0.223908,0.196185,0.195667,0.980133,0.989898,0.976357,0.976074,0.043367,0.050135,0.038488,0.038285
309,3.184094,0.640710,3.230161,3.736531,0.429991,0.801357,0.236039,0.255275,0.219977,0.219427,0.866109,0.973041,0.864146,0.842849,0.055714,0.065165,0.048390,0.048148


In [32]:
### --------- Separação entre dados de treino e de teste --------- ###

# X = data.iloc[:, 2:26]
# y = data.iloc[:, 27]

X_train, X_test, y_train,y_test =train_test_split(X, y, test_size=0.3)

### --------- Padronização dos dados de features --------- ###

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [33]:
### --------- Criação do modelo usando método de aprendizado --------- ###



clf=RandomForestClassifier(n_estimators=40)
clf.fit(X_train,y_train)

RandomForestClassifier(n_estimators=40)

In [34]:
### --------- Geração da Matriz de Confusão --------- ###
y_pred=clf.predict(X_test)
matrix = confusion_matrix(y_test,y_pred)
matrix = pd.DataFrame(matrix)
matrix


Unnamed: 0,0,1,2,3
0,13,1,0,0
1,1,33,2,3
2,1,1,14,0
3,0,10,0,15


In [35]:
### --------- Criação do Relatório de Classificação --------- ###

target_names = ['sem', 'nó', 'trinca', 'resina']

report = classification_report(y_test, y_pred, target_names= target_names, output_dict=True)
df = pd.DataFrame(report).transpose()

df

Unnamed: 0,precision,recall,f1-score,support
sem,0.866667,0.928571,0.896552,14.0
nó,0.733333,0.846154,0.785714,39.0
trinca,0.875,0.875,0.875,16.0
resina,0.833333,0.6,0.697674,25.0
accuracy,0.797872,0.797872,0.797872,0.797872
macro avg,0.827083,0.812431,0.813735,94.0
weighted avg,0.803901,0.797872,0.794005,94.0
