In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2 as cv
from skimage import feature
from sklearn.metrics import classification_report
from model_selection import train_test_split_per_class

In [2]:
database = pd.read_csv('group_by6.csv')

In [3]:
database.drop('Unnamed: 0', axis=1, inplace=True)

In [4]:
database = database[1:72]

In [5]:
database.reset_index(inplace=True, drop=True)

In [6]:
database

Unnamed: 0,Matriz mesangial,Celularidade mesangial,Membrana basal glomerular,Espaço de Bowman,Tufo glomerular,Podócito,Necrose tubular aguda,Vacuolização do epitélio,Cristais,Calcificação,...,Hialinose,Necrose Fibrinóide,IMUNOFLUORESCÊNCIA,padrão de deposição,Imunodepósitos,Imunohistoquímica,Microscopia eletrônica,vermelho congo,Clínica,Diagnóstico
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,3
1,1,0,0,1,1,1,0,0,0,0,...,0,0,1,1,1,0,2,0,0,3
2,0,0,1,0,0,0,0,0,0,0,...,0,0,1,2,2,1,3,0,0,2
3,2,0,2,1,0,0,0,0,0,0,...,0,0,1,2,2,1,3,0,0,2
4,2,0,3,0,0,0,0,0,0,0,...,0,0,1,2,2,1,3,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66,1,1,0,0,0,0,0,0,0,0,...,0,0,1,3,20,0,12,0,20,3
67,1,1,0,1,1,0,0,0,0,0,...,0,0,0,0,0,0,12,0,21,3
68,1,0,0,0,0,0,0,0,0,0,...,0,0,1,3,20,0,12,0,10,3
69,1,1,0,1,1,1,0,0,0,0,...,0,0,1,3,20,0,12,0,22,3


In [7]:
distances = [16]
angles = [-np.pi/4, 0, np.pi/4, np.pi/2]

In [8]:
data = {}
for d in distances:
    for a in angles:
        data['contrast_d' + str(d) + "_a" + str(a)] = []
        data['dissimilarity_d' + str(d) + "_a" + str(a)] = []
        data['homogeneity_d' + str(d) + "_a" + str(a)] = []
        data['ASM_d' + str(d) + "_a" + str(a)] = []
        data['energy_d' + str(d) + "_a" + str(a)] = []
        data['correlation_d' + str(d) + "_a" + str(a)] = []

In [9]:
for i in range(1,72):
    image = cv.imread(str(i)+".jpg", cv.IMREAD_GRAYSCALE)
    g = feature.greycomatrix(image, distances=distances, angles=angles, levels=256, symmetric=True, normed=True)
    
    for i in range(len(distances)):
        for j in range(len(angles)):
            data['contrast_d' + str(distances[i]) + "_a" + str(angles[j])].append(feature.greycoprops(g, 'contrast')[i][j])
            data['dissimilarity_d' + str(distances[i]) + "_a" + str(angles[j])].append(feature.greycoprops(g, 'dissimilarity')[i][j])
            data['homogeneity_d' + str(distances[i]) + "_a" + str(angles[j])].append(feature.greycoprops(g, 'homogeneity')[i][j])
            data['ASM_d' + str(distances[i]) + "_a" + str(angles[j])].append(feature.greycoprops(g, 'ASM')[i][j])
            data['energy_d' + str(distances[i]) + "_a" + str(angles[j])].append(feature.greycoprops(g, 'energy')[i][j])
            data['correlation_d' + str(distances[i]) + "_a" + str(angles[j])].append(feature.greycoprops(g, 'correlation')[i][j])

In [10]:
for desc in data:
    database[desc] = data[desc]

In [11]:
database

Unnamed: 0,Matriz mesangial,Celularidade mesangial,Membrana basal glomerular,Espaço de Bowman,Tufo glomerular,Podócito,Necrose tubular aguda,Vacuolização do epitélio,Cristais,Calcificação,...,homogeneity_d16_a0.7853981633974483,ASM_d16_a0.7853981633974483,energy_d16_a0.7853981633974483,correlation_d16_a0.7853981633974483,contrast_d16_a1.5707963267948966,dissimilarity_d16_a1.5707963267948966,homogeneity_d16_a1.5707963267948966,ASM_d16_a1.5707963267948966,energy_d16_a1.5707963267948966,correlation_d16_a1.5707963267948966
0,0,0,0,0,0,0,0,0,0,0,...,0.314088,0.000446,0.021119,0.998020,8.072654,2.123439,0.375465,0.000566,0.023789,0.998772
1,1,0,0,1,1,1,0,0,0,0,...,0.322328,0.000666,0.025816,0.995609,8.078722,2.138821,0.372568,0.000816,0.028570,0.997117
2,0,0,1,0,0,0,0,0,0,0,...,0.198484,0.000455,0.021321,0.987699,38.083610,4.319222,0.247330,0.000577,0.024016,0.992339
3,2,0,2,1,0,0,0,0,0,0,...,0.231651,0.001497,0.038696,0.985053,54.937109,4.739424,0.273475,0.001692,0.041129,0.989838
4,2,0,3,0,0,0,0,0,0,0,...,0.671129,0.074651,0.273224,0.972232,1.603985,0.687197,0.727000,0.085369,0.292180,0.982289
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66,1,1,0,0,0,0,0,0,0,0,...,0.384726,0.002527,0.050273,0.994474,10.221433,1.896770,0.465273,0.003326,0.057673,0.997020
67,1,1,0,1,1,0,0,0,0,0,...,0.212916,0.002772,0.052651,0.978968,157.398797,7.648214,0.250588,0.003235,0.056881,0.986461
68,1,0,0,0,0,0,0,0,0,0,...,0.281308,0.004581,0.067680,0.989300,44.697142,4.192056,0.319490,0.004979,0.070561,0.991689
69,1,1,0,1,1,1,0,0,0,0,...,0.161875,0.003669,0.060570,0.957730,310.090297,11.282051,0.188422,0.003886,0.062341,0.974224


In [12]:
x = database.drop('Diagnóstico', axis=1)
y = database['Diagnóstico']

In [13]:
x_train,x_test,y_train,y_test = train_test_split_per_class(x,y,test_size=0.2)

In [14]:
n_inputs = x.shape[1]
n_hidden1 = 50
n_hidden2 = 10
n_outputs = len(y.unique())

In [15]:
model = tf.keras.Sequential([tf.keras.layers.Dense(n_hidden1,activation = 'relu'),
                             tf.keras.layers.Dense(n_hidden2,activation = 'relu'),
                             tf.keras.layers.Dense(n_outputs,activation = 'softmax')])

In [16]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [17]:
model.fit(x = x_train.to_numpy(),y = y_train.to_numpy(), batch_size = 10, epochs = 20)

Train on 58 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x150b8d7c988>

In [18]:
y_pred = model.predict(x_test.to_numpy())

In [19]:
y_pred_argmax = []
for pred in y_pred:
    y_pred_argmax.append(np.argmax(pred))

In [20]:
print(classification_report(y_pred = y_pred_argmax, y_true = y_test))

              precision    recall  f1-score   support

           0       0.88      1.00      0.93         7
           1       0.00      0.00      0.00         3
           2       0.00      0.00      0.00         2
           3       0.50      1.00      0.67         4

    accuracy                           0.69        16
   macro avg       0.34      0.50      0.40        16
weighted avg       0.51      0.69      0.57        16



  'precision', 'predicted', average, warn_for)


In [21]:
from sklearn.ensemble import RandomForestClassifier

In [22]:
forest = RandomForestClassifier(n_estimators = 100)

In [23]:
forest.fit(x_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [24]:
y_pred_rf = forest.predict(x_test)

In [25]:
print(classification_report(y_pred = y_pred_rf, y_true = y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      0.67      0.80         3
           2       1.00      1.00      1.00         2
           3       0.80      1.00      0.89         4

    accuracy                           0.94        16
   macro avg       0.95      0.92      0.92        16
weighted avg       0.95      0.94      0.93        16

