In [90]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import cv2
from skimage.filters import sobel
from skimage.feature import graycomatrix, graycoprops
import os

# Proceso de Entrenamiento
## Parte 1: Agregar las imágenes a un arreglo
### En cuadernos de notas anteriores se realizó el preprocesamiento de datos necesario para aislar lo máximo posible el cerebro del resto del tejido circundante. Con las imágenes ya preprocesadas, estas pasan a la siguiente parte del pipeline. Estas imágenes serán convertidas a formato de OpenCV para que puedan ser leídas con facilidad. Este arreglo será transformado a un DataFrame en los últimos pasos del pipeline.

In [91]:
from pathlib import Path
import warnings

path_train = Path("datasets/brain-tumor-mri-datasets/Training")
path_test = Path("datasets/brain-tumor-mri-datasets/Testing")


def load_labelled_images(root):
    """Read every image stored under ``root/<label>/`` as a grayscale array.

    Parameters
    ----------
    root : str or pathlib.Path
        Directory whose immediate sub-directories are named after the class
        label of the images they contain.

    Returns
    -------
    images : numpy.ndarray
        All readable images stacked into one ``uint8`` array, in traversal order.
    labels : numpy.ndarray
        The sub-directory name of each image, aligned with ``images``.
    """
    images, labels = [], []
    # Directory listing order is arbitrary, so sort to get the same sample
    # order on every run and every machine. Entries directly under ``root``
    # that are not directories cannot be label folders and are ignored.
    for label_dir in sorted(p for p in Path(root).iterdir() if p.is_dir()):
        for img_path in sorted(p for p in label_dir.iterdir() if p.is_file()):
            image = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals "could not decode" by returning None
                # instead of raising; a None entry would otherwise break the
                # uint8 conversion below with an unrelated-looking error.
                warnings.warn(f"Skipping unreadable image file: {img_path}")
                continue
            images.append(image)
            labels.append(label_dir.name)
    return np.array(images, dtype=np.uint8), np.array(labels)


# Training set, then the held-out test set, loaded the same way.
train_images, train_labels = load_labelled_images(path_train)
test_images, test_labels = load_labelled_images(path_test)

print("Imprimir la forma de cada arreglo")
print(f"train_images: {train_images.shape} train_labels: {train_labels.shape} test_images: {test_images.shape} test_labels: {test_labels.shape}")


Imprimir la forma de cada arreglo
train_images: (5465, 256, 256) train_labels: (5465,) test_images: (1253, 256, 256) test_labels: (1253,)


In [92]:
# Encode the string class labels as integer codes.
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training labels only and reuse that mapping for the
# test labels. Re-fitting on the test labels would build a second, independent
# mapping, so the same integer could stand for different classes in the two
# sets whenever their label sets differ. With a single mapping, a test label
# that never appeared in training is reported by `transform` instead of being
# silently assigned a code.
le = LabelEncoder()
train_labels_encoded = le.fit_transform(train_labels)
test_labels_encoded = le.transform(test_labels)

# Parte 2: Extracción de Características
## Se emplean en conjunto pandas y la librería skimage (usada para extraer características y manipular imágenes) con el fin de extraer las características más fundamentales de las imágenes. Para esto usaré la técnica de la Gray Level Co-occurrence Matrix (GLCM), una de las técnicas que expliqué en mis primeras presentaciones.

In [93]:
#Extracción de Características
import random
def feature_extractor(dataset):
    """Compute gray-level co-occurrence matrix (GLCM) features per image.

    For each image, one GLCM is computed for each of four (distance, angle)
    offsets and eight ``graycoprops`` properties are read from it, giving
    32 feature columns per image.

    Parameters
    ----------
    dataset : numpy.ndarray
        Stack of 2-D images indexed along the first axis.

    Returns
    -------
    pandas.DataFrame
        One row per image, in input order. Columns are named
        ``<feature>`` for the first offset and ``<feature>1`` .. ``<feature>3``
        for the remaining ones. An empty input yields an empty frame.
    """
    # Distances and angles are paired position by position (1 with 0,
    # 3 with pi/2, 5 with pi/4, 7 with 3*pi/4); this is NOT the full
    # 4x4 grid of combinations.
    offsets = list(zip([1, 3, 5, 7], [0, np.pi/2, np.pi/4, (3*np.pi)/4]))

    # (output column prefix, graycoprops property name)
    properties = (
        ('contrast', 'contrast'),
        ('dissimilarity', 'dissimilarity'),
        ('homogeneity', 'homogeneity'),
        ('energy', 'energy'),
        ('correlation', 'correlation'),
        # NOTE(review): this column is called "variance" but is filled with
        # the GLCM 'mean' property. Kept as-is so the feature values do not
        # change; confirm whether the name or the property is the mistake.
        ('variance', 'mean'),
        ('std', 'std'),
        ('entropy', 'entropy'),
    )

    # Collect one plain dict per image and build the frame once at the end;
    # concatenating onto a growing DataFrame inside the loop copies all
    # previous rows on every iteration.
    rows = []
    for img in dataset:
        row = {}
        for i, (d, angle) in enumerate(offsets):
            suffix = "" if i == 0 else str(i)
            glcm = graycomatrix(img, distances=[d], angles=[angle])
            for column, prop in properties:
                # graycoprops returns a (n_distances, n_angles) array; with a
                # single distance and angle the value sits at [0, 0].
                row[f"{column}{suffix}"] = graycoprops(glcm, prop)[0, 0]
        rows.append(row)

    return pd.DataFrame(rows)
            
            
    
    

In [94]:
# Alias the image arrays and the encoded labels under X/y names.
X_train, y_train = train_images, train_labels_encoded
X_test, y_test = test_images, test_labels_encoded

In [95]:
# Texture-feature table for the training images; X_ML starts as an alias of it.
X_ML = caracteristicas = feature_extractor(X_train)

In [96]:
# Number of training images (size of the first axis).
len(train_images)

5465

# Parte 3: Entrenamiento del Modelo

In [97]:
from sklearn.ensemble import RandomForestClassifier

# Number of feature columns per image.
n_features = caracteristicas.shape[1]

# Plain 2-D array with one row per training image, as input for scikit-learn.
# `caracteristicas` is deliberately NOT rebound here: overwriting it with an
# array that has an extra leading axis made this cell non-idempotent (on a
# second run `caracteristicas.shape[1]` was the number of images, not the
# number of features).
X_ML = np.asarray(caracteristicas).reshape(X_train.shape[0], -1)

# Fixed random_state so the fitted forest is reproducible.
RF_Model = RandomForestClassifier(n_estimators = 50, random_state = 42)
RF_Model.fit(X_ML, y_train)

In [98]:
# Texture-feature table for the test images.
caracteristicas_test = feature_extractor(X_test)
n_features_test = caracteristicas_test.shape[1]

# Convert to an array with a leading singleton axis, then flatten back to one
# row per test image for the classifier.
caracteristicas_test = np.asarray(caracteristicas_test)[np.newaxis, ...]
X_test_ML = caracteristicas_test.reshape(X_test.shape[0], -1)

In [101]:
from sklearn.metrics import accuracy_score

# Predict the encoded class of every test image with the random forest, then
# score the predictions against the encoded true labels.
y_pred = RF_Model.predict(X_test_ML)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [102]:
# Display the random forest's accuracy on the test set.
accuracy

0.8036711891460495

# Regresión Lineal

In [103]:
from sklearn.linear_model import LinearRegression

In [104]:
# Linear regression model with scikit-learn's default settings.
regression = LinearRegression()

In [105]:
# Fit on the training features, using the integer-encoded class labels as the
# numeric target.
# NOTE(review): the label codes are category ids, so regressing on them
# imposes a numeric ordering on the classes — confirm this baseline is
# intended (a classifier such as LogisticRegression may be what is wanted).
regression.fit(X_ML, y_train)

In [107]:
# Continuous regression outputs for the test features.
# This rebinds `y_pred`, which previously held the random forest's predictions.
y_pred = regression.predict(X_test_ML)

In [111]:
# Round the regression outputs to the nearest integer class code.
# The outputs are taken straight from `regression` rather than from the shared
# `y_pred` name: `y_pred` is assigned by both the random-forest cell and the
# regression cell, so reading it here would score whichever model last wrote
# it if cells are run out of order.
prediction = np.round(regression.predict(X_test_ML)).astype(int)

In [112]:
# Share of test images whose rounded regression output equals the encoded true label.
accuracy_regression = accuracy_score(y_test,prediction)

In [113]:
# Display the accuracy of the rounded regression predictions.
# NOTE(review): the saved output below is identical, digit for digit, to the
# random-forest accuracy shown earlier — verify that `prediction` was computed
# from the regression output and not from a stale `y_pred`.
accuracy_regression

0.8036711891460495