#### Several works in the field of automatic tumor diagnosis can be divided into two main categories, namely feature extraction and sample classification. 
#### In general, the image properties are extracted first. These features usually include static features such as entropy, skewness, mean, energy, torque, and correlation or properties obtained by applying other algorithms

In [3]:
import numpy as np
import keras
import tensorflow as tf
import cv2
import os
import imutils
import shutil
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import array_to_img, img_to_array, load_img

import os
import cv2
import numpy as np
from skimage import io, img_as_ubyte
from kapur import kapur_threshold

from matplotlib import pyplot as plt
from skimage import io, img_as_ubyte
from kapur import kapur_threshold

from PIL import Image
from tensorflow.keras.layers import Conv2D, Input, GlobalAveragePooling2D, ZeroPadding2D, BatchNormalization, Activation, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model, load_model, save_model

from tensorflow.keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras import backend

from keras import applications
from keras.models import Sequential

from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.optimizers import Adam

from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

### 0. Data Augmentation

In [4]:
def create_dir(newdir, empty = True):
    """
    create new folder if the target folder doesnt exist
    """
    CHECK_FOLDER = os.path.isdir(newdir)
    # If folder doesn't exist, then create it.
    if not CHECK_FOLDER:
        os.makedirs(newdir)
        print("created folder : ", newdir)

    else:
        if empty == True:
            ## whether to remove all contents in the current augmented data folder and generate new ones
            shutil.rmtree(newdir)
            print("current augmented data removed")
            os.makedirs(newdir)
        print(newdir, "folder already exists.")
        
## save the augmented data and the original ones in new folders        
def data_augmentation(refresh=True, num=5):
    """
    refresh: whether to replace current augmented data and generate new ones
    num: number of augmented data per image
    """

    training_path = "data\\Training"
    ## destination parent folder for augmented data
    augmented_path = "data\\augmentation_training"
    current_directory = os.getcwd()
    original_path = os.path.join(current_directory, training_path)
    augmented_path = os.path.join(current_directory, augmented_path)

    ## augmented data generator
    image_generator = ImageDataGenerator(rotation_range=90, shear_range=0.4, zoom_range=0,
                                         samplewise_center=True, vertical_flip=True, horizontal_flip=True,
                                         samplewise_std_normalization=True)
    for subf in os.listdir(original_path):
        new_dir = os.path.join(augmented_path, subf)
        create_dir(new_dir, empty=refresh)
        for f in os.listdir(os.path.join(original_path, subf)):
            image_path = os.path.join(original_path, subf, f)
            img = load_img(image_path)
            x = img_to_array(img)
            x = x.reshape((1,) + x.shape)  # reshape to (1, height, width, channels)
            i = 1
            for batch in image_generator.flow(x, batch_size=1,
                                              save_to_dir=new_dir,
                                              save_prefix=f.split(".")[0],
                                              save_format='jpg'):
                i += 1
                if i > num:
                    break

In [5]:
data_augmentation()

created folder :  D:\project\data\augmentation_training\glioma


KeyboardInterrupt: 

### 1. Image Preprocessing

In [6]:
def blur_and_crop(image, blur = "median", cropping= False, kernel = 5, masking = True, plot=False):
    """
    preprocessing:
    1. convert to grayscale and blur the image using median or gaussian filter
    2. (optional)apply kapur thresholding to create a mask, mask the blurred image
    3. crop the image to contain only the brain image, leaving the blank around surrounding the brain out.
    """

    # Convert the image to grayscale, and blur it slightly
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    if blur == "median":
        blurred = cv2.medianBlur(gray, kernel)
    elif blur == "gaussian":
        blurred = cv2.GaussianBlur(gray, (kernel, kernel), 0)

    # Threshold the image, then perform a series of erosions +
    # dilations to remove any small regions of noise

    if masking == True:
   ## creating mask with kapur thresholding
        threshold = kapur_threshold(blurred)
        binr = cv2.threshold(blurred, threshold, 255, cv2.THRESH_BINARY)[1]
        masked_image = cv2.bitwise_and(blurred, blurred, mask=binr)
    else:
        masked_image = blurred
        
    if cropping == True:
        thresh = cv2.threshold(masked_image, 45, 255, cv2.THRESH_BINARY)[1]
        thresh = cv2.erode(thresh, None, iterations=2)
        thresh = cv2.dilate(thresh, None, iterations=2)

        # Find contours in thresholded image, then grab the largest one
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        c = max(cnts, key=cv2.contourArea)


        # Find the extreme points for cropping
        extLeft = tuple(c[c[:, :, 0].argmin()][0])
        extRight = tuple(c[c[:, :, 0].argmax()][0])
        extTop = tuple(c[c[:, :, 1].argmin()][0])
        extBot = tuple(c[c[:, :, 1].argmax()][0])

        # crop new image out of the original image using the four extreme points (left, right, top, bottom)
        cropped_image = masked_image[extTop[1]:extBot[1], extLeft[0]:extRight[0]]      
    else:
        cropped_image = masked_image

    if plot:
        plt.figure(figsize=(10, 10))
        plt.subplot(131), plt.imshow(image, cmap='gray'), plt.title('Original Image')
        plt.subplot(132), plt.imshow(masked_image, cmap='gray'), plt.title('Masked Image')
        plt.subplot(133), plt.imshow(cropped_image, cmap='gray'), plt.title('Cropped and Masked Image')
        
        plt.show()
    

    return cropped_image

img = cv2.imread(r"C:\Users\chenj\Desktop\project-JieChen\data\Training\meningioma\Tr-me_0011.jpg")
new_img = blur_and_crop(img, blur = "median", cropping = False, kernel = 5, masking = True, plot=True)

error: OpenCV(4.8.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [5]:
training_path = "data\\Training"
"\\augmentation_".join(training_path.split("\\"))

'data\\augmentation_Training'

In [6]:
def preprocessing(training_path, masking=False, crop=False):
    """
    preprocess the images in training_path parent folder
    1. create a destination folder for preprocessed images
    2. blur, mask and (crop) the images, masking is optional.
    3. store the processed images in new folder
    4. 提取图像特征并保存特征向量
    
    parameter: 
    training_path: the folder name for the original images to be processed
    masking: if masking is applied in the processing
    """
    
    current_directory = os.getcwd()
    ## destination parent folder for processed data
    if masking == True:
        processed_path = "\\Processed_".join(training_path.split("\\"))
    else:
        processed_path = "\\Unmasked_Processed_".join(training_path.split("\\"))
    
    processed_path = os.path.join(current_directory, processed_path)
    original_path = os.path.join(current_directory, training_path)
    
    # 4. 提取图像特征并保存特征向量
    feature_extractor = VGG16(weights='imagenet', include_top=False, input_shape=(512, 512, 3))
    feature_extractor.trainable = False  # 冻结特征提取器的权重，只训练新加入的全连接层
    
    # 提取特征并保存特征向量
    features_list = []
    labels_list = []
    for subf in tqdm(os.listdir(original_path), desc="Folders"):
        new_dir = os.path.join(processed_path, subf)
        create_dir(new_dir, empty=True)

        for f in tqdm(os.listdir(os.path.join(original_path, subf)), desc="Images"):
            image_path = os.path.join(original_path, subf, f)
            img = cv2.imread(image_path)
            img = cv2.resize(img, (512, 512))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # VGG16使用RGB格式

            # 保存预处理后的图像
            new_img = blur_and_crop(img, blur="median", cropping=crop, kernel=5, masking=masking, plot=False)
            image = Image.fromarray(new_img)
            image.save(os.path.join(new_dir, f))

            # 提取特征
            features = feature_extractor.predict(np.expand_dims(img, axis=0))
            features_list.append(features.flatten())  # 将特征展平为向量
            labels_list.append(subf)  # 添加标签

    # 保存特征向量为Numpy数组
    features_array = np.array(features_list)
    np.save('features.npy', features_array)
    # 保存标签为Numpy数组
    labels_array = np.array(labels_list)
    np.save('labels.npy', labels_array)


In [None]:
# preprocessing(training_path="data\\Training", masking=False)
# preprocessing(training_path="data\\Training", masking=True)
# # 这里python会崩

: 

In [None]:
testing_path = "data\\Testing"

In [None]:
def load_data(path):
    # 加载预处理后的特征向量和标签
    features = np.load('features.npy')
    labels = np.load('labels.npy')
    return features, labels

def build_model(input_shape, num_classes):
    # 构建模型
    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    return model

def train_model(features, labels):
    # 标签编码为数字
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # 进行独热编码
    onehot_encoder = OneHotEncoder(sparse=False)
    labels_onehot = onehot_encoder.fit_transform(labels_encoded.reshape(-1, 1))
    
    # 设置输入形状和类别数量
    input_shape = features.shape[1:]
    num_classes = len(label_encoder.classes_)
    
    # 构建模型
    model = build_model(input_shape, num_classes)
    
    # 编译模型
    model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])
    
    # 训练模型
    model.fit(features, labels_onehot, batch_size=32, epochs=10)
    
    return model
if __name__ == '__main__':
    features, labels = load_data("data\\Training")
    
    # 训练模型
    model = train_model(features, labels)

    # 加载测试集数据
    testing_path = "data\\Testing"
    testing_features, testing_labels = load_data(testing_path)
    
    # 在测试集进行评估
    testing_labels_encoded = LabelEncoder().fit_transform(testing_labels)
    testing_labels_onehot = OneHotEncoder(sparse=False).fit_transform(testing_labels_encoded.reshape(-1, 1))
    
    loss, accuracy = model.evaluate(testing_features, testing_labels_onehot, batch_size=32)
    print(f"Testing Loss: {loss}, Testing Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
