In [82]:

import pickle
import time

#basic
import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

#tensorflow and keras
from tensorflow import keras
import tensorflow
from tensorflow.keras.layers import Input, Dense, GlobalAveragePooling2D, Flatten, MaxPooling2D, Dropout, Resizing, Rescaling, RandomBrightness, RandomContrast, RandomCrop, RandomFlip, RandomRotation
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import Model
from keras.utils import load_img, img_to_array
from keras.models import load_model

#sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer

#open cv
import cv2 as cv

from cascid.configs import config, pad_ufes
from cascid import database

RANDOM_STATE = 42
IMDIR = pad_ufes.IMAGES_DIR
IMAGE_SHAPE = (300, 300, 3)

In [109]:
def modify_db(db: pd.DataFrame):
    db[['smoke','drink','pesticide','skin_cancer_history','cancer_history','has_piped_water','has_sewage_system','itch','grew','hurt','changed','bleed','elevation','biopsed']] = db[['smoke','drink','pesticide','skin_cancer_history','cancer_history','has_piped_water','has_sewage_system','itch','grew','hurt','changed','bleed','elevation','biopsed']].astype("bool")
    db = db.dropna().drop(['patient_id','lesion_id'], axis = 1)
    db['region'] = db['region'].apply(lambda x: 'OUTROS' if x in ['THIGH','LIP','FOOT','ABDOMEN','SCALP'] else x)
    db['background_father'] = db['background_father'].apply(lambda x: 'OUTROS' if x in ['ISRAEL','SPAIN','AUSTRIA','CZECH','BRASIL','POLAND','PORTUGAL','NETHERLANDS','NORWAY','FRANCE'] else ('BRAZIL' if x == 'BRASIL' else x))
    db['background_mother'] = db['background_mother'].apply(lambda x: 'OUTROS' if x in ['ISRAEL','SPAIN','AUSTRIA','CZECH','BRASIL','POLAND','PORTUGAL','NETHERLANDS','NORWAY','FRANCE'] else ('BRAZIL' if x == 'BRASIL' else x))
    db['is_cancer'] = db['diagnostic'].apply(lambda x: 0 if x in ['ACK','NEV','SEK'] else 1)
    # db.head(5).transpose()

    transformer = make_column_transformer((OneHotEncoder(categories='auto'), ['region','gender']), remainder='passthrough')
    transformed = transformer.fit_transform(db.drop(['diagnostic','biopsed','background_father','background_mother'], axis = 1))
    transformed_db = pd.DataFrame(transformed, columns=transformer.get_feature_names())
    transformed_db.loc[:, transformed_db.columns != 'img_id'] = transformed_db.loc[:, transformed_db.columns != 'img_id'].astype(np.float64)
    # transformed_db.head(5).transpose()
    return transformed_db


db = database.get_db()
transformed_db = modify_db(db)



In [69]:
X = transformed_db.drop("is_cancer", axis = 1)
Y = transformed_db["is_cancer"].to_numpy().astype(np.float32)

In [71]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=RANDOM_STATE)

print("x_train shape: {0}".format(x_train.shape))
print("x_test shape: {0}".format(x_test.shape))
print("y_train shape: {0}".format(y_train.shape))
print("y_test shape: {0}".format(y_test.shape))

x_train = x_train.reset_index()
x_test = x_test.reset_index()

x_train_img = x_train["img_id"]
x_train_rfc = x_train.drop(["img_id"], axis=1)
x_test_img = x_test["img_id"]
x_test_rfc = x_test.drop(["img_id"], axis=1)

x_train shape: (1179, 30)
x_test shape: (295, 30)
y_train shape: (1179,)
y_test shape: (295,)


In [77]:
rfc_top = RandomForestClassifier(n_estimators=x_train.shape[1]*5, random_state=RANDOM_STATE)

def display_scores(scores):
    print('Scores:', scores.round(decimals=2))
    print('Mean:', scores.mean())
    print('Standard deviation:', scores.std())
    
    return scores.mean()

scores_rfc_top = cross_val_score(
    rfc_top,
    x_train_rfc,
    y_train,
    n_jobs=-1,
    scoring="roc_auc",
    cv=5
)

rfc_top.fit(x_train_rfc, y_train)
display_scores(scores_rfc_top)

Scores: [0.83 0.79 0.81 0.83 0.8 ]
Mean: 0.8126098307430274
Standard deviation: 0.014812220786743263


0.8126098307430274

In [None]:
NN = load_model(config.DATA_DIR / 'experiments' / 'fernando' / 'models' / 'deep_learning_effnet')
NN.treinable = False
NN.summary()

In [104]:
class StackedModel():

    def __init__(self, NN: keras.models.Sequential, rfc: RandomForestClassifier) -> None:
        self.nn = NN
        self.rfc_top = rfc
        self.model = self._build_stacker_model()

    def _load_image(self, name: str):
        pil_img = load_img(
            str(IMDIR / name),
            grayscale=False,
            color_mode='rgb',
            target_size=(IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
            interpolation='nearest',
            keep_aspect_ratio=False
        )

        return img_to_array(pil_img, dtype=np.uint8)

    def reader(self, img_path_list):
        return np.array(list(map(self._load_image, img_path_list)))

    def _build_stacker_model(self):
        # Return some model for prediction. This model is used to predict the final value, from the outputs of both input models.
        return RandomForestClassifier(n_estimators=50, random_state=RANDOM_STATE)
    
    def fit(self, x_train_rfc : np.ndarray, x_train_img : np.ndarray, y_train : np.ndarray) -> None:
        y_pred_nn = self.nn.predict(self.reader(x_train_img))
        y_pred_rfc = self.rfc_top.predict_proba(x_train_rfc)
        x_train_stack = np.hstack([y_pred_nn, y_pred_rfc])
        self.model.fit(x_train_stack, y_train)

    def predict(self, x_train_rfc : np.ndarray, x_train_img : np.ndarray, *args, **kwargs):
        y_pred_nn = self.nn.predict(self.reader(x_train_img))
        y_pred_rfc = self.rfc_top.predict_proba(x_train_rfc)
        x_pred_stack = np.hstack([y_pred_nn, y_pred_rfc])
        return self.model.predict(x_pred_stack, *args, **kwargs)       


In [105]:
StackModel = StackedModel(NN, rfc_top)

StackModel.fit(x_train_rfc[:5], x_train_img[:5], y_train[:5])

preds = StackModel.predict(x_test_rfc[:5], x_test_img[:5])

preds