# 1) Imports

In [None]:
# Enable the Intel Extension for Scikit-learn (sklearnex) patching. This cell
# is kept ahead of the scikit-learn imports in the cells below.
from sklearnex import patch_sklearn
patch_sklearn()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

import os

In [None]:
# Modelos
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# Separar/validar dados
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn import preprocessing

# Redução de dimensionalidade
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# Representação vetorial para imagem
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.efficientnet import (
	EfficientNetB0, EfficientNetB1,
	EfficientNetB2, EfficientNetB3,
	EfficientNetB4, EfficientNetB5,
	EfficientNetB6, EfficientNetB7
)
from tensorflow.keras.applications.resnet50 import ResNet50

from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.efficientnet import preprocess_input

# 2) Carregar imagens de clusters em pastas

In [None]:
# Presumably the fraction of samples reserved for a test split.
# NOTE(review): no cell visible here reads this value — confirm it is still needed.
percentage_test_size = 0.2
undersampling_size = 1300 # per-class cap used when loading images; -1 disables undersampling

# Root folder whose immediate sub-folders are listed as classes by the loading cell.
diretorio = './clusters'

In [None]:
# Each immediate sub-folder of `diretorio` is one class; its name is the label.
targets = next(os.walk(diretorio))[1]

# NOTE(review): the first sub-folder returned by os.walk is dropped, exactly as
# the original code did. os.walk does not guarantee a listing order, so confirm
# which folder this is meant to exclude.
targets = targets[1:]

# Rows of [image path, class label].
dados = []

for classe in targets:
    pasta_classe = os.path.join(diretorio, classe)
    imagens_da_classe = 0

    for img in os.listdir(pasta_classe):
        if not img.endswith('.jpg'):
            continue

        # Join the components directly: a literal '/' component would make
        # os.path.join throw away everything that precedes it.
        dados.append([os.path.join(pasta_classe, img), classe])
        imagens_da_classe += 1

        # Cap the number of images per class (-1 means no cap). Only files that
        # were actually added count towards the cap.
        if undersampling_size != -1 and imagens_da_classe == undersampling_size:
            break


# 3) Separar features e targets

In [None]:
# Turn the collected [path, label] rows into an array and shuffle the rows in
# place so that classes are no longer grouped by folder.
dados = np.array(dados)
np.random.shuffle(dados)

# Same rows as a labelled table: image path and class label.
colunas = ['image', 'target']
df_dados = pd.DataFrame(data=dados, columns=colunas)

df_dados

In [None]:
def getEmbedder():
    """Build the pretrained network used to turn images into feature vectors.

    Returns:
        An EfficientNetB3 instance created with weights='imagenet' and
        include_top=False.
    """
    # Other backbones can be swapped in here with the same keyword arguments:
    # EfficientNetB0, EfficientNetB2, EfficientNetB4, EfficientNetB7, InceptionV3.
    opcoes = dict(weights='imagenet', include_top=False)
    return EfficientNetB3(**opcoes)

In [None]:
# Size (in pixels) every image is resized to before it is embedded.
largura, altura = 150, 150

def embedding(embedder, img_path):
    """Return a 1-D feature vector for the image stored at ``img_path``.

    Args:
        embedder: Model whose ``predict`` returns a feature array with the
            channels on the last axis (e.g. the network from ``getEmbedder``).
        img_path: Path of the image file to load.

    Returns:
        1-D NumPy array with one value per output channel.
    """
    img = image.load_img(img_path, target_size=(altura, largura))
    embed = image.img_to_array(img)
    # The model works on batches, so add a leading batch axis of size 1.
    embed = np.expand_dims(embed, axis=0)
    # NOTE: preprocess_input is intentionally not applied. Keras' EfficientNet
    # models rescale their input internally and their preprocess_input is a
    # pass-through. Other backbones (e.g. InceptionV3) need their own
    # preprocess_input — revisit this if getEmbedder changes.

    # verbose=0: predict runs once per image, so progress bars would flood the
    # notebook output.
    features = np.asarray(embedder.predict(embed, verbose=0))

    # Average over the batch (size 1) and every spatial position. Indexing
    # [0][0][0] kept only the top-left cell of the feature map and discarded
    # the rest of the image; the result length (number of channels) is the same.
    return features.reshape(-1, features.shape[-1]).mean(axis=0)

embedder = getEmbedder()

In [None]:
# Class label of every image, in the same row order as df_dados.
target = df_dados['target'].array

# Embed every image and stack the vectors into one (n_images, n_features)
# matrix. Iterating df_dados['image'] replaces pd.DataFrame(dados).apply(...):
# that frame has integer column names (so x['image'] raised KeyError) and
# apply would have produced a Series of arrays rather than a 2-D matrix.
array_features = np.stack(
    [embedding(embedder, caminho) for caminho in df_dados['image']]
)

# Standardise each feature column.
# NOTE(review): the scaler is fitted on all samples before the cross-validation
# cells run, so validation folds influence the scaling — consider a Pipeline.
scaler = preprocessing.StandardScaler()

array_features = scaler.fit_transform(array_features)

df_features = pd.DataFrame(array_features)

df_features

# 4) Reduzir dimensionalidade

In [None]:
# Project the scaled features onto two dimensions with t-SNE for inspection.
model = TSNE(
    n_components=2,
    learning_rate='auto',
    init='random',
    perplexity=15,
)
array_red = model.fit_transform(df_features)

# Columns 0 and 1 hold the 2-D coordinates; the class label is added for colouring.
df_tsne = pd.DataFrame(array_red).assign(target=target)

plt.rcParams['figure.figsize'] = [15, 10]

sns.scatterplot(
    data=df_tsne,
    x=df_tsne[0],
    y=df_tsne[1],
    hue=df_tsne['target'],
    palette="colorblind",
)

plt.show()

# 5) Treinamento de modelo/rede

In [None]:
def getModel():
    """Create a fresh, unfitted classifier for the evaluation cells.

    Returns:
        A RandomForestClassifier configured with n_jobs=20.
    """
    # Other estimators can be swapped in here: LogisticRegression(),
    # KNeighborsClassifier(n_neighbors=3, n_jobs=20), XGBClassifier(),
    # SGDClassifier(n_jobs=20, early_stopping=True, validation_fraction=0.05).
    # TODO: use a neural network here instead of a classical model.
    classificador = RandomForestClassifier(n_jobs=20)
    return classificador

## Verificação de score

In [None]:
# 10-fold cross-validation of a fresh model; the cell shows the mean fold score.
scores = cross_val_score(estimator=getModel(), X=df_features.values, y=target, cv=10)

scores.mean()

## Matriz de confusão

In [None]:
# Out-of-fold predictions: each sample is predicted by a model that did not see
# it during fitting (10-fold cross-validation).
predicoes = cross_val_predict(getModel(), df_features.values, target, cv=10)

# Fix the label order explicitly so that the matrix rows/columns and the tick
# labels are guaranteed to agree.
classes = np.unique(np.asarray(target))

cm = confusion_matrix(target, predicoes, labels=classes)

# The labels come from the data: the global `model` is the t-SNE object, which
# has no `classes_` attribute.
disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=classes)
disp.plot()

plt.show()