In [27]:
import pandas as pd
import numpy as np
import glob
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
import imageio

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_auc_score

from sentence_transformers import SentenceTransformer, util
from PIL import Image

In [8]:
image_feature_extractor = SentenceTransformer('clip-ViT-B-32')

---
### Tratando Imagens

In [18]:
# Train images

train_filenames_normal = glob.glob('chest_xray/train/NORMAL/*')
train_filenames_pneumonia = glob.glob('chest_xray/train/PNEUMONIA/*')

X_train = []

for filename in tqdm(train_filenames_normal):
    img = Image.open(filename)
    # img = cv2.imread(filename, 0)
    # img = cv2.resize(img, dsize=(160, 120), interpolation=cv2.INTER_CUBIC)
    # imageio.imwrite(filename, img)
    # X_train.append(img.flatten())
    X_train.append(img)

for filename in tqdm(train_filenames_pneumonia):
    img = Image.open(filename)
    # img = cv2.imread(filename, 0)
    # img = cv2.resize(img, dsize=(160, 120), interpolation=cv2.INTER_CUBIC)
    # imageio.imwrite(filename, img)
    # X_train.append(img.flatten())
    X_train.append(img)


# imgs = []
# for imagem in L:
#   imgs.append(Image.open(imagem))

embeddings = image_feature_extractor.encode(X_train)

# embeddings.shape

X_train = pd.DataFrame()
# X_train['imgs'] = L
X_train['embeddings'] = list(embeddings)
X_train


y_zeros = np.zeros(len(train_filenames_normal))
y_ones = np.ones(len(train_filenames_pneumonia))
y_train = np.concatenate([y_zeros,y_ones])

100%|██████████| 1341/1341 [00:07<00:00, 190.61it/s]
100%|██████████| 3875/3875 [00:18<00:00, 206.20it/s]


In [21]:
# Test images
test_filenames_normal = glob.glob('chest_xray/test/NORMAL/*')
test_filenames_pneumonia = glob.glob('chest_xray/test/PNEUMONIA/*')

X_test = []

for filename in tqdm(test_filenames_normal):
    img = Image.open(filename)
    # img = cv2.resize(img, dsize=(160, 120), interpolation=cv2.INTER_CUBIC)
    # imageio.imwrite(filename, img)
    X_test.append(img)


for filename in tqdm(test_filenames_pneumonia):
    img = Image.open(filename)
    # img = cv2.resize(img, dsize=(160, 120), interpolation=cv2.INTER_CUBIC)
    # imageio.imwrite(filename, img)
    X_test.append(img)


embeddings = image_feature_extractor.encode(X_test)

# embeddings.shape

X_test = pd.DataFrame()
# X_test['imgs'] = L
X_test['embeddings'] = list(embeddings)
X_test

y_zeros = np.zeros(len(test_filenames_normal))
y_ones = np.ones(len(test_filenames_pneumonia))
y_test = np.concatenate([y_zeros,y_ones])

100%|██████████| 234/234 [00:00<00:00, 5274.86it/s]
100%|██████████| 390/390 [00:00<00:00, 6004.09it/s]


In [22]:
# Validation images

val_filenames_normal = glob.glob('chest_xray/val/NORMAL/*')
val_filenames_pneumonia = glob.glob('chest_xray/val/PNEUMONIA/*')
val_imgs = []

for filename in tqdm(val_filenames_normal):
    img = Image.open(filename)
    # img = cv2.imread(filename, 0)
    # img = cv2.resize(img, dsize=(160, 120), interpolation=cv2.INTER_CUBIC)
    # imageio.imwrite(filename, img)
    val_imgs.append(img)


for filename in tqdm(val_filenames_pneumonia):
    img = Image.open(filename)
    # img = cv2.imread(filename, 0)
    # img = cv2.resize(img, dsize=(160, 120), interpolation=cv2.INTER_CUBIC)
    # imageio.imwrite(filename, img)
    val_imgs.append(img)
    
    
embeddings = image_feature_extractor.encode(val_imgs)

# embeddings.shape

X_val = pd.DataFrame()
# X_val['imgs'] = L
X_val['embeddings'] = list(embeddings)

y_zeros = np.zeros(len(val_filenames_normal))
y_ones = np.ones(len(val_filenames_pneumonia))
y_val = np.concatenate([y_zeros,y_ones])

100%|██████████| 8/8 [00:00<00:00, 2269.80it/s]
100%|██████████| 8/8 [00:00<00:00, 2664.53it/s]


---
### Extraindo embeddings

In [None]:
# Instanciando extrator de embeddings
image_feature_extractor = SentenceTransformer('clip-ViT-B-32')

# lendo arquivos das imagens
import glob
normal_train = glob.glob("archive/chest_xray/train/NORMAL/*.jpeg")
pneumonia_train = glob.glob("archive/chest_xray/train/PNEUMONIA/*.jpeg")
normal_test = glob.glob("archive/chest_xray/test/NORMAL/*.jpeg")
pneumonia_test = glob.glob("archive/chest_xray/test/PNEUMONIA/*.jpeg")

# lendo imagens
imgs = []
for imagem in normal_train:
  imgs.append(Image.open(imagem))
normal_train = imgs

imgs = []
for imagem in pneumonia_train:
  imgs.append(Image.open(imagem))
pneumonia_train = imgs

imgs = []
for imagem in normal_test:
  imgs.append(Image.open(imagem))
normal_test = imgs

imgs = []
for imagem in pneumonia_test:
  imgs.append(Image.open(imagem))
pneumonia_test = imgs

# Gerando embeddings
normal_train = image_feature_extractor.encode(normal_train)
pneumonia_train = image_feature_extractor.encode(pneumonia_train)
normal_test = image_feature_extractor.encode(normal_test)
pneumonia_test = image_feature_extractor.encode(pneumonia_test)

---
### Criando dataframes

In [11]:
# Dataframes
normal_train = pd.DataFrame(normal_train)
normal_train['Class'] = 0
pneumonia_train = pd.DataFrame(pneumonia_train)
pneumonia_train['Class'] = 1
normal_test = pd.DataFrame(normal_test)
normal_test['Class'] = 0
pneumonia_test = pd.DataFrame(pneumonia_test)
pneumonia_test['Class'] = 1

In [21]:
train = pd.concat([normal_train,pneumonia_train],ignore_index=True)
test = pd.concat([normal_test,pneumonia_test],ignore_index=True)
X_train = train.drop('Class',axis=1)
y_train = train['Class']
X_test = test.drop('Class',axis=1)
y_test = test['Class']

---
### Testando KNN --Baseline--

In [24]:
# Aplicar KNN --Baseline--
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train,y_train)

In [29]:
print(knn.score(X_test,y_test))
print(roc_auc_score(y_test,knn.predict_proba(X_test)[:,1]))

0.7884615384615384
0.858212798597414


---
### Testando rede neural