# Import libraries

In [None]:
import numpy as np 
import os
import keras
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from keras.models import Sequential
from PIL import Image
from keras.layers import Conv2D,Flatten,Dense,Dropout,BatchNormalization,MaxPooling2D
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [None]:
import kagglehub

# Fetch the latest version of the "imagesoasis" dataset through kagglehub and
# keep the local directory it was stored in; later cells build paths from it.
path = kagglehub.dataset_download("ninadaithal/imagesoasis")
print("Path to dataset files:", path)

# Import Dataset

In [None]:
import os
import random

def collect_image_paths(directory):
    """Return the path of every file found under ``directory``, recursively.

    The tree is traversed with ``os.walk`` and each file name is joined to the
    folder that contains it. No filtering by extension is applied, and a
    directory that does not exist simply yields an empty list.
    """
    return [
        os.path.join(folder, file_name)
        for folder, _subdirs, file_names in os.walk(directory)
        for file_name in file_names
    ]

# Gather every file path per class. os.walk yields entries in an arbitrary,
# filesystem-dependent order, so each list is sorted first; otherwise the
# seeded sampling below could select different files on different machines.
path1 = sorted(collect_image_paths(os.path.join(path, 'Data/Non Demented')))
path2 = sorted(collect_image_paths(os.path.join(path, 'Data/Mild Dementia')))
path3 = sorted(collect_image_paths(os.path.join(path, 'Data/Moderate Dementia')))
path4 = sorted(collect_image_paths(os.path.join(path, 'Data/Very mild Dementia')))

# Upper bound on the number of images drawn from each class
size = 400  # You can change this value as needed

# Set seed for reproducibility
random.seed(42)

# Draw at most `size` paths per class (fewer when the class has fewer files)
sample_path1 = random.sample(path1, min(size, len(path1)))
sample_path2 = random.sample(path2, min(size, len(path2)))
sample_path3 = random.sample(path3, min(size, len(path3)))
sample_path4 = random.sample(path4, min(size, len(path4)))

# Output the sample sizes
print(f'Sampled {len(sample_path1)} paths from Non Demented')
print(f'Sampled {len(sample_path2)} paths from Mild Dementia')
print(f'Sampled {len(sample_path3)} paths from Moderate Dementia')
print(f'Sampled {len(sample_path4)} paths from Very mild Dementia')


# EDA (Exploratory Data Analysis)

In [None]:
import os

# Folder names of the four classes, located under <path>/Data.
classes = ['Non Demented', 'Mild Dementia', 'Moderate Dementia', 'Very mild Dementia']

# Report how many regular files each class folder contains.
for class_name in classes:
    class_path = os.path.join(path, f"Data/{class_name}")
    num_images = sum(
        1
        for entry in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, entry))
    )
    print(f"Classe '{class_name}': {num_images} imagens")

In [None]:
import matplotlib.pyplot as plt
from PIL import Image

def show_images_from_class(class_name, num_images=5):
    """Display the first ``num_images`` images of a class in a single row.

    Args:
        class_name: Name of the class folder under ``<path>/Data``.
        num_images: Maximum number of images to show; fewer are shown when
            the folder holds fewer files.
    """
    class_path = os.path.join(path, f"Data/{class_name}")
    # os.listdir returns entries in arbitrary order; sorting makes the preview
    # show the same files on every run.
    images = sorted(
        os.path.join(class_path, f)
        for f in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, f))
    )
    plt.figure(figsize=(15, 5))
    for i, img_path in enumerate(images[:num_images]):
        plt.subplot(1, num_images, i + 1)
        # Draw while the file is still open, then let the context manager
        # release the file handle instead of leaking it.
        with Image.open(img_path) as img:
            plt.imshow(img)
        plt.title(class_name)
        plt.axis("off")
    plt.show()

# Show a row of sample images for every class in `classes`.
for class_name in classes:
    show_images_from_class(class_name)

In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Count the regular files in every class folder.
data_distribution = []
for class_name in classes:
    class_path = os.path.join(path, f"Data/{class_name}")
    num_images = len([f for f in os.listdir(class_path) if os.path.isfile(os.path.join(class_path, f))])
    data_distribution.append({"Class": class_name, "Amount": num_images})

df_distribution = pd.DataFrame(data_distribution)

# Short display names for the x axis, listed in the same order as `classes`.
df_distribution['Class'] = ['non', 'mild', 'moderate', 'very']

plt.figure(figsize=(8, 6))
ax = sns.barplot(x="Class", y="Amount", data=df_distribution, palette="viridis")

# Write the exact count just above each bar.
for p in ax.patches:
    ax.annotate(f'{int(p.get_height())}',
                (p.get_x() + p.get_width() / 2., p.get_height()),
                ha='center', va='baseline', fontsize=10, color='black', xytext=(0, 5),
                textcoords='offset points')

plt.title("Class Image Distribution", fontsize=14)
# The classes are dementia stages, not tumor types.
plt.xlabel("Dementia class", fontsize=12)
plt.ylabel("Count", fontsize=12)
plt.show()

In [None]:
# Record the size reported by PIL for every image file of every class.
dimensions = []
for class_name in classes:
    class_path = os.path.join(path, f"Data/{class_name}")
    candidates = (os.path.join(class_path, name) for name in os.listdir(class_path))
    images = [candidate for candidate in candidates if os.path.isfile(candidate)]
    for img_path in images:
        with Image.open(img_path) as img:
            dimensions.append(img.size)

# PIL sizes are (width, height) pairs, matching the column order below.
df_dimensions = pd.DataFrame(dimensions, columns=["Largura", "Altura"])

print("Image Sizes:")
print(df_dimensions.describe())

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# Configure the data-augmentation generator
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Pick one image from the 'Moderate Dementia' sample
img_path = sample_path3[0]
# Force three channels so the array is (128, 128, 3) whatever the file's mode,
# and close the file handle once the pixels are in memory.
with Image.open(img_path) as source_img:
    img = source_img.convert("RGB").resize((128, 128))
# Add a leading batch axis -> (1, height, width, 3). Unlike reshaping with
# img.size, which is (width, height), this cannot swap the two spatial axes.
img_array = np.expand_dims(np.array(img), axis=0)

# Generate augmented images
plt.figure(figsize=(15, 5))

# Show the original image
plt.subplot(1, 6, 1)
plt.imshow(img)
plt.title("Original")
plt.axis('off')

# Show augmented images; datagen.flow yields batches indefinitely, so the loop
# is stopped explicitly.
for i, batch in enumerate(datagen.flow(img_array, batch_size=1)):
    augmented_img = batch[0].astype(np.uint8)  # Convert to a displayable dtype
    plt.subplot(1, 6, i + 2)
    plt.imshow(augmented_img)
    plt.title(f"Augmented {i+1}")
    plt.axis('off')
    if i == 4:  # Show 5 augmented images
        break

plt.suptitle("Comparação: Original e Imagens Augmentadas - Moderate Dementia", fontsize=16)
plt.tight_layout()
plt.show()

# One Hot Encoding

In [None]:
# Integer label ids used for the classes:
#   0 --> Non Demented
#   1 --> Mild Dementia
#   2 --> Moderate Dementia
#   3 --> Very Mild Dementia

# Create the encoder and fit it on the four label ids in one step. Each id is
# wrapped in its own row because the encoder is fitted on 2-D input.
encoder = OneHotEncoder().fit([[class_id] for class_id in range(4)])

In [None]:
# Load every sampled image as a 128x128 RGB array together with its one-hot label.
# The loop variable is deliberately NOT called `path`: that name holds the
# dataset root directory and is still needed by later cells.
data = []
result = []
for class_id, sampled_paths in enumerate(
    (sample_path1, sample_path2, sample_path3, sample_path4)
):
    # One-hot row for this class, shape (1, 4); computed once per class.
    one_hot = encoder.transform([[class_id]]).toarray()
    for image_path in sampled_paths:
        # Converting to RGB guarantees a (128, 128, 3) array, so images stored
        # in another mode are kept instead of being silently dropped. The
        # context manager releases the file handle after the pixels are read.
        with Image.open(image_path) as image_file:
            pixels = np.array(image_file.convert("RGB").resize((128, 128)))
        data.append(pixels)
        result.append(one_hot)

In [None]:
# Combine the per-image arrays into one array and display its shape.
data = np.asarray(data)
data.shape

In [None]:
# Stack the one-hot rows into an (n_samples, 4) matrix. The row count is
# inferred (-1) instead of hard-coded, so the reshape stays valid when a class
# contributes fewer images than requested or the sample size is changed.
result = np.array(result).reshape(-1, 4)
result.shape

# Splitting The Data

In [None]:
# Hold out 15% of the samples for testing; the fixed seed makes the shuffled
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    result,
    test_size=0.15,
    shuffle=True,
    random_state=42,
)

# Creating Model: CNN

In [None]:
import os
from keras.models import load_model
import json

model_path = "./alzheimer_model.keras"
history_path = "./training_history.json"
enable_load_model = False  # Switch to True once testing is finished

# After this cell, `history` is either a plain dict mapping metric names to
# per-epoch value lists, or None when a saved model was loaded without a
# history file. Both branches produce the same type so the plotting cells can
# index it the same way.
if os.path.exists(model_path) and enable_load_model:
    print("Carregando modelo salvo...")
    model = load_model(model_path)

    if os.path.exists(history_path):
        with open(history_path, "r") as f:
            history = json.load(f)
        print("Histórico de treinamento carregado com sucesso!")
    else:
        history = None
        print("Histórico de treinamento não encontrado.")
else:
    print("Nenhum modelo salvo encontrado. Criando um novo modelo...")
    model = Sequential()

    # First convolution block.
    # NOTE(review): the first Conv2D has no activation, unlike the others -
    # confirm whether that is intentional.
    model.add(Conv2D(32, kernel_size=(2, 2), input_shape=(128, 128, 3), padding='same'))
    model.add(Conv2D(32, kernel_size=(2, 2), activation='relu', padding='same'))

    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Second convolution block.
    model.add(Conv2D(64, kernel_size=(2, 2), activation='relu', padding='same'))
    model.add(Conv2D(64, kernel_size=(2, 2), activation='relu', padding='same'))

    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.25))

    # Classifier head: one softmax output per class.
    model.add(Flatten())

    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='Adamax', metrics=['accuracy'])

    # Train the model.
    # NOTE(review): the test split is also used as validation data here, so the
    # test metrics reported later are not from fully unseen data.
    training_run = model.fit(x_train, y_train, epochs=10, batch_size=10, verbose=1, validation_data=(x_test, y_test))
    # Keep only the metrics dict: fit() returns a History object, which cannot
    # be indexed like the dict loaded from JSON in the other branch.
    history = training_run.history
    model.save(model_path)
    # Save the training history
    with open(history_path, "w") as f:
        json.dump(history, f)


# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line).
model.summary()

In [None]:
# Display the shape of the training labels.
y_train.shape  

In [None]:
# Display the shape of the training images.
x_train.shape

In [None]:
# `history` can be a dict (loaded from JSON), a Keras History object (straight
# from model.fit) or None (saved model without a history file). Reduce the
# first two cases to the metrics dict before indexing.
loss_history = getattr(history, 'history', history)
if loss_history is None:
    print("No training history available to plot.")
else:
    plt.plot(loss_history['loss'])
    plt.plot(loss_history['val_loss'])
    plt.title('Model Loss')
    # Epochs run along the x axis; the loss value is on the y axis.
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    # The first curve is the training loss, the second the validation loss.
    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.show()

# Accuracy and Prediction

In [None]:
from matplotlib.pyplot import imshow

def names(number):
    """Translate a class index into its Portuguese display label.

    Indices 0 to 3 map to the four dementia classes; any other value yields
    the error label.
    """
    labels = (
        'Sem demência',
        'Demência Leve',
        'Demência Moderada',
        'Demência Muito Leve',
    )
    # Compare with == (rather than a dict lookup) so any value that equals an
    # index is accepted, exactly like an if/elif chain.
    for index, label in enumerate(labels):
        if number == index:
            return label
    return 'Erro na Predição'

def print_prediction(img_url):
    """Classify one image with the trained model, show it and print the result.

    Args:
        img_url: Location of the image to classify, as accepted by
            ``Image.open``.
    """
    # Open inside a context manager so the file handle is released; convert()
    # returns an in-memory image that remains usable after the file is closed.
    with Image.open(img_url) as source:
        img = source.convert("RGB")
    # Model input: a single 128x128 RGB image with a leading batch axis.
    x = np.array(img.resize((128, 128))).reshape(1, 128, 128, 3)
    res = model.predict_on_batch(x)
    # Index of the highest score in the single output row.
    classification = int(np.argmax(res[0]))
    imshow(img)
    print(str(res[0][classification]*100) + '% de confiança de ser ' + names(classification))
        
# 0 --> Non Demented
# 1 --> Mild Dementia
# 2 --> Moderate Dementia
# 3 --> Very Mild Dementia    

In [None]:
# Classify one image from the 'Moderate Dementia' folder.
# `path` is expected to hold the dataset root directory here.
path_test = os.path.join(path, r'Data/Moderate Dementia/OAS1_0308_MR1_mpr-1_101.jpg')
print_prediction(path_test)

In [None]:
# Classify an image that lives outside the downloaded dataset directory.
# NOTE(review): hard-coded '/kaggle/input/...' path - presumably only available
# on Kaggle with the 'test-images' dataset attached; confirm before running elsewhere.
print_prediction(r'/kaggle/input/test-images/test.jpg')

In [None]:
# Classify a second image from outside the downloaded dataset directory.
# NOTE(review): hard-coded '/kaggle/input/...' path - presumably only available
# on Kaggle with the 'test-images' dataset attached; confirm before running elsewhere.
print_prediction(r'/kaggle/input/test-images/demencia moderada.jpg')

In [None]:

# Classify one image from the 'Very mild Dementia' folder.
# `path` is expected to hold the dataset root directory here.
path_test = os.path.join(path, r'Data/Very mild Dementia/OAS1_0003_MR1_mpr-1_117.jpg')
print_prediction(path_test)

In [None]:
# Classify one image from the 'Mild Dementia' folder.
# `path` is expected to hold the dataset root directory here.
print_prediction(os.path.join(path, 'Data/Mild Dementia/OAS1_0028_MR1_mpr-1_145.jpg'))

In [None]:
# Evaluate on test set.
# The model is compiled with a single metric (accuracy), so evaluate() returns
# two values: the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=1)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

In [None]:
# Plot training & validation accuracy and loss
import matplotlib.pyplot as plt

# `history` can be a dict (loaded from JSON), a Keras History object (straight
# from model.fit) or None (saved model without a history file). Reduce the
# first two cases to the metrics dict before indexing.
metrics_history = getattr(history, 'history', history)

if metrics_history is None:
    print("No training history available to plot.")
else:
    plt.figure(figsize=(12, 5))

    # Accuracy
    plt.subplot(1, 2, 1)
    plt.plot(metrics_history['accuracy'], label='Train Acc')
    plt.plot(metrics_history['val_accuracy'], label='Val Acc')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(metrics_history['loss'], label='Train Loss')
    plt.plot(metrics_history['val_loss'], label='Val Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Fixed label ids and their display names, in matching order. Passing the ids
# explicitly keeps the report and the matrix at four classes even when a class
# is absent from the test split, so the tick labels always line up.
class_ids = [0, 1, 2, 3]
class_labels = ["Non", "Mild", "Mod", "Very Mild"]

# Turn predicted probabilities and one-hot targets into integer class ids.
y_pred_probs = model.predict(x_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

print(
    "Classification Report:\n",
    classification_report(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=class_labels,
        zero_division=0,  # a class with no predictions scores 0 instead of warning
    ),
)

# Confusion Matrix
import seaborn as sns
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()