In [5]:
!brew install numpy

[34m==>[0m [1mAuto-updating Homebrew...[0m
Adjust how often this is run with HOMEBREW_AUTO_UPDATE_SECS or disable with
HOMEBREW_NO_AUTO_UPDATE. Hide these hints with HOMEBREW_NO_ENV_HINTS (see `man brew`).
[34m==>[0m [1mAuto-updated Homebrew![0m
Updated 2 taps (homebrew/core and homebrew/cask).
[34m==>[0m [1mNew Formulae[0m
bkmr                newsraft            tdom                undercutf1
infat               policy-engine       tmex                unoserver
[34m==>[0m [1mNew Casks[0m
elemental                                font-noto-serif-dives-akuru
font-bizter                              font-wdxl-lubrifont-tc

You have [1m10[0m outdated formulae and [1m1[0m outdated cask installed.

[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/numpy/manifests/2.2.5[0m
######################################################################### 100.0%
[32m==>[0m [1mFetching dependencies for numpy: [32mgmp[39m, [32misl[39m, [32mmpfr[39m, [32mlibmpc[

In [None]:
!sudo apt-get install python3-matplotlib

Password:

In [22]:
# Ask Homebrew to install the python@3.13 formula. The recorded output only
# offers a "reinstall 3.13.3" hint, i.e. the formula was already present.
# NOTE(review): this presumably does not change the interpreter used by the
# running kernel — confirm whether this cell is still needed.
!brew install python@3.13

[34m==>[0m [1mDownloading https://formulae.brew.sh/api/formula.jws.json[0m
[34m==>[0m [1mDownloading https://formulae.brew.sh/api/cask.jws.json[0m
To reinstall 3.13.3, run:
  brew reinstall python@3.13


In [13]:
import os
import random
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    classification_report,
    confusion_matrix,
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator




ModuleNotFoundError: No module named 'sklearn'

In [8]:
!brew install scikit-image

[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/scikit-image/manifests/0.25.2[0m
######################################################################### 100.0%
[32m==>[0m [1mFetching dependencies for scikit-image: [32mxsimd[39m and [32mscipy[39m[0m
[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/xsimd/manifests/13.2.0[0m
######################################################################### 100.0%
[32m==>[0m [1mFetching [32mxsimd[39m[0m
[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/xsimd/blobs/sha256:b2e11ecb6ae8[0m
######################################################################### 100.0%
[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/scipy/manifests/1.15.2[0m
######################################################################### 100.0%
[32m==>[0m [1mFetching [32mscipy[39m[0m
[34m==>[0m [1mDownloading https://ghcr.io/v2/homebrew/core/scipy/blobs/sha256:6e721be4b8e3[0m
#################

In [9]:
# Run Homebrew's cleanup for the scikit-image formula; the recorded run
# printed nothing.
# NOTE(review): housekeeping only — presumably safe to drop from the final notebook.
!brew cleanup scikit-image

In [None]:
# Mount Google Drive at /content/drive; the dataset and output paths used by
# this notebook all live under /content/drive/MyDrive.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime — confirm the intended execution environment.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
import os
import shutil
import random
from pathlib import Path

# =========================
# 1. DATA SPLIT
# =========================
# Root of the original image tree that the split step walks recursively.
original_data_dir = '/content/drive/MyDrive/archive/BreaKHis_v1/BreaKHis_v1/histology_slides/breast'
# Destination root; the split step creates train/val/test sub-trees under it.
output_base = '/content/drive/MyDrive/archive/BreaKHis_split'


In [None]:

# Split proportions
train_split = 0.7
val_split = 0.15
test_split = 0.15

# The three proportions must add up to 1.0
assert abs(train_split + val_split + test_split - 1.0) < 1e-6, "As proporções devem somar 1.0"

# Seed for reproducibility
random.seed(42)

# Walk every sub-folder of the original tree and split each folder's images
# independently, mirroring the folder's relative path under each split.
for root, dirs, files in os.walk(original_data_dir):
    # os.walk yields entries in an arbitrary, filesystem-dependent order.
    # Sorting the directory list in place fixes the traversal order so the
    # seeded shuffles below consume the random stream identically on every run.
    dirs.sort()

    if files:
        # Path of this folder relative to the original root
        class_path = Path(root).relative_to(original_data_dir)

        # Keep image files only; sort first so the seeded shuffle starts from
        # the same sequence each run (otherwise a re-run could place an image
        # in a different split and leave copies in more than one split).
        image_files = sorted(
            f for f in files if f.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        random.shuffle(image_files)

        # Cut points. int() truncates, so whatever is left over after the
        # train and val slices ends up in the test slice.
        total = len(image_files)
        train_end = int(train_split * total)
        val_end = train_end + int(val_split * total)

        # Partition the shuffled list
        splits = {
            'train': image_files[:train_end],
            'val': image_files[train_end:val_end],
            'test': image_files[val_end:]
        }

        # Copy each file into its split folder (metadata preserved by copy2)
        for split, split_files in splits.items():
            split_dir = os.path.join(output_base, split, str(class_path))
            os.makedirs(split_dir, exist_ok=True)
            for fname in split_files:
                src = os.path.join(root, fname)
                dst = os.path.join(split_dir, fname)
                shutil.copy2(src, dst)

print("✅ Divisão dos dados concluída.")


In [None]:
# =========================
# 2. DATA PREPARATION
# =========================
img_width, img_height = 150, 150
batch_size = 32

# One directory per split, all rooted at output_base.
train_data_dir, val_data_dir, test_data_dir = (
    os.path.join(output_base, subset) for subset in ('train', 'val', 'test')
)

for data_dir in (train_data_dir, val_data_dir, test_data_dir):
    print(data_dir)

# Training images are rescaled to [0, 1] and randomly augmented;
# validation and test images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
val_test_datagen = ImageDataGenerator(rescale=1. / 255)

# Options shared by the three directory iterators.
# NOTE(review): Keras documents target_size as (height, width); the order is
# immaterial while both values are 150 — confirm before using non-square sizes.
flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)
val_generator = val_test_datagen.flow_from_directory(val_data_dir, **flow_options)
# The test iterator is the only one created with shuffling disabled.
test_generator = val_test_datagen.flow_from_directory(
    test_data_dir, shuffle=False, **flow_options
)

/content/drive/MyDrive/archive/BreaKHis_split/train
/content/drive/MyDrive/archive/BreaKHis_split/val
/content/drive/MyDrive/archive/BreaKHis_split/test
Found 5436 images belonging to 2 classes.
Found 1042 images belonging to 2 classes.
Found 1502 images belonging to 2 classes.


In [None]:
# =========================
# 3. MODEL
# =========================
# Small CNN: three conv + max-pool stages, then a dense head with one softmax
# unit per class reported by the training generator.
# The input shape is declared with an explicit Input layer rather than an
# `input_shape` argument on the first Conv2D, which current Keras warns about.
model = Sequential([
    tf.keras.Input(shape=(img_width, img_height, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(train_generator.num_classes, activation='softmax')
])

# Categorical cross-entropy matches the generators' class_mode='categorical'.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


In [None]:
# =========================
# 4. CALLBACKS
# =========================
checkpoint_path = '/content/drive/MyDrive/breast_cancer_best_model.h5'

# Both callbacks watch the same quantity.
watched_metric = 'val_loss'

# Write the model to checkpoint_path only when the watched metric improves.
model_checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor=watched_metric,
    save_best_only=True,
)

# Stop after 3 epochs without improvement and restore the best weights seen.
early_stopping = EarlyStopping(
    monitor=watched_metric,
    patience=3,
    restore_best_weights=True,
)

In [None]:
# =========================
# 5. TRAINING
# =========================
# Fit on the training generator, validating on val_generator after each epoch;
# the callbacks handle early stopping and best-model checkpointing.
fit_options = {
    'validation_data': val_generator,
    'epochs': 10,
    'callbacks': [early_stopping, model_checkpoint],
}
history = model.fit(train_generator, **fit_options)

Epoch 1/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13s/step - accuracy: 0.6861 - loss: 0.6636 



[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2582s[0m 15s/step - accuracy: 0.6865 - loss: 0.6629 - val_accuracy: 0.7994 - val_loss: 0.4753
Epoch 2/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.7996 - loss: 0.4712



[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 1s/step - accuracy: 0.7996 - loss: 0.4712 - val_accuracy: 0.8445 - val_loss: 0.4683
Epoch 3/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.8187 - loss: 0.4564



[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 1s/step - accuracy: 0.8187 - loss: 0.4563 - val_accuracy: 0.8417 - val_loss: 0.4132
Epoch 4/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m267s[0m 1s/step - accuracy: 0.8023 - loss: 0.4672 - val_accuracy: 0.8282 - val_loss: 0.4323
Epoch 5/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.8381 - loss: 0.4116



[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 1s/step - accuracy: 0.8380 - loss: 0.4117 - val_accuracy: 0.8426 - val_loss: 0.3911
Epoch 6/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 1s/step - accuracy: 0.8417 - loss: 0.4069 - val_accuracy: 0.8061 - val_loss: 0.4691
Epoch 7/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.8434 - loss: 0.4026



[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 1s/step - accuracy: 0.8434 - loss: 0.4026 - val_accuracy: 0.8580 - val_loss: 0.3766
Epoch 8/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m212s[0m 1s/step - accuracy: 0.8473 - loss: 0.3979 - val_accuracy: 0.8541 - val_loss: 0.4044
Epoch 9/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m213s[0m 1s/step - accuracy: 0.8475 - loss: 0.3970 - val_accuracy: 0.8551 - val_loss: 0.3865
Epoch 10/10
[1m170/170[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 1s/step - accuracy: 0.8560 - loss: 0.3860 - val_accuracy: 0.8311 - val_loss: 0.3980


In [None]:


# =========================
# 6. EVALUATION AND METRICS
# =========================
# Predict on the test generator and take the arg-max class per sample.
# NOTE(review): comparing against test_generator.classes assumes predictions
# come back in directory order, which relies on the generator having been
# created with shuffle=False — confirm if the generator setup changes.
predictions = model.predict(test_generator)
y_pred = np.argmax(predictions, axis=1)
y_true = test_generator.classes
class_labels = list(test_generator.class_indices.keys())

# Confusion matrix. The title, layout and save go through the display's own
# axes/figure so they cannot land on some other "current" matplotlib figure.
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
disp.plot(cmap=plt.cm.Blues, xticks_rotation=45)
disp.ax_.set_title("Matriz de Confusão - Conjunto de Teste")
disp.figure_.tight_layout()

conf_matrix_path = '/content/drive/MyDrive/matriz_confusao.png'
disp.figure_.savefig(conf_matrix_path)
print(f"✅ Matriz de confusão salva em: {conf_matrix_path}")

# Classification report. The header contains non-ASCII characters, so the
# file encoding is set explicitly instead of relying on the platform default.
report = classification_report(y_true, y_pred, target_names=class_labels)
report_path = '/content/drive/MyDrive/relatorio_classificacao.txt'
with open(report_path, 'w', encoding='utf-8') as f:
    f.write("Relatório de Classificação - Conjunto de Teste\n\n")
    f.write(report)
print(f"✅ Relatório salvo em: {report_path}")

# =========================
# 7. TRAINING CURVES
# =========================
# Two side-by-side panels (accuracy, loss), each showing the training and
# validation series recorded in `history`.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

# (axes, training key, validation key, title, y-axis label)
panels = (
    (acc_ax, 'accuracy', 'val_accuracy', 'Acurácia por Época', 'Acurácia'),
    (loss_ax, 'loss', 'val_loss', 'Perda por Época', 'Loss'),
)

for ax, train_key, val_key, panel_title, y_label in panels:
    ax.plot(history.history[train_key], label='Treinamento')
    ax.plot(history.history[val_key], label='Validação')
    ax.set_title(panel_title)
    ax.set_xlabel('Época')
    ax.set_ylabel(y_label)
    ax.legend()

# Save the figure
plot_path = '/content/drive/MyDrive/curvas_treinamento.png'
fig.tight_layout()
fig.savefig(plot_path)
print(f"✅ Curvas de treinamento salvas em: {plot_path}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  self._warn_if_super_not_called()


Epoch 1/10
[1m 10/170[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m38:30[0m 14s/step - accuracy: 0.5031 - loss: 1.0083

KeyboardInterrupt: 