In [1]:
!pip install opendatasets

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22


In [2]:
import opendatasets as od

In [3]:
od.download(
    "https://www.kaggle.com/datasets/kasikrit/idc-dataset")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: realag253
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/kasikrit/idc-dataset
Downloading idc-dataset.zip to ./idc-dataset


100%|██████████| 1.55G/1.55G [00:11<00:00, 142MB/s]





In [4]:
# Knižnice
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping


In [5]:
#cesta k datasetu
#/content/idc-dataset/IDC


# Načítanie datasetu
DATASET = "/content/idc-dataset/IDC"

TRAINING_DIR = os.path.join(DATASET, "training")
VALIDATION_DIR = os.path.join(DATASET, "validation")
TESTING_DIR = os.path.join(DATASET, "testing")


# Nastavenie veľkosti obrázkov a počet kanálov
IMG_SIZE = 75
COLOR_CHANNELS = 3
BATCH_SIZE = 128


In [6]:
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    horizontal_flip=True,
    zoom_range=0.2
)

datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255
)

train_gen = train_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='sparse'
)

val_gen = datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='sparse'
)

test_gen = datagen.flow_from_directory(
    TESTING_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    shuffle=False
)

NUM_CLASSES = train_gen.num_classes


Found 199818 images belonging to 2 classes.
Found 22201 images belonging to 2 classes.
Found 55505 images belonging to 2 classes.


In [7]:
base_model = InceptionV3(
    weights="imagenet",
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)

# Freeze base model
for layer in base_model.layers:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation="relu")(x)
x = Dropout(0.5)(x)
outputs = Dense(2, activation="softmax")(x)

model = Model(inputs=base_model.input, outputs=outputs)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

model.summary()

In [8]:
early_stopping = EarlyStopping(
    monitor="val_loss",      # best choice for imbalance
    patience=3,              # stop after 3 epochs with no improvement
    restore_best_weights=True,
    verbose=1
)


In [9]:
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    balanced_accuracy_score,
    classification_report,
    confusion_matrix
)



In [10]:
# Unfreeze top layers of InceptionV3
for layer in base_model.layers[-50:]:
    layer.trainable = True

model.compile(
    optimizer=Adam(learning_rate=0.00045),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

history_finetune = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=15,
    callbacks=[early_stopping]
)


# Evaluate
scores = model.evaluate(test_gen, verbose=0)
print(f"Evaluating Model — Loss: {scores[0]:.4f}, Accuracy: {scores[1]:.4f}")

# Evaluate on TEST SET
y_true = []
y_pred = []

test_gen.reset()  # VERY IMPORTANT

# Predict probabilities
y_prob = model.predict(test_gen, verbose=0)

# Convert softmax → class index
y_pred = np.argmax(y_prob, axis=1)

# True labels
y_true = test_gen.classes

"""
for i in range(len(test_gen)):
    images, labels = test_gen[i]
    preds = model.predict(images, verbose=0)

    preds = (preds > 0.5).astype(int).reshape(-1)

    y_pred.extend(preds)
    y_true.extend(labels)

precision = precision_score(y_true, y_pred)
recall    = recall_score(y_true, y_pred)
f1        = f1_score(y_true, y_pred)
bal_acc   = balanced_accuracy_score(y_true, y_pred)

print("\n=== Test Metrics (Imbalanced Dataset) ===")
print(f"Precision:          {precision:.4f}")
print(f"Recall:             {recall:.4f}")
print(f"F1-score:           {f1:.4f}")
print(f"Balanced Accuracy:  {bal_acc:.4f}")
"""

class_names = ["0", "1"]

print("\nClassification Report:")
print(classification_report(
    y_true,
    y_pred,
    target_names=class_names,
    digits=4
))


cm = confusion_matrix(y_true, y_pred)
print("Confusion matrix:\n", cm)


bal_acc = balanced_accuracy_score(y_true, y_pred)
print(f"\nBalanced Accuracy: {bal_acc:.4f}")

  self._warn_if_super_not_called()


Epoch 1/15
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m451s[0m 272ms/step - accuracy: 0.8337 - loss: 0.3969 - val_accuracy: 0.8543 - val_loss: 0.3435
Epoch 2/15
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m395s[0m 253ms/step - accuracy: 0.8552 - loss: 0.3438 - val_accuracy: 0.8587 - val_loss: 0.3338
Epoch 3/15
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m391s[0m 250ms/step - accuracy: 0.8590 - loss: 0.3331 - val_accuracy: 0.8642 - val_loss: 0.3329
Epoch 4/15
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m393s[0m 251ms/step - accuracy: 0.8613 - loss: 0.3273 - val_accuracy: 0.8616 - val_loss: 0.3303
Epoch 5/15
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 258ms/step - accuracy: 0.8642 - loss: 0.3222 - val_accuracy: 0.8646 - val_loss: 0.3250
Epoch 6/15
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m390s[0m 250ms/step - accuracy: 0.8623 - loss: 0.3242 - val_accuracy: 0.8627 - val_loss: