In [1]:
import time
import math
import json
import os
import sys
import matplotlib.pyplot as plt
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import tensorflow as tf
import numpy as np

In [2]:
DATA_DIR = 'D:/Projects/ThyroidCancer/Data/slice_datav1'
TARGET_SIZE = (224, 224)
BATCH_SIZE = 12

In [3]:
import numpy as np
import tensorflow as tf

# Đếm số lượng mẫu trong mỗi nhãn
label_counts = [41, 216, 312]  # Số lượng mẫu của các nhãn tương ứng

# Tính toán trọng số cho mỗi nhãn
max_count = max(label_counts)
class_weights = [max_count / count for count in label_counts]

# Tạo mảng trọng số tương ứng với từng mẫu
sample_weights = np.array([class_weights[label] for label in [0, 1, 2]])

In [4]:
# Data generators
train_datagen = ImageDataGenerator(rescale=1/255.0)
valid_datagen = ImageDataGenerator(rescale=1/255.0)
test_datagen = ImageDataGenerator(rescale=1/255.0)

In [5]:
train_generator = train_datagen.flow_from_directory(
    os.path.join(DATA_DIR, 'train'),
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

valid_generator = valid_datagen.flow_from_directory(
    os.path.join(DATA_DIR, 'test'),
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)

test_generator = test_datagen.flow_from_directory(
    os.path.join(DATA_DIR, 'valid'),
    target_size=TARGET_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

# Display information
print("Class Indices: ", train_generator.class_indices)
print(f"Number of training samples: {train_generator.samples}")
print(f"Number of validation samples: {valid_generator.samples}")
print(f"Number of test samples: {test_generator.samples}")

print("Training set:")
for class_name, idx in train_generator.class_indices.items():
    num_files = len(os.listdir(os.path.join(DATA_DIR, 'train', class_name)))
    print(f"{class_name} ({idx}): {num_files} files")

print("Validation set:")
for class_name, idx in valid_generator.class_indices.items():
    num_files = len(os.listdir(os.path.join(DATA_DIR, 'valid', class_name)))
    print(f"{class_name} ({idx}): {num_files} files")

print("Test set:")
for class_name, idx in test_generator.class_indices.items():
    num_files = len(os.listdir(os.path.join(DATA_DIR, 'test', class_name)))
    print(f"{class_name} ({idx}): {num_files} files")

Found 569 images belonging to 3 classes.
Found 287 images belonging to 3 classes.
Found 569 images belonging to 3 classes.
Class Indices:  {'B2': 0, 'B5': 1, 'B6': 2}
Number of training samples: 569
Number of validation samples: 287
Number of test samples: 569
Training set:
B2 (0): 41 files
B5 (1): 216 files
B6 (2): 312 files
Validation set:
B2 (0): 41 files
B5 (1): 216 files
B6 (2): 312 files
Test set:
B2 (0): 21 files
B5 (1): 109 files
B6 (2): 157 files


In [6]:
# Function to count the number of samples in each class
def count_samples(generator, class_indices):
    sample_counts = {class_name: 0 for class_name in class_indices.keys()}
    
    for _ in range(len(generator)):
        _, labels = next(generator)
        for label in labels:
            class_name = list(class_indices.keys())[np.argmax(label)]
            sample_counts[class_name] += 1
    
    return sample_counts

# Count the number of samples in each class after augmentation
train_counts = count_samples(train_generator, train_generator.class_indices)
valid_counts = count_samples(valid_generator, valid_generator.class_indices)
test_counts = count_samples(test_generator, test_generator.class_indices)

# Print the sample counts
print("Sample counts after augmentation:")
print("Training set:")
for class_name, count in train_counts.items():
    print(f"{class_name}: {count} samples")

print("Validation set:")
for class_name, count in valid_counts.items():
    print(f"{class_name}: {count} samples")

print("Test set:")
for class_name, count in test_counts.items():
    print(f"{class_name}: {count} samples")

Sample counts after augmentation:
Training set:
B2: 41 samples
B5: 216 samples
B6: 312 samples
Validation set:
B2: 21 samples
B5: 109 samples
B6: 157 samples
Test set:
B2: 41 samples
B5: 216 samples
B6: 312 samples


In [7]:
def save_history(history):
    acc = pd.Series(history.history["accuracy"], name="accuracy")
    loss = pd.Series(history.history["loss"], name="loss")
    val_acc = pd.Series(history.history["val_accuracy"], name="val_accuracy")
    val_loss = pd.Series(history.history["val_loss"], name="val_loss")
    com = pd.concat([acc, loss, val_acc, val_loss], axis=1)
    com.to_csv("slice_datav1_vgg0_1_history.csv", index=False)

In [8]:
def plot_history(history):
    plt.plot(history.history["accuracy"])
    plt.plot(history.history["val_accuracy"])
    plt.plot(history.history["loss"])
    plt.plot(history.history["val_loss"])
    plt.title("Model Accuracy and Loss")
    plt.ylabel("Accuracy/Loss")
    plt.xlabel("Epoch")
    plt.legend(["accuracy", "val_accuracy", "loss", "val_loss"], loc="upper right")
    plt.savefig("slice_datav1_vgg0_1_model_accuracy_loss.png")
    # plt.show()

In [10]:
Inp = Input((224, 224, 3))
base_model = tf.keras.applications.vgg16.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model(Inp)
x = Flatten()(x)
# x = GlobalAveragePooling2D()(x) # số lượng đặc trưng được duỗi ra là 512
x = Dense(4096, activation='relu')(x)  
x = Dropout(0.5)(x)
x = Dense(4096, activation='relu')(x)   
x = Dropout(0.5)(x)
predictions = Dense(len(classes), activation="softmax")(x)
finetuned_model = Model(inputs=Inp, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False

In [11]:
print('Model architecture:')
finetuned_model.summary()

finetuned_model.compile(
    optimizer=Adam(learning_rate=0.0001), loss="categorical_crossentropy", metrics=["accuracy"]
)

for c in train_generator.class_indices:
    classes[train_generator.class_indices[c]] = c
finetuned_model.classes = classes
early_stopping = EarlyStopping(patience=5*2)
checkpointer = ModelCheckpoint(
    "vgg0.1_best_slice_datav1.keras",
    verbose=1,
    save_best_only=True,
)

Model architecture:


In [12]:
def combined_generator(train_generator, test_generator):
    while True:
        for data in train_generator:
            yield data
        for data in test_generator:
            yield data

combined_gen = combined_generator(train_generator, test_generator)

In [13]:
# Convert generators to tf.data.Dataset
def generator_to_tfdata(generator):
    dataset = tf.data.Dataset.from_generator(
        lambda: generator,
        output_signature=(
            tf.TensorSpec(shape=(None, TARGET_SIZE[0], TARGET_SIZE[1], 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 3), dtype=tf.float32),
        )
    )
    return dataset

train_dataset = generator_to_tfdata(combined_gen).repeat()
valid_dataset = generator_to_tfdata(valid_generator).repeat()

In [14]:
sample_weights

array([7.6097561 , 1.44444444, 1.        ])

In [15]:
# Define your class weights
class_weights = {0: sample_weights[0], 1: sample_weights[1], 2: sample_weights[2]}  # replace with your actual weights

In [17]:
# Tính số bước cho mỗi epoch
num_train_steps = math.ceil(train_generator.samples * 2 / BATCH_SIZE)
num_valid_steps = math.ceil(valid_generator.samples / BATCH_SIZE)
classes = list(iter(train_generator.class_indices))
print(f'Num train step: {num_train_steps}\nNum valid step: {num_valid_steps}\nClasses: {classes}')

Num train step: 95
Num valid step: 24
Classes: ['B2', 'B5', 'B6']


In [18]:
# Update the fit() function to remove the sample_weights parameter
History = finetuned_model.fit(
    train_dataset,
    steps_per_epoch=num_train_steps,
    epochs=100,
    callbacks=[early_stopping, checkpointer],
    validation_data=valid_dataset,
    validation_steps=num_valid_steps,
    class_weight=class_weights
)


Epoch 1/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.4781 - loss: 2.6367
Epoch 1: val_loss improved from inf to 1.01919, saving model to vgg0.1_best_slice_datav1.keras
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 3s/step - accuracy: 0.4785 - loss: 2.6339 - val_accuracy: 0.5784 - val_loss: 1.0192
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.5919 - loss: 1.6632
Epoch 2: val_loss improved from 1.01919 to 0.81966, saving model to vgg0.1_best_slice_datav1.keras
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m257s[0m 3s/step - accuracy: 0.5921 - loss: 1.6622 - val_accuracy: 0.6098 - val_loss: 0.8197
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.6585 - loss: 1.2562
Epoch 3: val_loss did not improve from 0.81966
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 3s/step - accuracy: 0.6587 - loss: 1.25

In [19]:
accuracy = finetuned_model.evaluate(test_generator, return_dict=True)

  self._warn_if_super_not_called()


[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 1s/step - accuracy: 0.6355 - loss: 1.5590


In [20]:
print(accuracy)

{'accuracy': 0.6994727849960327, 'loss': 1.135553240776062}


In [24]:
accuracy = finetuned_model.evaluate(test_generator, steps=47, return_dict=True)

[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 2s/step - accuracy: 0.6341 - loss: 1.5681


In [25]:
accuracy

{'accuracy': 0.6968085169792175, 'loss': 1.1455230712890625}

In [26]:
accuracy = finetuned_model.evaluate(train_dataset, steps=num_train_steps, return_dict=True)

[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 2s/step - accuracy: 0.9942 - loss: 0.0184


In [27]:
accuracy

{'accuracy': 0.9928951859474182, 'loss': 0.019867219030857086}

---