In [1]:
import os
import shutil
import random
from tqdm import tqdm

# Configuration
BASE_DIR = '/content/drive/MyDrive'
DATASET_DIR = os.path.join(BASE_DIR, 'Liver_Dataset')
SPLIT_DIR = os.path.join(BASE_DIR, 'Liver_Dataset_Split')
SEED = 42

# Count images in each class before any processing
original_counts = {}
for cls in os.listdir(DATASET_DIR):
    class_path = os.path.join(DATASET_DIR, cls)
    if os.path.isdir(class_path):
        original_counts[cls] = len([f for f in os.listdir(class_path) if f.endswith(('.jpg', '.jpeg', '.png'))])
print(f"Original image counts: {original_counts}")

# Split dataset into train, validation, and test sets
def split_dataset(source_dir, output_dir, split_ratios=(0.7, 0.15, 0.15)):
    random.seed(SEED)
    classes = os.listdir(source_dir)
    for split in ['train', 'val', 'test']:
        for cls in classes:
            os.makedirs(os.path.join(output_dir, split, cls), exist_ok=True)
    for cls in tqdm(classes, desc="Splitting data"):
        class_path = os.path.join(source_dir, cls)
        images = [f for f in os.listdir(class_path) if f.endswith(('.jpg', '.jpeg', '.png'))]
        random.shuffle(images)
        total = len(images)
        train_end = int(split_ratios[0] * total)
        val_end = train_end + int(split_ratios[1] * total)
        splits = {'train': images[:train_end], 'val': images[train_end:val_end], 'test': images[val_end:]}
        for split, img_list in splits.items():
            for img in img_list:
                src = os.path.join(class_path, img)
                dst = os.path.join(output_dir, split, cls, img)
                shutil.copy2(src, dst)
print("Dataset split completed.")

Original image counts: {'Normal': 150, 'CC': 160, 'HCC': 150}
Dataset split completed.


In [2]:
import os
from PIL import Image
from tqdm import tqdm

# Configuration
PREPROCESSED_DIR = '/content/drive/MyDrive/Liver_Dataset_Preprocessed'
SPLIT_DIR = '/content/drive/MyDrive/Liver_Dataset_Split'
IMG_SIZE = (224, 224)

# Resize images and count pre-augmented images
def resize_images(source_dir, target_dir, size=IMG_SIZE):
    pre_aug_counts = {}
    for split in ['train', 'val', 'test']:
        for label in os.listdir(os.path.join(source_dir, split)):
            src_path = os.path.join(source_dir, split, label)
            tgt_path = os.path.join(target_dir, split, label)
            os.makedirs(tgt_path, exist_ok=True)
            pre_aug_counts[f"{split}/{label}"] = len([f for f in os.listdir(src_path) if f.endswith(('.jpg', '.jpeg', '.png'))])
            for img_file in tqdm(os.listdir(src_path), desc=f"Resizing {split}/{label}"):
                if not img_file.endswith(('.jpg', '.jpeg', '.png')):
                    continue
                try:
                    img = Image.open(os.path.join(src_path, img_file)).convert('RGB')
                    img = img.resize(size)
                    img.save(os.path.join(tgt_path, img_file))
                except Exception as e:
                    print(f"Error processing {img_file}: {e}")
    print(f"Pre-augmentation image counts: {pre_aug_counts}")

resize_images(SPLIT_DIR, PREPROCESSED_DIR)

Resizing train/Normal: 100%|██████████| 140/140 [01:50<00:00,  1.26it/s]
Resizing train/CC: 100%|██████████| 146/146 [01:58<00:00,  1.23it/s]
Resizing train/HCC: 100%|██████████| 139/139 [01:51<00:00,  1.24it/s]
Resizing val/Normal: 100%|██████████| 43/43 [00:53<00:00,  1.24s/it]
Resizing val/CC: 100%|██████████| 42/42 [00:54<00:00,  1.30s/it]
Resizing val/HCC: 100%|██████████| 43/43 [00:55<00:00,  1.30s/it]
Resizing test/Normal: 100%|██████████| 41/41 [00:51<00:00,  1.26s/it]
Resizing test/CC: 100%|██████████| 44/44 [00:51<00:00,  1.18s/it]
Resizing test/HCC: 100%|██████████| 44/44 [00:53<00:00,  1.22s/it]

Pre-augmentation image counts: {'train/Normal': 140, 'train/CC': 145, 'train/HCC': 139, 'val/Normal': 43, 'val/CC': 41, 'val/HCC': 43, 'test/Normal': 41, 'test/CC': 43, 'test/HCC': 44}





In [3]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Configuration
PREPROCESSED_DIR = '/content/drive/MyDrive/Liver_Dataset_Preprocessed'

# Augment data and count post-augmented images
def augment_and_save(source, target, augment_count=5):  # Increased to 5 for more data
    datagen = ImageDataGenerator(
        rotation_range=40, width_shift_range=0.3, height_shift_range=0.3,
        zoom_range=0.4, horizontal_flip=True, vertical_flip=True, brightness_range=[0.8, 1.2],
        fill_mode='nearest'
    )
    post_aug_counts = {}
    for label in os.listdir(source):
        class_src = os.path.join(source, label)
        class_tgt = os.path.join(target, label)
        os.makedirs(class_tgt, exist_ok=True)
        # Copy original images
        for img_name in os.listdir(class_src):
            if img_name.endswith(('.jpg', '.jpeg', '.png')):
                shutil.copy2(os.path.join(class_src, img_name), os.path.join(class_tgt, img_name))
        # Generate augmented images
        for img_name in tqdm(os.listdir(class_src), desc=f"Augmenting {label}"):
            if not img_name.endswith(('.jpg', '.jpeg', '.png')):
                continue
            try:
                img_path = os.path.join(class_src, img_name)
                img = Image.open(img_path).convert('RGB')
                x = np.array(img)
                x = x.reshape((1,) + x.shape)
                i = 0
                for batch in datagen.flow(x, batch_size=1):
                    aug_img = Image.fromarray(batch[0].astype('uint8'))
                    aug_img.save(os.path.join(class_tgt, f"{img_name.split('.')[0]}_aug{i}.jpg"))
                    i += 1
                    if i >= augment_count:
                        break
            except Exception as e:
                print(f"Error augmenting {img_name}: {e}")
        post_aug_counts[label] = len([f for f in os.listdir(class_tgt) if f.endswith(('.jpg', '.jpeg', '.png'))])
    print(f"Post-augmentation image counts: {post_aug_counts}")

augment_and_save(os.path.join(PREPROCESSED_DIR, 'train'), os.path.join(PREPROCESSED_DIR, 'train_full'))

Augmenting Normal: 100%|██████████| 140/140 [08:54<00:00,  3.82s/it]
Augmenting CC: 100%|██████████| 145/145 [09:07<00:00,  3.78s/it]
Augmenting HCC: 100%|██████████| 139/139 [08:49<00:00,  3.81s/it]

Post-augmentation image counts: {'Normal': 840, 'CC': 870, 'HCC': 834}





In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Configuration
PREPROCESSED_DIR = '/content/drive/MyDrive/Liver_Dataset_Preprocessed'
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Create data generators
def create_data_generators():
    train_datagen = ImageDataGenerator(rescale=1./255)
    val_test_datagen = ImageDataGenerator(rescale=1./255)
    train_gen = train_datagen.flow_from_directory(
        os.path.join(PREPROCESSED_DIR, 'train_full'),
        target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical', shuffle=True
    )
    val_gen = val_test_datagen.flow_from_directory(
        os.path.join(PREPROCESSED_DIR, 'val'),
        target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical', shuffle=False
    )
    test_gen = val_test_datagen.flow_from_directory(
        os.path.join(PREPROCESSED_DIR, 'test'),
        target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='categorical', shuffle=False
    )
    return train_gen, val_gen, test_gen

train_gen, val_gen, test_gen = create_data_generators()
print(f"Train samples: {train_gen.samples}, Val samples: {val_gen.samples}, Test samples: {test_gen.samples}")

Found 2544 images belonging to 3 classes.
Found 127 images belonging to 3 classes.
Found 128 images belonging to 3 classes.
Train samples: 2544, Val samples: 127, Test samples: 128


In [5]:
import tensorflow as tf
from tensorflow.keras.applications import DenseNet201  # Upgraded to DenseNet201 for better performance
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

# Configuration
NUM_CLASSES = 3

# Build and train initial model
base_model = DenseNet201(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.6)(x)  # Increased dropout
x = Dense(256, activation='relu')(x)  # Increased units
x = BatchNormalization()(x)
x = Dropout(0.4)(x)
output = Dense(NUM_CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
callbacks = [
    EarlyStopping(patience=10, restore_best_weights=True),
    ReduceLROnPlateau(factor=0.5, patience=5),
    ModelCheckpoint('initial_model.keras', save_best_only=True)
]
history = model.fit(train_gen, epochs=70, validation_data=val_gen, callbacks=callbacks, verbose=0)
train_score = model.evaluate(train_gen, verbose=0)
val_score = model.evaluate(val_gen, verbose=0)
test_score = model.evaluate(test_gen, verbose=0)
print(f"Train Accuracy: {train_score[1]:.4f}, Validation Accuracy: {val_score[1]:.4f}, Test Accuracy: {test_score[1]:.4f}")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m74836368/74836368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step


  self._warn_if_super_not_called()


Train Accuracy: 0.8243, Validation Accuracy: 0.8425, Test Accuracy: 0.8828


In [6]:
# Fine-tune the model
base_model.trainable = True
for layer in base_model.layers[:int(0.6 * len(base_model.layers))]:  # Adjusted to unfreeze more layers
    layer.trainable = False
model.compile(optimizer=Adam(learning_rate=5e-6), loss='categorical_crossentropy', metrics=['accuracy'])
fine_tune_history = model.fit(train_gen, epochs=50, validation_data=val_gen, callbacks=callbacks, verbose=0)
train_score_ft = model.evaluate(train_gen, verbose=0)
val_score_ft = model.evaluate(val_gen, verbose=0)
test_score_ft = model.evaluate(test_gen, verbose=0)
print(f"Train Accuracy: {train_score_ft[1]:.4f}, Validation Accuracy: {val_score_ft[1]:.4f}, Test Accuracy: {test_score_ft[1]:.4f}")

Train Accuracy: 0.9996, Validation Accuracy: 0.9606, Test Accuracy: 0.9531


In [None]:
from sklearn.model_selection import ParameterGrid

# Hyperparameter tuning
param_grid = {
    'learning_rate': [1e-6, 5e-6, 1e-5],
    'batch_size': [12, 16, 24],
    'dropout_rate': [0.4, 0.5, 0.6]
}
best_accuracy = 0
best_params = None
for params in ParameterGrid(param_grid):
    print(f"Tuning with params: {params}")
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer=Adam(learning_rate=params['learning_rate']),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    history = model.fit(train_gen, epochs=30, validation_data=val_gen,
                        batch_size=params['batch_size'], callbacks=callbacks, verbose=0)
    x = model.layers[-4].output
    x = Dropout(params['dropout_rate'])(x)
    x = model.layers[-2](x)
    output = Dense(NUM_CLASSES, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=output)
    model.compile(optimizer=Adam(learning_rate=params['learning_rate']),
                  loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_gen, epochs=30, validation_data=val_gen,
              batch_size=params['batch_size'], callbacks=callbacks, verbose=0)
    val_score = model.evaluate(val_gen, verbose=0)
    if val_score[1] > best_accuracy:
        best_accuracy = val_score[1]
        best_params = params
print(f"Best parameters: {best_params}, Best Validation Accuracy: {best_accuracy:.4f}")

In [None]:
# Train with best parameters and final evaluation
model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer=Adam(learning_rate=best_params['learning_rate']),
              loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_gen, epochs=100, validation_data=val_gen,
          batch_size=best_params['batch_size'], callbacks=callbacks, verbose=0)
x = model.layers[-4].output
x = Dropout(best_params['dropout_rate'])(x)
x = model.layers[-2](x)
output = Dense(NUM_CLASSES, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer=Adam(learning_rate=best_params['learning_rate']),
              loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_gen, epochs=100, validation_data=val_gen,
          batch_size=best_params['batch_size'], callbacks=callbacks, verbose=0)
final_train_score = model.evaluate(train_gen, verbose=0)
final_val_score = model.evaluate(val_gen, verbose=0)
final_test_score = model.evaluate(test_gen, verbose=0)
print(f"Train Accuracy: {final_train_score[1]:.4f}, Validation Accuracy: {final_val_score[1]:.4f}, Test Accuracy: {final_test_score[1]:.4f}")

In [2]:
import matplotlib.pyplot as plt

# Helper function to plot graphs
def plot_history(initial, fine_tune):
    # Accuracy
    plt.figure(figsize=(14, 6))
    plt.subplot(1, 2, 1)
    plt.plot(initial.history['accuracy'], label='Initial Train Acc')
    plt.plot(initial.history['val_accuracy'], label='Initial Val Acc')
    plt.plot(fine_tune.history['accuracy'], label='Fine-Tune Train Acc')
    plt.plot(fine_tune.history['val_accuracy'], label='Fine-Tune Val Acc')
    plt.title('Training & Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    # Loss
    plt.subplot(1, 2, 2)
    plt.plot(initial.history['loss'], label='Initial Train Loss')
    plt.plot(initial.history['val_loss'], label='Initial Val Loss')
    plt.plot(fine_tune.history['loss'], label='Fine-Tune Train Loss')
    plt.plot(fine_tune.history['val_loss'], label='Fine-Tune Val Loss')
    plt.title('Training & Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

plot_history(history, fine_tune_history)

NameError: name 'history' is not defined

In [None]:
# Accuracy scores
labels = ['Train', 'Validation', 'Test']
before = [train_score[1], val_score[1], test_score[1]]
after = [train_score_ft[1], val_score_ft[1], test_score_ft[1]]

x = np.arange(len(labels))
width = 0.35

plt.figure(figsize=(8, 6))
plt.bar(x - width/2, before, width, label='Before Fine-Tuning')
plt.bar(x + width/2, after, width, label='After Fine-Tuning')
plt.ylabel('Accuracy')
plt.title('Accuracy Comparison')
plt.xticks(x, labels)
plt.ylim(0, 1.1)
plt.legend()
plt.grid(True)
plt.show()