In [None]:
# thử phương pháp xuất dữ liệu thống kê bằng pp CNN (như code trên git của nhduong)
import os
import json
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

def load_data(json_folder, image_folder, img_size=(128, 128)):
    """Load annotated images and derive a binary label for each of them.

    Every ``*.json`` file in ``json_folder`` is parsed. The image named by its
    ``imagePath`` entry is read from ``image_folder`` with OpenCV, resized to
    ``img_size`` and divided by 255. Annotations are skipped when they have no
    ``imagePath``, when the image does not exist on disk, or when OpenCV
    cannot decode it.

    The label is 1 when the ``label`` of the first entry in ``shapes``
    contains "tumor" (case-insensitive) and 0 otherwise, including when
    ``shapes`` is missing or empty.

    Args:
        json_folder: Directory containing the annotation JSON files.
        image_folder: Directory containing the images the annotations refer to.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays with one entry per
        successfully loaded image.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) stable between runs.
    json_files = sorted(f for f in os.listdir(json_folder) if f.endswith(".json"))
    images = []
    labels = []

    for json_file in json_files:
        json_path = os.path.join(json_folder, json_file)
        with open(json_path, "r") as file:
            data = json.load(file)

        image_file = data.get("imagePath")
        if not image_file:
            continue

        image_path = os.path.join(image_folder, image_file)
        if not os.path.exists(image_path):
            continue

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            continue

        image = cv2.resize(image, img_size)
        images.append(image / 255.0)  # normalise pixel values

        # "shapes" may be absent, null or an empty list; fall back to a single
        # empty shape so the lookup below cannot raise IndexError.
        shapes = data.get("shapes") or [{}]
        first_label = str(shapes[0].get("label") or "")
        labels.append(1 if "tumor" in first_label.lower() else 0)

    return np.array(images), np.array(labels)
#CNN
def build_model(input_shape):
    """Build and compile the three-stage CNN classifier.

    The network stacks three Conv2D -> MaxPooling2D -> BatchNormalization
    stages with 32, 64 and 128 filters, followed by a 128-unit dense layer
    with dropout and a 2-way softmax output.

    Args:
        input_shape: Shape of a single input sample, passed to ``Input``.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 0.001,
        sparse categorical cross-entropy loss, accuracy metric).
    """
    stack = [Input(shape=input_shape)]

    # Convolutional feature extractor: the filter count doubles at each stage.
    for n_filters in (32, 64, 128):
        stack.append(Conv2D(n_filters, (3, 3), activation='relu'))
        stack.append(MaxPooling2D((2, 2)))
        stack.append(BatchNormalization())

    # Classification head.
    stack.extend([
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ])

    net = Sequential(stack)
    net.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Seed NumPy and TensorFlow so weight initialisation, batch shuffling and
# dropout are repeatable between runs (as far as the backend allows).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

json_folder = "/kaggle/input/btxrd-data/BTXRD/BTXRD/Annotations"
image_folder = "/kaggle/input/btxrd-data/BTXRD/BTXRD/images"
X, y = load_data(json_folder, image_folder)
if len(X) == 0:
    # Fail here with a clear message instead of deep inside train_test_split.
    raise ValueError(
        f"No usable annotated images found (annotations: {json_folder!r}, images: {image_folder!r})"
    )

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)
model = build_model(input_shape=(128, 128, 3))
# NOTE: the held-out split is used both as validation data during training
# and for the final evaluation below, so val_accuracy and the reported test
# accuracy are computed on the same samples.
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc*100:.2f}%")

Epoch: Số lần mô hình duyệt qua toàn bộ tập dữ liệu huấn luyện. Ở đây huấn luyện 10 epoch.
accuracy: Độ chính xác trên tập huấn luyện.
loss: Giá trị hàm mất mát trên tập huấn luyện.
val_accuracy: Độ chính xác trên tập validation (ở đây chính là tập test được truyền vào validation_data).
val_loss: Giá trị hàm mất mát trên tập validation.

In [None]:
# xuất dữ liệu  bằng pp CNN của file1_MNIST_CNN.ipynb
import os
import json
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

IMG_SIZE = 28  # Same side length as MNIST images


# Load data from the JSON annotations
def load_data(json_folder, image_folder):
    """Load annotated images as grayscale and derive a binary label for each.

    Every ``*.json`` file in ``json_folder`` is parsed. The image named by its
    ``imagePath`` entry is read from ``image_folder`` in grayscale, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and divided by 255. Annotations are skipped
    when they have no ``imagePath``, when the image does not exist on disk, or
    when OpenCV cannot decode it.

    The label is 1 when the ``label`` of the first entry in ``shapes``
    contains "tumor" (case-insensitive) and 0 otherwise, including when
    ``shapes`` is missing or empty.

    Args:
        json_folder: Directory containing the annotation JSON files.
        image_folder: Directory containing the images the annotations refer to.

    Returns:
        A tuple ``(images, labels)``: ``images`` is reshaped to
        ``(-1, IMG_SIZE, IMG_SIZE, 1)`` and ``labels`` has one entry per image.
    """
    images, labels = [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) stable between runs.
    json_files = sorted(f for f in os.listdir(json_folder) if f.endswith(".json"))

    for json_file in json_files:
        json_path = os.path.join(json_folder, json_file)
        with open(json_path, "r") as file:
            data = json.load(file)

        image_file = data.get("imagePath")
        if not image_file:
            continue

        image_path = os.path.join(image_folder, image_file)
        if not os.path.exists(image_path):
            continue

        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals an unreadable file by returning None; without
            # this guard cv2.resize would raise on it.
            continue

        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        images.append(image / 255.0)  # normalise pixel values

        # "shapes" may be absent, null or an empty list; fall back to a single
        # empty shape so the lookup below cannot raise IndexError.
        shapes = data.get("shapes") or [{}]
        first_label = str(shapes[0].get("label") or "")
        labels.append(1 if "tumor" in first_label.lower() else 0)

    return np.array(images).reshape(-1, IMG_SIZE, IMG_SIZE, 1), np.array(labels)

# Seed NumPy and TensorFlow so weight initialisation and batch shuffling are
# repeatable between runs (as far as the backend allows).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

json_folder = "/kaggle/input/btxrd-data/BTXRD/BTXRD/Annotations"
image_folder = "/kaggle/input/btxrd-data/BTXRD/BTXRD/images"
X, y = load_data(json_folder, image_folder)
if len(X) == 0:
    # Fail here with a clear message instead of deep inside train_test_split.
    raise ValueError(
        f"No usable annotated images found (annotations: {json_folder!r}, images: {image_folder!r})"
    )

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# CNN: a single convolution stage followed by a dense classifier
model = Sequential([
    Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    MaxPooling2D(pool_size=(2,2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: the held-out split serves both as validation data and as the final
# evaluation set, so both metrics are computed on the same samples.
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc * 100:.2f}%")

# Show a few predictions
preds = (model.predict(X_test) > 0.5).astype("int32")
fig, axes = plt.subplots(3, 3, figsize=(8, 8))
for i, ax in enumerate(axes.flat):
    ax.axis('off')
    if i >= len(X_test):
        # Fewer test samples than panels: leave the remaining panels blank.
        continue
    ax.imshow(X_test[i].reshape(IMG_SIZE, IMG_SIZE), cmap='gray')
    # preds has shape (n, 1); index the scalar explicitly.
    ax.set_title(f"Pred: {'Tumor' if preds[i][0] == 1 else 'Normal'}")
plt.show()

Tăng độ sâu của mô hình CNN để trích xuất đặc trưng tốt hơn.
Thêm Batch Normalization để giúp quá trình huấn luyện ổn định hơn.
Sử dụng Dropout để tránh overfitting.
Thử nghiệm các optimizer khác như Adam với learning rate decay.
Cải thiện việc hiển thị dự đoán bằng cách hiển thị cả xác suất dự đoán trên ảnh.

# CẢI THIỆN CỦA PP 2_Human_Gender_CNN.ipynb
Data Augmentation để cải thiện khả năng tổng quát của mô hình.
Batch Normalization & Dropout để giảm overfitting.
Tăng số lớp CNN và dùng nhiều bộ lọc hơn để học đặc trưng ảnh tốt hơn.
Giảm learning rate của Adam optimizer để giúp mô hình hội tụ tốt hơn.
Hiển thị xác suất dự đoán cho từng ảnh khi đánh giá kết quả.

In [None]:
# xuất dữ liệu  bằng pp CNN của file 2_Human_Gender_CNN.ipynb
import os
import json
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

IMG_SIZE = 128  # Larger input size to suit the deeper model


def load_data(json_folder, image_folder):
    """Load annotated images as grayscale and derive a binary label for each.

    Every ``*.json`` file in ``json_folder`` is parsed. The image named by its
    ``imagePath`` entry is read from ``image_folder`` in grayscale, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and divided by 255. Annotations are skipped
    when they have no ``imagePath``, when the image does not exist on disk, or
    when OpenCV cannot decode it.

    The label is 1 when the ``label`` of the first entry in ``shapes``
    contains "tumor" (case-insensitive) and 0 otherwise, including when
    ``shapes`` is missing or empty.

    Args:
        json_folder: Directory containing the annotation JSON files.
        image_folder: Directory containing the images the annotations refer to.

    Returns:
        A tuple ``(images, labels)``: ``images`` is reshaped to
        ``(-1, IMG_SIZE, IMG_SIZE, 1)`` and ``labels`` has one entry per image.
    """
    images, labels = [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) stable between runs.
    json_files = sorted(f for f in os.listdir(json_folder) if f.endswith(".json"))

    for json_file in json_files:
        json_path = os.path.join(json_folder, json_file)
        with open(json_path, "r") as file:
            data = json.load(file)

        image_file = data.get("imagePath")
        if not image_file:
            continue

        image_path = os.path.join(image_folder, image_file)
        if not os.path.exists(image_path):
            continue

        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals an unreadable file by returning None; without
            # this guard cv2.resize would raise on it.
            continue

        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        images.append(image / 255.0)  # normalise pixel values

        # "shapes" may be absent, null or an empty list; fall back to a single
        # empty shape so the lookup below cannot raise IndexError.
        shapes = data.get("shapes") or [{}]
        first_label = str(shapes[0].get("label") or "")
        labels.append(1 if "tumor" in first_label.lower() else 0)

    return np.array(images).reshape(-1, IMG_SIZE, IMG_SIZE, 1), np.array(labels)

# Seed NumPy and TensorFlow so weight initialisation, augmentation, shuffling
# and dropout are repeatable between runs (as far as the backend allows).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

json_folder = "/kaggle/input/btxrd-data/BTXRD/BTXRD/Annotations"
image_folder = "/kaggle/input/btxrd-data/BTXRD/BTXRD/images"
X, y = load_data(json_folder, image_folder)
if len(X) == 0:
    # Fail here with a clear message instead of deep inside train_test_split.
    raise ValueError(
        f"No usable annotated images found (annotations: {json_folder!r}, images: {image_folder!r})"
    )

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2
)
# datagen.fit() is intentionally not called: it only computes statistics for
# featurewise_center / featurewise_std_normalization / zca_whitening, none of
# which is enabled above.

# CNN with Batch Normalization and Dropout
model = Sequential([
    Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    
    Conv2D(64, kernel_size=(3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    
    Conv2D(128, kernel_size=(3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile with the Adam optimizer and a lower learning rate
model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

# NOTE: the held-out split serves both as validation data and as the final
# evaluation set, so both metrics are computed on the same samples.
history = model.fit(datagen.flow(X_train, y_train, batch_size=32, seed=SEED),
                    validation_data=(X_test, y_test),
                    epochs=20)

loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc * 100:.2f}%")

# Show a few predictions together with the predicted probability
preds = model.predict(X_test)
preds_labels = (preds > 0.5).astype("int32")
fig, axes = plt.subplots(3, 3, figsize=(8, 8))
for i, ax in enumerate(axes.flat):
    ax.axis('off')
    if i >= len(X_test):
        # Fewer test samples than panels: leave the remaining panels blank.
        continue
    ax.imshow(X_test[i].reshape(IMG_SIZE, IMG_SIZE), cmap='gray')
    # preds / preds_labels have shape (n, 1); index the scalar explicitly.
    ax.set_title(f"Pred: {'Tumor' if preds_labels[i][0] == 1 else 'Normal'}\nProb: {preds[i][0]:.2f}")
plt.show()

In [None]:
# xuất dữ liệu  bằng pp CNN của file 3_Emotion_Reg_CNN.ipynb
import os
import json
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

IMG_SIZE = 48  # Same input size as the emotion-recognition model


def load_data(json_folder, image_folder):
    """Load annotated images as grayscale and derive a binary label for each.

    Every ``*.json`` file in ``json_folder`` is parsed. The image named by its
    ``imagePath`` entry is read from ``image_folder`` in grayscale, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and divided by 255. Annotations are skipped
    when they have no ``imagePath``, when the image does not exist on disk, or
    when OpenCV cannot decode it.

    The label is 1 when the ``label`` of the first entry in ``shapes``
    contains "tumor" (case-insensitive) and 0 otherwise, including when
    ``shapes`` is missing or empty.

    Args:
        json_folder: Directory containing the annotation JSON files.
        image_folder: Directory containing the images the annotations refer to.

    Returns:
        A tuple ``(images, labels)``: ``images`` is reshaped to
        ``(-1, IMG_SIZE, IMG_SIZE, 1)`` and ``labels`` has one entry per image.
    """
    images, labels = [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) stable between runs.
    json_files = sorted(f for f in os.listdir(json_folder) if f.endswith(".json"))

    for json_file in json_files:
        json_path = os.path.join(json_folder, json_file)
        with open(json_path, "r") as file:
            data = json.load(file)

        image_file = data.get("imagePath")
        if not image_file:
            continue

        image_path = os.path.join(image_folder, image_file)
        if not os.path.exists(image_path):
            continue

        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals an unreadable file by returning None; without
            # this guard cv2.resize would raise on it.
            continue

        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        images.append(image / 255.0)  # normalise pixel values

        # "shapes" may be absent, null or an empty list; fall back to a single
        # empty shape so the lookup below cannot raise IndexError.
        shapes = data.get("shapes") or [{}]
        first_label = str(shapes[0].get("label") or "")
        labels.append(1 if "tumor" in first_label.lower() else 0)

    return np.array(images).reshape(-1, IMG_SIZE, IMG_SIZE, 1), np.array(labels)

# Seed NumPy and TensorFlow so weight initialisation, batch shuffling and
# dropout are repeatable between runs (as far as the backend allows).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

json_folder = "/kaggle/input/btxrd-data/BTXRD/BTXRD/Annotations"
image_folder = "/kaggle/input/btxrd-data/BTXRD/BTXRD/images"
X, y = load_data(json_folder, image_folder)
if len(X) == 0:
    # Fail here with a clear message instead of deep inside train_test_split.
    raise ValueError(
        f"No usable annotated images found (annotations: {json_folder!r}, images: {image_folder!r})"
    )

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# CNN following the approach of 3_Emotion_Reg_CNN.ipynb
model = Sequential([
    Conv2D(64, kernel_size=(3,3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    
    Conv2D(128, kernel_size=(3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    
    Conv2D(256, kernel_size=(3,3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),
    
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification
])

model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: the held-out split serves both as validation data and as the final
# evaluation set, so both metrics are computed on the same samples.
history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test))
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc * 100:.2f}%")

# Show a few predictions together with the sigmoid output
preds = model.predict(X_test)
preds_labels = (preds > 0.5).astype("int32")
fig, axes = plt.subplots(3, 3, figsize=(8, 8))
for i, ax in enumerate(axes.flat):
    ax.axis('off')
    if i >= len(X_test):
        # Fewer test samples than panels: leave the remaining panels blank.
        continue
    ax.imshow(X_test[i].reshape(IMG_SIZE, IMG_SIZE), cmap='gray')
    # preds / preds_labels have shape (n, 1); index the scalar explicitly.
    ax.set_title(f"Pred: {'Tumor' if preds_labels[i][0] == 1 else 'Normal'}\nConfidence: {preds[i][0]:.2f}")
plt.show()