In [1]:
import glob
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tqdm import tqdm

# --- Configuration: input locations and the size images are resized to ---
csv_path = 'metadata_compiled_dummies.csv'
base_image_dir = '../YuanDataProcessing'
img_x = 3000 // 6
img_y = 1200 // 6
img_size = (img_x, img_y)

# --- Metadata: keep the uuid plus the three one-hot status columns, drop
# rows with any missing status, and store the statuses as integers ---
label_columns = ['status_COVID-19', 'status_healthy', 'status_symptomatic']
df = (
    pd.read_csv(csv_path)[['uuid', *label_columns]]
    .dropna(subset=label_columns)
    .astype({column: int for column in label_columns})
)

# --- Image index: file name without extension -> full path, for every PNG
# found one level down in a folder_* directory ---
all_image_paths = glob.glob(os.path.join(base_image_dir, 'folder_*', '*.png'))
image_stems = [os.path.splitext(os.path.basename(path))[0] for path in all_image_paths]
uuid_to_path = dict(zip(image_stems, all_image_paths))

# Load and preprocess images
#
# Builds X (float32 pixel data scaled to [0, 1]) and y (float32 one-hot
# labels) for every metadata row whose uuid has a matching image file.
# X is preallocated and filled in place so the pixel data is held in memory
# only once, instead of as a Python list plus one or two full array copies.
label_cols = ['status_COVID-19', 'status_healthy', 'status_symptomatic']

# Resolve each row to its image path up front; uuids with no file map to NaN.
image_paths = df['uuid'].map(uuid_to_path)
has_image = image_paths.notna()

for uuid in df.loc[~has_image, 'uuid']:
    print(f"Missing image for UUID: {uuid}")

found_paths = image_paths[has_image].tolist()

# NOTE(review): Keras' load_img reads target_size as (height, width), while
# img_size is built as (img_x, img_y). The arrays and the model input shape
# are consistent with each other, but confirm this orientation matches the
# source images rather than transposing their aspect ratio.
X = np.empty((len(found_paths), img_size[0], img_size[1], 3), dtype=np.float32)
for i, image_path in enumerate(tqdm(found_paths)):
    img = load_img(image_path, target_size=img_size)
    X[i] = img_to_array(img) / 255.0

# Labels are taken in the same row order as the images loaded above.
y = df.loc[has_image, label_cols].to_numpy(dtype=np.float32)

# To skip the slow decode step, previously saved arrays can be loaded instead:
# X = np.load(f'X_{img_x}x{img_y}.npy', allow_pickle=True)
# y = np.load(f'y_{img_x}x{img_y}.npy', allow_pickle=True)

# No-op for the arrays built above (already float32, so no copy is made);
# kept so arrays loaded from the cache are normalised to float32 as well.
X = np.asarray(X, dtype=np.float32)
y = np.asarray(y, dtype=np.float32)

assert len(X) == len(y), "every image must have exactly one label row"

# Hold out 20% of the samples for testing. The split is stratified on the
# class index (position of the largest entry in each one-hot label row) and
# uses a fixed seed so it is repeatable.
class_index = y.argmax(axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=class_index,
    random_state=42,
)

100%|█████████████████████████████████████| 20664/20664 [34:15<00:00, 10.05it/s]


Class Weights: {0: 5.237959442332066, 1: 0.4450636728320276, 2: 1.7786744136001722}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m337s[0m 721ms/step - accuracy: 0.3162 - loss: 2.1124 - val_accuracy: 0.2358 - val_loss: 1.0914
Epoch 2/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m334s[0m 717ms/step - accuracy: 0.2887 - loss: 1.0837 - val_accuracy: 0.1729 - val_loss: 1.1029
Epoch 3/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m334s[0m 718ms/step - accuracy: 0.2671 - loss: 1.0745 - val_accuracy: 0.2872 - val_loss: 1.1016
Epoch 4/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 737ms/step - accuracy: 0.2996 - loss: 1.0877 - val_accuracy: 0.4389 - val_loss: 1.0550
Epoch 5/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m331s[0m 710ms/step - accuracy: 0.3670 - loss: 1.0456 - val_accuracy: 0.2878 - val_loss: 1.1169
Epoch 6/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m336s[0m 721ms/step - accuracy: 0.4114 - loss: 0.9722 - val_accuracy: 0.3126 - val_loss: 1.0845
Epoc

In [4]:
# Compute class weights
# Integer class index for each training sample (y_train rows are one-hot).
y_train_labels = np.argmax(y_train, axis=1)

train_classes = np.unique(y_train_labels)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=y_train_labels
)
# Key each weight by the class index it was computed for. enumerate() would
# only give the right keys when the classes present are exactly 0..k-1.
class_weights_dict = {int(cls): float(w) for cls, w in zip(train_classes, class_weights)}
print("Class Weights:", class_weights_dict)

# CNN model: two conv/pool stages, then dropout and a small dense head.
# The input shape is declared with an explicit Input layer; passing
# input_shape= to the first Conv2D triggers a Keras deprecation warning.
model = Sequential([
    Input(shape=(img_size[0], img_size[1], 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax')  # 3 output classes
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train model with class weights (baseline run: fixed 20 epochs, no callbacks).
# validation_split holds out the last 10% of X_train/y_train for validation.
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
    class_weight=class_weights_dict,
)

# Evaluate model on the held-out test set
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2%}")

Class Weights: {0: 5.237959442332066, 1: 0.4450636728320276, 2: 1.7786744136001722}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m413s[0m 884ms/step - accuracy: 0.2871 - loss: 1.9672 - val_accuracy: 0.3259 - val_loss: 1.0972
Epoch 2/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m375s[0m 804ms/step - accuracy: 0.2234 - loss: 1.1024 - val_accuracy: 0.4021 - val_loss: 1.0789
Epoch 3/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m381s[0m 820ms/step - accuracy: 0.3137 - loss: 1.0714 - val_accuracy: 0.3458 - val_loss: 1.0806
Epoch 4/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m376s[0m 808ms/step - accuracy: 0.3468 - loss: 1.0352 - val_accuracy: 0.3126 - val_loss: 1.0731
Epoch 5/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m392s[0m 842ms/step - accuracy: 0.4183 - loss: 0.9724 - val_accuracy: 0.2461 - val_loss: 1.1372
Epoch 6/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m361s[0m 775ms/step - accuracy: 0.4569 - loss: 0.8907 - val_accuracy: 0.4299 - val_loss: 1.0431
Epoc

In [5]:
from sklearn.metrics import confusion_matrix, classification_report

# Display names in the same order as the one-hot label columns
# (COVID-19, healthy, symptomatic), so list position == class index.
class_names = ['COVID-19', 'Healthy', 'Symptomatic']
class_ids = list(range(len(class_names)))

# Turn softmax outputs and one-hot targets into integer class indices.
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

# Pin the label set so rows/columns always line up with class_names, even if
# some class never occurs in y_true or y_pred.
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
print("Confusion Matrix:")
print(cm)

print("\nClassification Report:")
# zero_division=0 reports 0 (without a warning) for a class with no predictions.
print(classification_report(
    y_true, y_pred,
    labels=class_ids,
    target_names=class_names,
    zero_division=0,
))

[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 169ms/step
Confusion Matrix:
[[  15  228   20]
 [ 143 2782  170]
 [  40  682   53]]

Classification Report:
              precision    recall  f1-score   support

    COVID-19       0.08      0.06      0.07       263
     Healthy       0.75      0.90      0.82      3095
 Symptomatic       0.22      0.07      0.10       775

    accuracy                           0.69      4133
   macro avg       0.35      0.34      0.33      4133
weighted avg       0.61      0.69      0.64      4133



In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Integer class index for each training sample (y_train rows are one-hot).
y_train_labels = np.argmax(y_train, axis=1)

train_classes = np.unique(y_train_labels)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=y_train_labels
)
# Key each weight by the class index it was computed for. enumerate() would
# only give the right keys when the classes present are exactly 0..k-1.
class_weights_dict = {int(cls): float(w) for cls, w in zip(train_classes, class_weights)}
print("Class Weights:", class_weights_dict)

# Same architecture as the baseline run. The input shape is declared with an
# explicit Input layer; passing input_shape= to the first Conv2D triggers a
# Keras deprecation warning.
model = Sequential([
    Input(shape=(img_size[0], img_size[1], 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax')  # 3 output classes
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Stop after 3 epochs without val_loss improvement and roll back to the
# best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

# Halve the learning rate after 2 epochs without val_loss improvement,
# never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-6,
    verbose=1
)

# validation_split holds out the last 10% of X_train/y_train for validation.
history = model.fit(
    X_train, y_train,
    epochs=25,
    batch_size=32,
    validation_split=0.1,
    class_weight=class_weights_dict,
    callbacks=[early_stopping, lr_scheduler]
)

# Evaluate on the held-out test set
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2%}")

Class Weights: {0: 5.237959442332066, 1: 0.4450636728320276, 2: 1.7786744136001722}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
