In [6]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm
import glob

# === Settings ===
csv_path = 'metadata_compiled_dummies.csv'
base_image_dir = '../YuanDataProcessing'
img_x = 600
img_y = 240
# NOTE(review): this tuple is passed to Keras `load_img(target_size=...)`, which
# documents it as (height, width) -- so img_x is used as the image HEIGHT here.
# Confirm that this matches the orientation of the source images.
img_size = (img_x, img_y)
# One-hot label columns; the column order defines the class index (0, 1, 2).
label_cols = ['status_COVID-19', 'status_healthy', 'status_symptomatic']

# === Load CSV and preprocess ===
# Keep only the id + label columns, drop rows with any missing label, and cast
# the labels to int in one chain (no slice-assignment on a derived frame).
df = pd.read_csv(csv_path)[['uuid'] + label_cols]
df = df.dropna(subset=label_cols).astype({col: int for col in label_cols})

# === Map UUIDs to file paths ===
# The image file stem (name without extension) is matched against the CSV uuid.
all_image_paths = glob.glob(os.path.join(base_image_dir, 'folder_*', '*.png'))
uuid_to_path = {os.path.splitext(os.path.basename(p))[0]: p for p in all_image_paths}

# === Load and preprocess images ===
# Split the metadata into rows that have an image on disk and rows that do not.
has_image = df['uuid'].isin(set(uuid_to_path))
for uuid in df.loc[~has_image, 'uuid']:
    print(f"Missing image for UUID: {uuid}")
df_found = df[has_image]

# Preallocate the image tensor instead of growing a Python list and converting
# it afterwards: list + np.array(list) briefly holds the whole dataset twice.
X = np.empty((len(df_found), img_size[0], img_size[1], 3), dtype=np.float32)
for i, uuid in enumerate(tqdm(df_found['uuid'], total=len(df_found))):
    img = load_img(uuid_to_path[uuid], target_size=img_size)
    X[i] = img_to_array(img) / 255.0  # scale pixel values to [0, 1]

# Take the labels straight from the frame as a numeric array. Collecting
# `row[...].values` from iterrows() yields an object-dtype array (the row also
# holds the string uuid), which can only be saved/loaded through pickle.
y = df_found[label_cols].to_numpy(dtype=np.float32)

100%|█████████████████████████████████████| 20664/20664 [34:37<00:00,  9.95it/s]


In [7]:
# Cache both arrays on disk; the image resolution is encoded in the file name
# (e.g. X_600x240.npy / y_600x240.npy).
for prefix, array in (('X', X), ('y', y)):
    np.save(f'{prefix}_{img_x}x{img_y}.npy', array)

In [8]:
# Ensure both arrays are float32 for training. np.asarray returns the input
# unchanged when it already has the requested dtype, whereas np.array always
# makes a full copy -- which would double the memory held by the image tensor.
X = np.asarray(X, dtype=np.float32)
y = np.asarray(y, dtype=np.float32)

In [None]:
# === Train-test split ===
# Hold out 20% for testing; stratify on the class index (argmax of the one-hot
# row) so both partitions keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y.argmax(axis=1),
    random_state=42,
)

# === CNN Model ===
# Two conv/pool stages followed by a dropout-regularised dense classifier.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(*img_size, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dense(3, activation='softmax'))  # 3 output classes

model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# === Train ===
# Keras carves the validation set (10%) off the end of the training arrays.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.1,
)

# === Evaluate ===
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2%}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm
import glob

# === Settings ===
csv_path = 'metadata_compiled_dummies.csv'
base_image_dir = '../YuanDataProcessing'
img_x = 128
img_y = 128
img_size = (img_x, img_y)

# === Load cached arrays ===
# File names are derived from img_x/img_y so the loaded data always matches the
# model input size configured above.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load files produced by this project. It is kept
# because a previously cached label file may have been saved with object dtype.
X = np.load(f'X_{img_x}x{img_y}.npy', allow_pickle=True)
y = np.load(f'y_{img_x}x{img_y}.npy', allow_pickle=True)

# asarray avoids a second full copy when the data is already float32.
X = np.asarray(X, dtype=np.float32)
y = np.asarray(y, dtype=np.float32)

if X.shape[1:] != (img_size[0], img_size[1], 3):
    raise ValueError(
        f"Loaded images have shape {X.shape[1:]}, expected {(img_size[0], img_size[1], 3)}"
    )

# === Train-test split ===
# Split the arrays loaded in THIS cell. Without this, X_train/y_train/X_test/
# y_test would silently come from whatever an earlier cell left in the kernel
# (or raise NameError on a fresh kernel).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y.argmax(axis=1), random_state=42
)

# === Compute Class Weights ===
# 'balanced' weights are inversely proportional to class frequency, so the
# rarer classes contribute more to the loss.
y_train_labels = np.argmax(y_train, axis=1)
classes = np.unique(y_train_labels)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train_labels
)
# Key by the actual class id rather than by position, so the mapping stays
# correct even if some class id is absent from the training labels.
class_weights_dict = {int(c): float(w) for c, w in zip(classes, class_weights)}
print("Class Weights:", class_weights_dict)

# === CNN Model ===
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax')  # 3 output classes
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# === Train Model with Class Weights ===
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.1,
    class_weight=class_weights_dict
)

# === Evaluate Model ===
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2%}")

Class Weights: {0: 5.237959442332066, 1: 0.4450636728320276, 2: 1.7786744136001722}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 103ms/step - accuracy: 0.3080 - loss: 1.5195 - val_accuracy: 0.4698 - val_loss: 1.0709
Epoch 2/30
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 103ms/step - accuracy: 0.4283 - loss: 1.0867 - val_accuracy: 0.1880 - val_loss: 1.1478
Epoch 3/30
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 126ms/step - accuracy: 0.3095 - loss: 1.0913 - val_accuracy: 0.2588 - val_loss: 1.1057
Epoch 4/30
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 115ms/step - accuracy: 0.3209 - loss: 1.0880 - val_accuracy: 0.3247 - val_loss: 1.1103
Epoch 5/30
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 106ms/step - accuracy: 0.3490 - loss: 1.0873 - val_accuracy: 0.3936 - val_loss: 1.0812
Epoch 6/30
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 104ms/step - accuracy: 0.3951 - loss: 1.0513 - val_accuracy: 0.3664 - val_loss: 1.0636
Epoch 7/30

In [10]:
from sklearn.metrics import confusion_matrix, classification_report

# === Predict on test set ===
# Collapse softmax probabilities and one-hot targets to integer class ids.
y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)
y_true = y_test.argmax(axis=1)

# === Confusion Matrix ===
# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:", cm, sep="\n")

# === Classification Report (optional, includes precision, recall, f1) ===
class_names = ['COVID-19', 'Healthy', 'Symptomatic']
report = classification_report(y_true, y_pred, target_names=class_names)
print("\nClassification Report:")
print(report)

[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 26ms/step
Confusion Matrix:
[[  14  204   45]
 [ 159 2479  457]
 [  41  641   93]]

Classification Report:
              precision    recall  f1-score   support

    COVID-19       0.07      0.05      0.06       263
     Healthy       0.75      0.80      0.77      3095
 Symptomatic       0.16      0.12      0.14       775

    accuracy                           0.63      4133
   macro avg       0.32      0.32      0.32      4133
weighted avg       0.59      0.63      0.61      4133



In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tqdm import tqdm
import glob

# === Settings ===
csv_path = 'metadata_compiled_dummies.csv'
base_image_dir = '../YuanDataProcessing'
img_x = 128
img_y = 128
img_size = (img_x, img_y)

# === Load cached arrays ===
# File names are derived from img_x/img_y so the loaded data always matches the
# model input size configured above.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only load files produced by this project. It is kept
# because a previously cached label file may have been saved with object dtype.
X = np.load(f'X_{img_x}x{img_y}.npy', allow_pickle=True)
y = np.load(f'y_{img_x}x{img_y}.npy', allow_pickle=True)

# asarray avoids a second full copy when the data is already float32.
X = np.asarray(X, dtype=np.float32)
y = np.asarray(y, dtype=np.float32)

if X.shape[1:] != (img_size[0], img_size[1], 3):
    raise ValueError(
        f"Loaded images have shape {X.shape[1:]}, expected {(img_size[0], img_size[1], 3)}"
    )

# === Train-test split ===
# Split the arrays loaded in THIS cell. Without this, X_train/y_train/X_test/
# y_test would silently come from whatever an earlier cell left in the kernel
# (or raise NameError on a fresh kernel).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y.argmax(axis=1), random_state=42
)

# === Compute Class Weights ===
# 'balanced' weights are inversely proportional to class frequency, so the
# rarer classes contribute more to the loss.
y_train_labels = np.argmax(y_train, axis=1)
classes = np.unique(y_train_labels)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=classes,
    y=y_train_labels
)
# Key by the actual class id rather than by position, so the mapping stays
# correct even if some class id is absent from the training labels.
class_weights_dict = {int(c): float(w) for c, w in zip(classes, class_weights)}
print("Class Weights:", class_weights_dict)

# === CNN Model ===
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax')  # 3 output classes
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# === Early Stopping Callback ===
early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitor the validation loss
    patience=5,  # Wait for 5 epochs with no improvement before stopping
    restore_best_weights=True  # Restore the weights from the best epoch
)

# === Train Model with Class Weights and Early Stopping ===
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
    class_weight=class_weights_dict,
    callbacks=[early_stopping]  # Add the early stopping callback
)

# === Evaluate Model ===
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2%}")

Class Weights: {0: 5.237959442332066, 1: 0.4450636728320276, 2: 1.7786744136001722}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 105ms/step - accuracy: 0.3133 - loss: 1.1976 - val_accuracy: 0.1378 - val_loss: 1.1251
Epoch 2/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 105ms/step - accuracy: 0.2975 - loss: 1.0701 - val_accuracy: 0.1245 - val_loss: 1.1224
Epoch 3/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 104ms/step - accuracy: 0.2578 - loss: 1.1018 - val_accuracy: 0.2455 - val_loss: 1.0941
Epoch 4/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 102ms/step - accuracy: 0.2699 - loss: 1.1083 - val_accuracy: 0.3434 - val_loss: 1.0936
Epoch 5/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 102ms/step - accuracy: 0.3252 - loss: 1.0736 - val_accuracy: 0.2322 - val_loss: 1.1175
Epoch 6/20
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 103ms/step - accuracy: 0.3589 - loss: 1.0546 - val_accuracy: 0.2588 - val_loss: 1.1072
Epoch 7/20