In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

        
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Check which GPU is attached to this session (a Tesla P100 at the time of writing).
# The leading `!` hands the command to the shell instead of the Python kernel.
!nvidia-smi

In [None]:
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Model

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
# Sanity check: render one raw image from the "Normal" training folder.
sample_image_path = "../input/lungs-disease-dataset-4-types/Lung Disease Dataset/train/Normal/test_0_9774.jpeg"

plt.figure(figsize=(10, 10))
img = mpimg.imread(sample_image_path)
plt.imshow(img)
plt.show()

In [None]:
# Image generators: every split is rescaled to [0, 1]; random shifts and zoom
# are configured for the training generator only.
rescale_only = dict(rescale=1./255)

traingen = ImageDataGenerator(
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    **rescale_only,
)
valgen = ImageDataGenerator(**rescale_only)
testgen = ImageDataGenerator(**rescale_only)

In [None]:
# Build one directory iterator per split, resizing every image to 224x224.
# Each split must use its own generator: only `traingen` carries random
# augmentation, so validation and test images go through `valgen` / `testgen`
# (rescale only) to keep their metrics free of random distortions.
train_it = traingen.flow_from_directory("../input/lungs-disease-dataset-4-types/Lung Disease Dataset/train", target_size=(224, 224))
# shuffle=False keeps a fixed file order so predictions can later be matched
# against `.classes`; this mirrors how the iterators are built in the training cell.
val_it = valgen.flow_from_directory("../input/lungs-disease-dataset-4-types/Lung Disease Dataset/val", target_size=(224, 224), shuffle=False)
test_it = testgen.flow_from_directory("../input/lungs-disease-dataset-4-types/Lung Disease Dataset/test", target_size=(224, 224), shuffle=False)


In [None]:
# Preview the first image of the next training batch, i.e. after the
# generator's transforms have been applied.
next_batch = next(train_it)
batch_images = next_batch[0]

plt.figure()
plt.imshow(batch_images[0])
plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model

# Augmentation settings applied to the training split only.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    shear_range=0.2,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    fill_mode='nearest',
)

# All three generators rescale pixel values by 1/255; validation and test
# images receive no other transformation.
traingen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, **augmentation_options)
valgen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
testgen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)

# Options shared by every directory iterator.
flow_options = dict(target_size=(224, 224), batch_size=32, class_mode='categorical')

# Training batches keep the default shuffling; validation and test iterate in
# a fixed order (shuffle=False).
train_it = traingen.flow_from_directory(
    "/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/train",
    **flow_options,
)
val_it = valgen.flow_from_directory(
    "/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/val",
    shuffle=False,
    **flow_options,
)
test_it = testgen.flow_from_directory(
    "/kaggle/input/lungs-disease-dataset-4-types/Lung Disease Dataset/test",
    shuffle=False,
    **flow_options,
)

# Load the pre-trained DenseNet201 backbone (ImageNet weights, no classifier head).
base_model = tf.keras.applications.DenseNet201(input_shape=(224, 224, 3),
                                               include_top=False,
                                               weights='imagenet')
# NOTE(review): the generators feed images that are only rescaled by 1/255.
# Keras ships a DenseNet-specific `preprocess_input`; confirm whether the
# ImageNet weights expect that normalisation instead.

# Phase 1: freeze the backbone so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# Size the output layer from the classes the training iterator actually found,
# rather than hard-coding the class count.
num_classes = len(train_it.class_indices)

# Classification head: pooled backbone features -> two 512-unit blocks -> softmax.
x = layers.GlobalAveragePooling2D()(base_model.output)
x = layers.Dropout(0.5)(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=x)

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks:
#   - stop when val_loss has not improved for 5 epochs and restore the best weights
#   - multiply the learning rate by 0.2 after 2 stagnant epochs (floor 1e-6)
#   - save a checkpoint whenever val_accuracy improves
# NOTE(review): the checkpoint file name contains only the epoch number, so any
# other training run that uses the same pattern overwrites these files.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6),
    tf.keras.callbacks.ModelCheckpoint("fin_{epoch}_model.keras", monitor="val_accuracy", save_best_only=True, verbose=1, mode="auto")
]

# Phase 1: train only the new head on top of the frozen backbone.
# No explicit step counts are passed: the directory iterators know their own
# length, so each epoch covers every batch (including the final partial one).
# The previous `samples // batch_size - 1` skipped batches every epoch, and
# because `val_it` is not shuffled the same trailing validation images were
# never scored.
history_phase1 = model.fit(
    train_it,
    validation_data=val_it,
    epochs=10,
    callbacks=callbacks
)

# Phase 2: unfreeze the backbone and fine-tune the entire model.
for layer in base_model.layers:
    layer.trainable = True

# Re-compile so the new trainable flags take effect; use a 10x smaller
# learning rate than phase 1 for fine-tuning.
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history_phase2 = model.fit(
    train_it,
    validation_data=val_it,
    epochs=20,
    callbacks=callbacks
)

# Evaluate on every batch of each split. `steps` is deliberately omitted:
# `samples // batch_size` drops the final partial batch, so the reported
# metrics would exclude the tail of each (unshuffled) validation/test set.

# Training set.
# NOTE(review): `train_it` applies random augmentation, so these numbers are
# measured on augmented images rather than the raw training files.
train_loss, train_accuracy = model.evaluate(train_it, verbose=1)
print(f"Training Loss: {train_loss:.4f}")
print(f"Training Accuracy: {train_accuracy:.4f}")

# Validation set.
val_loss, val_accuracy = model.evaluate(val_it, verbose=1)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Test set.
test_loss, test_accuracy = model.evaluate(test_it, verbose=1)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


In [None]:
# Combine history from both phases
def combine_history(history1, history2):
    """Concatenate the per-epoch metrics of two training runs.

    Args:
        history1: Object with a ``history`` dict mapping metric name to a
            list of per-epoch values (first run).
        history2: Same structure for the run that followed.

    Returns:
        A new dict mapping every metric name found in either run to the
        values of ``history1`` followed by those of ``history2``. A metric
        missing from one run contributes no values for that run (no padding),
        so such a series is shorter than the others.
    """
    first = history1.history
    second = history2.history
    # Keep the first run's key order, then append keys only the second run has.
    keys = list(first) + [key for key in second if key not in first]
    # list(...) copies the values so the result never aliases the inputs.
    return {key: list(first.get(key, [])) + list(second.get(key, [])) for key in keys}

combined_history = combine_history(history_phase1, history_phase2)

# One panel per metric: (subplot position, training key, validation key, title, y-label).
history_panels = [
    (1, 'accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    (2, 'loss', 'val_loss', 'Model loss', 'Loss'),
]

plt.figure(figsize=(12, 4))

# Draw the training and validation curves side by side for each metric.
for panel_position, train_key, val_key, panel_title, y_label in history_panels:
    plt.subplot(1, 2, panel_position)
    plt.plot(combined_history[train_key])
    plt.plot(combined_history[val_key])
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()


In [None]:

import tensorflow as tf
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Confusion matrix and overall accuracy on the test split.
# Uses the `model` and `test_it` objects created in the training cell.
# Predictions line up with `test_it.classes` only when the iterator was built
# with shuffle=False.

# Step 1: Generate predictions (class probabilities -> most likely class index)
predictions = model.predict(test_it)
predicted_classes = np.argmax(predictions, axis=1)

# Step 2: Get true labels
true_classes = test_it.classes

# Step 3: Get class names, ordered explicitly by class index so the tick
# labels match the matrix rows/columns
class_labels = sorted(test_it.class_indices, key=test_it.class_indices.get)

# Step 4: Calculate the confusion matrix using TensorFlow.
# num_classes pins the matrix to one row/column per known class; otherwise the
# size is inferred from the largest index seen and can disagree with the labels.
conf_matrix = tf.math.confusion_matrix(
    true_classes, predicted_classes, num_classes=len(class_labels)
).numpy()

# Step 5: Calculate accuracy using TensorFlow
accuracy = tf.keras.metrics.Accuracy()
accuracy.update_state(true_classes, predicted_classes)
accuracy_value = accuracy.result().numpy()

# Step 6: Plot the confusion matrix with class names and accuracy
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")

# Add accuracy text below the heatmap (position given in axes coordinates)
plt.text(0, -0.3, f'Accuracy of Model: {accuracy_value:.2f}', ha='center', va='center', transform=plt.gca().transAxes, fontsize=12, color='red')

plt.show()


In [None]:
# Calculate accuracy, F1 score, precision, and recall
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# F1, precision and recall are averaged across classes with the 'weighted' scheme.
weighted = {'average': 'weighted'}

accuracy = accuracy_score(true_classes, predicted_classes)
f1 = f1_score(true_classes, predicted_classes, **weighted)
precision = precision_score(true_classes, predicted_classes, **weighted)
recall = recall_score(true_classes, predicted_classes, **weighted)

# Report each metric on its own line.
for metric_name, metric_value in (
    ('Accuracy', accuracy),
    ('F1 Score', f1),
    ('Precision', precision),
    ('Recall', recall),
):
    print(f'{metric_name}: {metric_value}')

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model

# Second experiment: frozen DenseNet201 backbone with a single 512-unit head.
base_model = tf.keras.applications.DenseNet201(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Freeze every backbone layer; only the new head below is trainable.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

# Head layers, applied in order to the backbone output. Global average pooling
# is used in place of Flatten.
head_layers = [
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.1),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(5, activation='softmax'),
]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# Assemble and compile the final model (Adam, learning rate 1e-3).
model2 = Model(inputs=base_model.input, outputs=x)
model2.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

# Callbacks: early stopping on val_loss, learning-rate reduction on plateau,
# and a checkpoint saved whenever val_accuracy improves.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)
checkpoint = tf.keras.callbacks.ModelCheckpoint("fin_{epoch}_model.keras", monitor="val_accuracy", save_best_only=True, verbose=1, mode="auto")
callbacks = [early_stopping, reduce_lr, checkpoint]

model2.summary()

### Early stopping
> This time I forgot to use early stopping. I should probably use it next time, because the loss and accuracy stop changing toward the end of training.

In [None]:

# Train the second model.
# No explicit step counts are passed: the iterators report their own length,
# so every epoch covers all training and validation batches. The previous
# `samples // batch_size - 1` skipped batches each epoch, and with an
# unshuffled `val_it` the same trailing validation images were never scored.
history = model2.fit(
    train_it,
    validation_data=val_it,
    epochs=20,  # You can increase the number of epochs for more training
    callbacks=callbacks
)

In [None]:
# Reload saved models and score them on the test split.
# The original cell had two statements fused on one line (a SyntaxError) and
# used steps=1, which scores only the first batch of the unshuffled test set.
# Each checkpoint is now evaluated on the full split, and a file that is not
# present is reported instead of aborting the cell.
for checkpoint_path in ('fin_18_model.keras', 'densenet201.hdf5'):
    if not os.path.exists(checkpoint_path):
        print(f"Skipping {checkpoint_path}: file not found")
        continue
    model_dense = keras.models.load_model(checkpoint_path)
    # return_dict=True labels each value with its metric name.
    test_metrics = model_dense.evaluate(test_it, return_dict=True)
    print(f"{checkpoint_path}: {test_metrics}")