In [21]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization,GlobalAveragePooling2D
from tensorflow.keras.applications import VGG16

In [17]:
# Define the paths
benign_folder = 'Thyroid Dataset/benign/'
malignant_folder = 'Thyroid Dataset/malignant/'

# Create a list of image paths and labels.
# os.listdir() returns entries in arbitrary order, so the listings are sorted;
# otherwise the seeded split below would not be reproducible across runs/machines.
benign_images = [os.path.join(benign_folder, img) for img in sorted(os.listdir(benign_folder))]
malignant_images = [os.path.join(malignant_folder, img) for img in sorted(os.listdir(malignant_folder))]

all_images = benign_images + malignant_images

# Labels are built directly as strings ('0' = benign, '1' = malignant) because they
# are later passed to flow_from_dataframe with class_mode='binary'.
labels = ['0'] * len(benign_images) + ['1'] * len(malignant_images)

# Split the data into training and validation sets.
# stratify=labels keeps the benign/malignant ratio identical in both subsets, so the
# validation score is not distorted by an unlucky class mix.
train_images, val_images, train_labels, val_labels = train_test_split(
    all_images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Create data generators: augmentation is applied to the training images only;
# validation images are just rescaled to [0, 1].
train_datagen = ImageDataGenerator(rescale=1.0/255, horizontal_flip=True, zoom_range=0.2, rotation_range=20)
val_datagen = ImageDataGenerator(rescale=1.0/255)

def create_data_generator(datagen, images, labels, batch_size=32, target_size=(150, 150), shuffle=True):
    """Build a binary-classification batch iterator from parallel path/label lists.

    Args:
        datagen: Object exposing ``flow_from_dataframe`` (an ``ImageDataGenerator``
            in this notebook).
        images: Iterable of image file paths.
        labels: Iterable of class labels, one per entry of ``images``.
        batch_size: Number of images per batch.
        target_size: Size every image is resized to; defaults to the (150, 150)
            used by the rest of the notebook.
        shuffle: Whether the iterator shuffles samples. Pass ``False`` when the
            order of predictions must line up with the order of ``images``.

    Returns:
        Whatever ``datagen.flow_from_dataframe`` returns.

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length.
    """
    images = list(images)
    labels = list(labels)
    if len(images) != len(labels):
        raise ValueError(
            f'images and labels must have the same length, got {len(images)} and {len(labels)}'
        )

    df = pd.DataFrame({'filename': images, 'class': labels})
    return datagen.flow_from_dataframe(
        df,
        x_col='filename',
        y_col='class',
        class_mode='binary',
        target_size=target_size,
        batch_size=batch_size,
        shuffle=shuffle,
    )

# Batch iterators for each split, using the helper's default batch size
train_generator = create_data_generator(
    datagen=train_datagen,
    images=train_images,
    labels=train_labels,
)
val_generator = create_data_generator(
    datagen=val_datagen,
    images=val_images,
    labels=val_labels,
)

Found 372 validated image filenames belonging to 2 classes.
Found 94 validated image filenames belonging to 2 classes.


In [12]:
# Convolutional backbone: VGG16 with ImageNet weights and no classification head
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Keep the pre-trained weights fixed; only the layers added below are trained
base_model.trainable = False

# NOTE(review): the data generators feed images rescaled to [0, 1]; the ImageNet
# VGG16 weights are normally paired with `vgg16.preprocess_input`. Consider switching
# training and inference preprocessing together - TODO confirm.

# Assemble the classifier layer by layer on top of the frozen backbone
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dropout(0.5))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit for the binary decision

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 0us/step


In [13]:
# Fit for 20 epochs, scoring the validation generator after each epoch;
# the returned object is kept in `history`
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=20,
)

Epoch 1/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1s/step - accuracy: 0.6378 - loss: 0.7141 - val_accuracy: 0.6809 - val_loss: 0.6317
Epoch 2/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.6483 - loss: 0.6674 - val_accuracy: 0.6809 - val_loss: 0.6301
Epoch 3/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.6927 - loss: 0.6404 - val_accuracy: 0.6809 - val_loss: 0.6164
Epoch 4/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.6862 - loss: 0.6386 - val_accuracy: 0.6809 - val_loss: 0.6253
Epoch 5/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.6875 - loss: 0.6436 - val_accuracy: 0.6809 - val_loss: 0.6122
Epoch 6/20
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 1s/step - accuracy: 0.6702 - loss: 0.6740 - val_accuracy: 0.6809 - val_loss: 0.6209
Epoch 7/20
[1m12/12[0m [32m━━━━━━━━━━

In [14]:
# Score the validation generator; the result unpacks into the loss and the
# accuracy metric configured at compile time
loss, accuracy = model.evaluate(x=val_generator)
print(f'Validation Accuracy: {accuracy*100:.2f}%')

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 919ms/step - accuracy: 0.6529 - loss: 0.6311
Validation Accuracy: 68.09%


In [19]:
# Persist the trained model to a single file in the working directory
model.save(filepath='thyroid_nodule_classifier.h5')



In [22]:
# Sample image for the example prediction below.
# NOTE(review): this file sits in the same benign folder that was listed to build the
# train/validation split, so it is not an unseen sample.
img_path = 'Thyroid Dataset/benign/1_1.jpg'  # relative to the notebook's working directory

def predict_image(model, image_path, target_size=(150, 150), threshold=0.5):
    """Classify a single image file as 'benign' or 'malignant'.

    Args:
        model: Object exposing ``predict(batch)``; the score is read from
            element ``[0][0]`` of its result.
        image_path: Path of the image file to load.
        target_size: Size the image is resized to when loaded. Defaults to
            (150, 150), the size used elsewhere in this notebook.
        threshold: Score strictly above which the image is reported as
            'malignant'. Defaults to 0.5.

    Returns:
        'malignant' if the predicted score exceeds ``threshold``, else 'benign'.
    """
    # Load and preprocess the image the same way the generators do (scale to [0, 1])
    img = load_img(image_path, target_size=target_size)
    img_array = img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)  # add the leading batch dimension

    # Make a prediction
    prediction = model.predict(img_array)

    # Map the score onto a class name
    return 'malignant' if prediction[0][0] > threshold else 'benign'

# Restore the classifier from the file written earlier (rebinds `model`)
model = tf.keras.models.load_model(filepath='thyroid_nodule_classifier.h5')

# Classify the sample image referenced by img_path and report the result
prediction = predict_image(model=model, image_path=img_path)
print(f'The predicted class for the image is: {prediction}')



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340ms/step
The predicted class for the image is: benign
