In [16]:
import pandas as pd
import os
import shutil
import cv2
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout


# Define the path to the CSV file
File_Path = "Data_Entry_2017.csv"

# Load your DataFrame from a CSV file or any other source
df = pd.read_csv('Data_Entry_2017.csv')

# Now you can use df in your code

# Define the list of labels to search for
labels_to_search = ["Atelectasis", "Cardiomegaly", "Consolidation", "Edema", "Effusion",
                    "Emphysema", "Fibrosis", "Hernia", "Infiltration", "Mass",
                    "Nodule", "Pleural_Thickening", "Pneumonia", "Pneumothorax"]

# Generate the list of image folders
base_folder_name = "images_"
num_folders = 12
Image_Folders = [f"{base_folder_name}{str(i).zfill(3)}" for i in range(1, num_folders + 1)]

# Create directories for each label and copy corresponding images
for label in labels_to_search:
    target_dir = f"Progress/{label}"
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
        print(f"{label} folder created")
    else:
        continue
    cnt = 0  # Counter for the number of copied images

    # Loop through each row in the DataFrame
    for (_, row) in df.iterrows():
        if row["Finding Labels"] == label and row["View Position"] == "PA":
            filename = row["Image Index"]
            # Search for the image in each image folder
            for folder in Image_Folders:
                image_path = os.path.join(folder, "images", filename)
                if os.path.exists(image_path):
                    image_copy_path = os.path.join(target_dir, filename)
                    shutil.copy2(image_path, image_copy_path)
    #print(f"Moving image {filename} for label {label}")
                    cnt += 1
                    break  # Stop searching once the image is found and copied

    print(f"Total {label} images copied: {cnt}")

# Initialize lists to store images and labels

In [None]:
import os
import cv2
import numpy as np

# Define the path to the dataset directory
dataset_path = 'Progress'

# Initialize empty arrays to store images and labels
# Initialize empty arrays to store images and labels
images = []
labels = []

# Initialize a dictionary to map class names to indices
class_index = {}

# Iterate through each directory (class) in the dataset directory
for index, class_name in enumerate(os.listdir(dataset_path)):
    class_path = os.path.join(dataset_path, class_name)
    
    # Skip if the current path is not a directory
    if not os.path.isdir(class_path):
        continue
    
    # Map class name to index
    class_index[class_name] = index
    
    # Iterate through each image in the current class directory
    for image_name in os.listdir(class_path):
        image_path = os.path.join(class_path, image_name)
        
        # Read the image using OpenCV
        image = cv2.imread(image_path)
        
        # Check if the image was successfully loaded
        if image is None:
            print(f"Error: Unable to read image '{image_path}'")
            continue  # Skip this image and move to the next one
        
        # Optionally, you can resize the image to a specific size if needed
        image = cv2.resize(image, (224, 224))
        image = image / 255.0
        # Append the image to the images array
        images.append(image)
        
        # Append the index corresponding to the class name to the labels array
        labels.append(index)  # Use index as the label (single value)

# Convert the images and labels arrays to numpy arrays
images = np.array(images)
labels = np.array(labels)

# Print the shapes of images and labels arrays
print("Shape of images array:", images.shape)
print("Shape of labels array:", labels.shape)

# Print the class index mapping
print("Class index mapping:", class_index)



In [4]:
# Initialize ImageDataGenerator for data augmentation

from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    rotation_range=20,      # Random rotation within the range [-20, 20] degrees
    width_shift_range=0.1,  # Random horizontal shift by up to 10% of the image width
    height_shift_range=0.1, # Random vertical shift by up to 10% of the image height
    zoom_range=0.1,         # Random zoom by up to 10%
    horizontal_flip=True,  # Random horizontal flipping
    vertical_flip=False,   # No vertical flipping
    fill_mode='nearest'    # Fill mode for newly created pixels (nearest)
)

# Create augmented images and labels
augmented_images = []
augmented_labels = []

# Iterate through each original image and apply data augmentation
for image, label in zip(images, labels):
    # Reshape image to (1, height, width, channels) for the generator
    image = np.expand_dims(image, axis=0)
    
    # Generate augmented images and labels
    augmented_gen = datagen.flow(image, batch_size=1)
    for _ in range(3):  # Generate 3 augmented samples per original image
        augmented_image = augmented_gen.next()[0]  # Get the augmented image from the generator
        augmented_images.append(augmented_image)
        augmented_labels.append(label)

# Convert the augmented images and labels arrays to numpy arrays
augmented_images = np.array(augmented_images)
augmented_labels = np.array(augmented_labels)

# Optionally, visualize some augmented images and their corresponding labels
# You can use matplotlib or other libraries for visualization

# Concatenate original and augmented images and labels
all_images = np.concatenate((images, augmented_images), axis=0)
all_labels = np.concatenate((labels, augmented_labels), axis=0)

# Shuffle the data to ensure randomness
shuffle_indices = np.random.permutation(len(all_images))
all_images = all_images[shuffle_indices]
all_labels = all_labels[shuffle_indices]


AttributeError: 'NumpyArrayIterator' object has no attribute 'next'

In [43]:
len(augmented_images)

10476

In [22]:
pip install --upgrade tensorflow


Collecting tensorflow
  Using cached tensorflow-2.16.1-cp311-cp311-win_amd64.whl.metadata (3.5 kB)
Collecting tensorflow-intel==2.16.1 (from tensorflow)
  Using cached tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting ml-dtypes~=0.3.1 (from tensorflow-intel==2.16.1->tensorflow)
  Using cached ml_dtypes-0.3.2-cp311-cp311-win_amd64.whl.metadata (20 kB)
Collecting tensorboard<2.17,>=2.16 (from tensorflow-intel==2.16.1->tensorflow)
  Using cached tensorboard-2.16.2-py3-none-any.whl.metadata (1.6 kB)
Collecting keras>=3.0.0 (from tensorflow-intel==2.16.1->tensorflow)
  Using cached keras-3.3.3-py3-none-any.whl.metadata (5.7 kB)
Using cached tensorflow-2.16.1-cp311-cp311-win_amd64.whl (2.1 kB)
Using cached tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl (377.0 MB)
Using cached keras-3.3.3-py3-none-any.whl (1.1 MB)
Using cached ml_dtypes-0.3.2-cp311-cp311-win_amd64.whl (127 kB)
Using cached tensorboard-2.16.2-py3-none-any.whl (5.5 MB)
Installing collected packag

  You can safely remove it manually.
  You can safely remove it manually.


In [23]:


from sklearn.utils.class_weight import compute_class_weight

# Get unique classes


# Calculate class weights
class_weights = compute_class_weight('balanced', classes = np.unique(labels),y= labels)

# Convert class weights to a dictionary
class_weight_dict = dict(zip(np.unique(labels), class_weights))

class_weight_dict
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.25)

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Build a model for multi-label classification
# Build a model for multi-label classification with 4 classes



In [24]:
class_weight_dict

{0: 1.7233785822021117,
 1: 5.1054512957998215,
 2: 11.007707129094412,
 3: 74.6797385620915,
 4: 1.8258229466283158,
 5: 7.254603174603175,
 6: 5.877572016460905,
 7: 38.863945578231295,
 8: 0.722707147375079,
 9: 2.78614971958059,
 10: 0.09690770613878853,
 11: 1.9795564795564795,
 12: 4.352761904761905,
 13: 21.640151515151516,
 14: 2.528995130588756}

In [29]:
from keras.applications import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

# Step 1: Define the DenseNet model architecture
base_model = DenseNet121(weights='imagenet', include_top=False)  # Load pre-trained weights from ImageNet

# Add custom layers on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)  # Global average pooling layer
x = Dense(1024, activation='relu')(x)  # Fully connected layer
predictions = Dense(15, activation='sigmoid')(x)  # Output layer

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

from keras.optimizers import Adam

optimizer = Adam(learning_rate=0.01) 
from keras.metrics import Precision
# Step 2: Compile the model
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy',Precision()])
model.summary()



# Save the model
model.save('using_dense.keras')
print("DONE")

DONE


In [30]:
from tensorflow.keras.utils import to_categorical

# Convert the labels to one-hot encoded arrays
y_train_encoded = to_categorical(y_train, num_classes=15)
y_test_encoded = to_categorical(y_test, num_classes=15)


In [None]:
tf.config.run_functions_eagerly(True)
model.fit(x_train, y_train_encoded, batch_size=32, epochs=5, validation_data=(x_test, y_test_encoded), class_weight=class_weight_dict)


Epoch 1/5
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6006s[0m 4s/step - accuracy: 0.0179 - loss: 0.2608 - precision_1: 0.0493 - val_accuracy: 0.0034 - val_loss: 0.3137 - val_precision_1: 0.0390
Epoch 2/5
[1m1339/1339[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6182s[0m 5s/step - accuracy: 0.0253 - loss: 0.2581 - precision_1: 0.0000e+00 - val_accuracy: 0.0046 - val_loss: 0.2502 - val_precision_1: 0.0000e+00
Epoch 3/5
[1m 459/1339[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m16:52:21[0m 69s/step - accuracy: 0.0175 - loss: 0.2463 - precision_1: 0.0000e+00

In [43]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Load your trained model
model = load_model('multi_class_model.keras')

# Read and preprocess the new image
image = cv2.imread('card.png')
image = cv2.resize(image, (100, 100))  # Resize to match input size
image = image / 255.0  # Normalize pixel values

# Expand dimensions to create a batch of size 1
image = np.expand_dims(image, axis=0)

# Predict on the new image
predictions = model.predict(image)
print (predictions)
# Post-process the predictions (if needed)
# For example, if your model outputs probabilities, you can find the class with the highest probability
predicted_class = np.argmax(predictions)

# Print the predicted class
print("Predicted Class:", predicted_class)




[[0.4938257  0.49460873 0.50827056]]
Predicted Class: 2
