In [8]:
import numpy as np
import pandas as pd
import os
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image
from fuzzywuzzy import fuzz

Load and Test Pretrained Inception Model 

In [9]:
# Load the pre-trained InceptionV3 model
model = InceptionV3(weights='imagenet')

# Function to preprocess the image and predict its label
def predict_image_label(img_path):
    # Load the image file, resizing it to 299x299 pixels (as required by InceptionV3)
    img = image.load_img(img_path, target_size=(299, 299))
    
    # Convert the image to a numpy array and add an additional dimension (for batch size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    
    # Preprocess the image for the InceptionV3 model
    img_array = preprocess_input(img_array)
    
    # Predict the probabilities across all output classes
    predictions = model.predict(img_array)
    
    # Decode the predictions to get human-readable labels
    decoded_predictions = decode_predictions(predictions, top=3)[0]
    predictions_list = []
    print(f"Predicted labels for {os.path.basename(img_path)} (top-3):")
    for i, (imagenet_id, label, score) in enumerate(decoded_predictions):
        print(f"{i+1}: {label} ({score*100:.2f}%)")
        predictions_list.append((label, score))
    return predictions_list

# Path to the folder containing images
folder_path = 'Example Data-20240208T214429Z-001/Example Data/exported'

predictions_dict = {}
# Iterate over all files in the folder
for filename in os.listdir(folder_path):
    if filename.lower().endswith((".png", ".jpg", ".jpeg")):  # Check for common image file extensions
        img_path = os.path.join(folder_path, filename)
        predictions_dict[filename] = predict_image_label(img_path)

# Print the filenames and corresponding predicted labels
for filename, predicted_labels in predictions_dict.items():
    print(f"\nFilename: {filename}")
    print("Predicted labels (top-3):")
    for i, (label, score) in enumerate(predicted_labels):
        print(f"{i+1}: {label} ({score*100:.2f}%)")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Predicted labels for (1) 18380579401063495.jpg (top-3):
1: motor_scooter (90.88%)
2: moped (1.75%)
3: snowmobile (1.61%)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Predicted labels for (1) 18380579401063495.png (top-3):
1: motor_scooter (89.87%)
2: snowmobile (1.72%)
3: moped (1.67%)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Predicted labels for (1) @GreyCupFestival - 109th Grey Cup.jpeg (top-3):
1: stage (58.90%)
2: moving_van (2.70%)
3: mortarboard (1.68%)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Predicted labels for (10) 17887803224903630.jpeg (top-3):
1: seashore (39.63%)
2: sandbar (7.99%)
3: Eskimo_dog (3.33%)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Predicted labels for (11) 17997439897932301.png (top-3):
1: crash_helmet (60.60%)
2: motor_scooter (5.71%)
3: moped (3.50%)
[1m1/1[0m [32m━━━

Preprocces the images in the images folder

In [10]:
def preprocess_image(img_path):
    try:
        # Load the image file, resizing it to 299x299 pixels (as required by InceptionV3)
        img = image.load_img(img_path, target_size=(299, 299))
        
        # Convert the image to a numpy array
        img_array = image.img_to_array(img)
        
        # Add a dimension to the array for batch size
        img_array = np.expand_dims(img_array, axis=0)
        
        # Apply the specific preprocessing required by InceptionV3
        img_array = preprocess_input(img_array)
        
        return img_array
    except Exception as e:
        print(f"Error processing {img_path}: {e}")
        return None

In [11]:
def preprocess_images_in_folder(folder_path):
    preprocessed_images = []
    
    # Iterate over all files in the folder
    for filename in os.listdir(folder_path):
        if filename.lower().endswith((".png", ".jpg", ".jpeg")):  # Check for common image file extensions
            img_path = os.path.join(folder_path, filename)
            img_array = preprocess_image(img_path)
            if img_array is not None:
                preprocessed_images.append(img_array)
    
    return preprocessed_images

# Preprocess all images in the specified folder
preprocessed_images = preprocess_images_in_folder(folder_path)

Extracting the image labels from excel file

In [12]:
excel_path = 'Example Data-20240208T214429Z-001/Example Data/Imagelabels.xlsx'


# Function to read the Excel file and extract image names and labels
def read_labels_from_excel(excel_path):
    # Read the Excel file
    df = pd.read_excel(excel_path)
    
    # the Excel file has columns 'Image Name' and 'Label'
    labels_dict = pd.Series(df.Label.values, index=df['Image Name']).to_dict()
    
    return labels_dict

# Call the function and store the result in a variable
actual_labels_dict = read_labels_from_excel(excel_path)


In [13]:
def preprocess_image(img_path):
    try:
        img = image.load_img(img_path, target_size=(299, 299))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)
        return img_array
    except Exception as e:
        print(f"Error processing {img_path}: {e}")
        return None

def predict_image_label(img_path):
    img_array = preprocess_image(img_path)
    if img_array is not None:
        predictions = model.predict(img_array)
        decoded_predictions = decode_predictions(predictions, top=1)[0]
        return decoded_predictions[0][1]  # Return only the top prediction label
    return None

def read_labels_from_excel(excel_path):
    df = pd.read_excel(excel_path)
    labels_dict = pd.Series(df.Label.values, index=df['Image Name']).to_dict()
    return labels_dict

# Function to compare predicted and actual labels for similarity
def compare_label_similarity(predicted_label, actual_label):
    return fuzz.ratio(predicted_label.lower(), actual_label.lower())

# Main processing function
def process_images_and_compare_labels(folder_path, excel_path):
    actual_labels_dict = read_labels_from_excel(excel_path)
    similarities = []

    for filename in os.listdir(folder_path):
        if filename.lower().endswith((".png", ".jpg", ".jpeg")):
            img_path = os.path.join(folder_path, filename)
            predicted_label = predict_image_label(img_path)
            actual_label = actual_labels_dict.get(filename)

            if predicted_label and actual_label:
                similarity_score = compare_label_similarity(predicted_label, actual_label)
                similarities.append(similarity_score)
                print(f"{filename}: Predicted Label - {predicted_label}, Actual Label - {actual_label}, Similarity - {similarity_score}%")

    if similarities:
        average_similarity = sum(similarities) / len(similarities)
        print(f"Average Label Similarity: {average_similarity}%")
    else:
        print("No images processed.")

# Update with your actual folder and Excel paths
folder_path = 'Example Data-20240208T214429Z-001/Example Data/exported'
excel_path = 'Example Data-20240208T214429Z-001/Example Data/Imagelabels.xlsx'

process_images_and_compare_labels(folder_path, excel_path)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
(1) @GreyCupFestival - 109th Grey Cup.jpeg: Predicted Label - stage, Actual Label - Glasses, Similarity - 33%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
(10) 17887803224903630.jpeg: Predicted Label - seashore, Actual Label - Woman, Similarity - 15%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
(12) 17985809330117499.jpeg: Predicted Label - moped, Actual Label - Night, Similarity - 0%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
(13) 18013990822817757.jpeg: Predicted Label - jersey, Actual Label - Paper, Similarity - 36%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
(14) 17993584322154200.jpeg: Pre

In [14]:

# Main processing and evaluation function
def evaluate_image_labeling_system(folder_path, excel_path, similarity_threshold=80):
    actual_labels_dict = read_labels_from_excel(excel_path)
    similarities = []

    for filename in os.listdir(folder_path):
        if filename.lower().endswith((".png", ".jpg", ".jpeg")):
            img_path = os.path.join(folder_path, filename)
            predicted_label = predict_image_label(img_path)
            actual_label = actual_labels_dict.get(filename)

            if predicted_label and actual_label:
                similarity_score = compare_label_similarity(predicted_label, actual_label)
                similarities.append(similarity_score)
                print(f"{filename}: Predicted - {predicted_label}, Actual - {actual_label}, Similarity - {similarity_score}%")

    if similarities:
        average_similarity = sum(similarities) / len(similarities)
        print(f"\nAverage Label Similarity: {average_similarity}%")
        
        if average_similarity >= similarity_threshold:
            print(f"The system meets the required similarity threshold of {similarity_threshold}%. System evaluation passed.")
        else:
            print(f"The system does not meet the required similarity threshold of {similarity_threshold}%. System evaluation failed.")
    else:
        print("No images processed. Evaluation cannot be performed.")


evaluate_image_labeling_system(folder_path, excel_path)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
(1) @GreyCupFestival - 109th Grey Cup.jpeg: Predicted - stage, Actual - Glasses, Similarity - 33%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
(10) 17887803224903630.jpeg: Predicted - seashore, Actual - Woman, Similarity - 15%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
(12) 17985809330117499.jpeg: Predicted - moped, Actual - Night, Similarity - 0%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
(13) 18013990822817757.jpeg: Predicted - jersey, Actual - Paper, Similarity - 36%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
(14) 17993584322154200.jpeg: Predicted - cliff, Actual - Lion, Similarity - 44%


In [15]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

Fine tune the pretrained model to get better results

In [16]:
# Load the pre-trained InceptionV3 model without the top layer
base_model = InceptionV3(weights='imagenet', include_top=False)

# Add new layers for fine-tuning
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)  # New FC layer, output dim=1024
predictions = Dense(1, activation='sigmoid')(x)  # New softmax layer for classification
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze all layers in the base InceptionV3 model
for layer in base_model.layers:
    layer.trainable = False

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Prepare data for training
excel_path = 'Example Data-20240208T214429Z-001/Example Data/Imagelabels.xlsx'
df = pd.read_excel(excel_path)
df['Image Name'] = df['Image Name'].apply(lambda x: os.path.join('Example Data-20240208T214429Z-001/Example Data/exported', x))

# Assuming binary classification for simplicity, adjust as needed
df['Label'] = df['Label'].apply(lambda x: 1 if x == 'YourPositiveClass' else 0)

# Split the data into training and validation sets
train_df = df.sample(frac=0.8, random_state=200)
val_df = df.drop(train_df.index)

# Data augmentation for training images
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Prepare the training and validation generators
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='Image Name',
    y_col='Label',
    target_size=(299, 299),
    batch_size=32,
    class_mode='binary')

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='Image Name',
    y_col='Label',
    target_size=(299, 299),
    batch_size=32,
    class_mode='binary')

# Train the model
model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=10,  # Start with a few epochs; increase as needed
    validation_data=val_generator,
    validation_steps=len(val_generator))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 0us/step


TypeError: If class_mode="binary", y_col="Label" column values must be strings.