In [1]:
# Import Necessary Libraries

import os
import numpy as np
import pandas as pd
import logging
import warnings
import random
import pickle

import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.utils import Sequence
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ReduceLROnPlateau,
    TensorBoard,
    ModelCheckpoint,
    Callback
)

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [2]:
# Configuration and Setup

# Suppress warnings
warnings.filterwarnings('ignore')

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,  # Set to DEBUG level for detailed logging
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Set seeds for reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)

# Enable memory growth for GPUs
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Enabled memory growth for {len(gpus)} GPU(s).")
    except RuntimeError as e:
        print(f"Error setting memory growth: {e}")


Enabled memory growth for 1 GPU(s).


In [3]:
# Define File Paths and Parameters

category = 'mens_tshirts'
csv_file = '/kaggle/input/m1dataset/Men_Tshirts.csv'
image_dir = '/kaggle/input/m1dataset/train_images'
test_image_dir = '/kaggle/input/m1dataset/test_images'

IMG_SIZE = (300, 300)
BATCH_SIZE = 16


In [4]:
# Data Loading and Preprocessing

# Load data
try:
    df = pd.read_csv(csv_file)
    df['category'] = category
    print(f"Loaded {len(df)} samples for category '{category}'.")
except FileNotFoundError:
    print(f"CSV file not found at path: {csv_file}")
    exit(1)

# Define attributes (excluding 'id')
attributes = ['color', 'neck', 'pattern', 'print_or_pattern_type', 'sleeve_length']

# Preprocess data
print(f"\nProcessing category: {category}")

# Drop columns where all values are NaN
columns_all_nan = [col for col in attributes if df[col].isna().all()]
df = df.drop(columns=columns_all_nan)
print(f"Dropping columns with all NaN: {columns_all_nan}")

# Update attributes list in case some were dropped
attributes = [col for col in attributes if col in df.columns]
print(f"Remaining attributes: {attributes}")

df = df[attributes + ['id']]  # Keep 'id' for image path reference
print(f"DataFrame head:\n{df.head()}")

# Pad 'id' column values to match the filenames and add '.jpg' extension
df['id'] = df['id'].astype(str).str.zfill(6) + '.jpg'

# Create full image paths
df['image_path'] = df['id'].apply(lambda x: os.path.join(image_dir, x))

# Check if images exist
df['image_exists'] = df['image_path'].apply(os.path.exists)

# Filter out samples where images are missing
df = df[df['image_exists']]
df = df.reset_index(drop=True)
print(f"Filtered dataset to {len(df)} samples where images exist.")

# Encode labels
label_encoders = {}
for attr in attributes:
    le = LabelEncoder()
    # Handle missing values by filling with a placeholder
    df[attr] = df[attr].fillna('Unknown').astype(str)
    df[attr] = le.fit_transform(df[attr])
    label_encoders[attr] = le
    print(f"Encoded attribute '{attr}' with {len(le.classes_)} classes.")

# Check for classes with insufficient samples
for attr in attributes:
    class_counts = df[attr].value_counts()
    min_count = class_counts.min()
    if min_count < 2:
        logger.warning(f"Attribute '{attr}' has a class with only {min_count} sample(s). Consider merging or removing such classes.")

# Split into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=SEED)
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)
print(f"Training samples: {len(train_df)}, Validation samples: {len(val_df)}")


Loaded 7267 samples for category 'mens_tshirts'.

Processing category: mens_tshirts
Dropping columns with all NaN: []
Remaining attributes: ['color', 'neck', 'pattern', 'print_or_pattern_type', 'sleeve_length']
DataFrame head:
        color   neck  pattern print_or_pattern_type  sleeve_length  id
0     default  round  printed               default  short sleeves   0
1  multicolor   polo    solid                 solid  short sleeves   1
2     default   polo    solid                 solid  short sleeves   2
3  multicolor   polo    solid                 solid  short sleeves   3
4  multicolor   polo    solid                 solid  short sleeves   4
Filtered dataset to 7267 samples where images exist.
Encoded attribute 'color' with 5 classes.
Encoded attribute 'neck' with 3 classes.
Encoded attribute 'pattern' with 3 classes.
Encoded attribute 'print_or_pattern_type' with 4 classes.
Encoded attribute 'sleeve_length' with 3 classes.
Training samples: 5813, Validation samples: 1454


In [7]:
# Define Custom Data Generator

class CustomDataGenerator(Sequence):
    """
    Custom data generator using Keras' Sequence API.
    """
    def __init__(self, df, attributes, datagen, batch_size=BATCH_SIZE, shuffle=True):
        self.df = df
        self.attributes = attributes
        self.datagen = datagen
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()
        
    def __len__(self):
        """
        Denotes the number of batches per epoch.
        """
        return int(np.ceil(len(self.df) / self.batch_size))
    
    def __getitem__(self, index):
        """
        Generate one batch of data.
        """
        # Generate indices of the batch
        start = index * self.batch_size
        end = min((index + 1) * self.batch_size, len(self.df))
        batch_df = self.df.iloc[start:end]
        
        images = []
        targets = {attr: [] for attr in self.attributes}
        
        for _, row in batch_df.iterrows():
            img_path = row['image_path']
            img = tf.keras.preprocessing.image.load_img(img_path, target_size=IMG_SIZE)
            img = tf.keras.preprocessing.image.img_to_array(img)
            images.append(img)
            for attr in self.attributes:
                targets[attr].append(row[attr])
        
        if len(images) == 0:
            # print(f"No images found in batch {index}.")
            raise ValueError(f"No images found in batch {index}.")
        
        images = np.array(images)
        y_targets = tuple(np.array(targets[attr]) for attr in self.attributes)
        
        # Apply data augmentation and preprocessing to images only
        augmented_images = next(self.datagen.flow(
            images,
            batch_size=len(images),
            shuffle=False
        ))
        
        # Debug logging
        # print(f"Batch {index}: augmented_images.shape = {augmented_images.shape}")
        for idx, attr in enumerate(self.attributes):
            logger.debug(f"Batch {index}: targets[{attr}].shape = {y_targets[idx].shape}")
        
        return augmented_images, y_targets
    
    def on_epoch_end(self):
        """
        Updates indexes after each epoch.
        """
        if self.shuffle:
            self.df = self.df.sample(frac=1, random_state=SEED).reset_index(drop=True)
            # print("Data shuffled after epoch.")


In [8]:
# Define Data Augmentation

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

In [9]:
# Initialize Data Generators

train_generator = CustomDataGenerator(
    df=train_df,
    attributes=attributes,
    datagen=train_datagen,
    batch_size=BATCH_SIZE,
    shuffle=True
)

val_generator = CustomDataGenerator(
    df=val_df,
    attributes=attributes,
    datagen=val_datagen,
    batch_size=BATCH_SIZE,
    shuffle=False
)

steps_per_epoch = len(train_generator)
validation_steps = len(val_generator)
# print(f"Steps per epoch: {steps_per_epoch}, Validation steps: {validation_steps}")

# Test the validation generator
# print("Testing the validation generator.")
try:
    val_images, val_targets = val_generator[0]
    # print(f"Validation batch images shape: {val_images.shape}")
    for idx, attr in enumerate(attributes):
        print(f"Validation batch targets for '{attr}' shape: {val_targets[idx].shape}")
except Exception as e:
    print(f"Error fetching validation batch: {e}")
    raise e  # Re-raise the exception to stop execution


Validation batch targets for 'color' shape: (16,)
Validation batch targets for 'neck' shape: (16,)
Validation batch targets for 'pattern' shape: (16,)
Validation batch targets for 'print_or_pattern_type' shape: (16,)
Validation batch targets for 'sleeve_length' shape: (16,)


In [10]:
# Build the Model

input_shape = IMG_SIZE + (3,)
base_model = EfficientNetB3(include_top=False, weights='imagenet', input_shape=input_shape)
base_model.trainable = False  # Freeze the base model
# print("Loaded EfficientNetB3 base model with ImageNet weights.")

# Add custom layers on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)

# Output layers for each attribute
outputs = []
for attr in attributes:
    num_classes = train_df[attr].nunique()
    output = Dense(num_classes, activation='softmax', name=attr)(x)
    outputs.append(output)
    # print(f"Added output layer for attribute '{attr}' with {num_classes} classes.")

# Define the complete model
model = Model(inputs=base_model.input, outputs=outputs)
print("Model architecture created.")

# Print model summary
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
[1m43941136/43941136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Model architecture created.


In [11]:
# Compile the Model

losses = {attr: 'sparse_categorical_crossentropy' for attr in attributes}
metrics = {attr: ['accuracy'] for attr in attributes}

print(f"Losses: {losses}")
print(f"Metrics: {metrics}")

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=losses,
    metrics=metrics
)
print("Model compiled with Adam optimizer and sparse categorical crossentropy loss.")


Losses: {'color': 'sparse_categorical_crossentropy', 'neck': 'sparse_categorical_crossentropy', 'pattern': 'sparse_categorical_crossentropy', 'print_or_pattern_type': 'sparse_categorical_crossentropy', 'sleeve_length': 'sparse_categorical_crossentropy'}
Metrics: {'color': ['accuracy'], 'neck': ['accuracy'], 'pattern': ['accuracy'], 'print_or_pattern_type': ['accuracy'], 'sleeve_length': ['accuracy']}
Model compiled with Adam optimizer and sparse categorical crossentropy loss.


In [12]:
# Define Callbacks

class BatchLoggingCallback(Callback):
    """
    Custom callback to log metrics after every few batches.
    """
    def __init__(self, log_every=10):
        super(BatchLoggingCallback, self).__init__()
        self.log_every = log_every

    def on_train_batch_end(self, batch, logs=None):
        if (batch + 1) % self.log_every == 0:
            log_message = f"Batch {batch + 1}: " + ", ".join([f"{k}={v:.4f}" for k, v in logs.items()])
            # print(log_message)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    verbose=1
)

checkpoint = ModelCheckpoint(
    filepath=f'{category}_best_model.keras',
    monitor='val_loss',
    save_best_only=True,
    verbose=1
)

tensorboard_callback = TensorBoard(
    log_dir='./logs',
    histogram_freq=1
)

batch_logging = BatchLoggingCallback(log_every=10)

callbacks = [
    early_stop,
    reduce_lr,
    checkpoint,
    tensorboard_callback,
    batch_logging
]


In [13]:
# Test model evaluation on validation data

# print("Testing model evaluation on validation data.")
try:
    val_logs = model.evaluate(
        val_generator,
        steps=validation_steps,
        verbose=1
    )
    # print(f"Validation logs: {val_logs}")
except Exception as e:
    print(f"Error during model evaluation: {e}")
    raise e  # Re-raise the exception to stop execution


I0000 00:00:1731713003.303482      74 service.cc:145] XLA service 0x7cb22c002a40 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1731713003.303541      74 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m 2/91[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m5s[0m 58ms/step - color_accuracy: 0.2188 - loss: 6.3499 - neck_accuracy: 0.4531 - pattern_accuracy: 0.3125 - print_or_pattern_type_accuracy: 0.0781 - sleeve_length_accuracy: 0.7812  

I0000 00:00:1731713011.162469      74 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 210ms/step - color_accuracy: 0.1623 - loss: 6.2523 - neck_accuracy: 0.4222 - pattern_accuracy: 0.4139 - print_or_pattern_type_accuracy: 0.1117 - sleeve_length_accuracy: 0.7893


In [15]:
# Training Cell with Updated EvaluationCallback

# Import the necessary module for Callback
from tensorflow.keras.callbacks import Callback

# Define the corrected EvaluationCallback
class EvaluationCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        # print(f"Starting evaluation at epoch {epoch + 1}")
        try:
            # Evaluate the model on the validation data
            val_logs = self.model.evaluate(
                val_generator,
                steps=validation_steps,
                verbose=1
            )
            # print(f"Validation logs: {val_logs}")
            # Update logs with validation metrics
            if logs is not None and val_logs is not None:
                # The first element is the total loss
                val_loss = val_logs[0]
                logs['val_loss'] = val_loss
                # The following elements are the accuracies for each attribute
                for i, attr in enumerate(attributes):
                    val_attr_accuracy = val_logs[i + 1]
                    logs[f'val_{attr}_accuracy'] = val_attr_accuracy
        except Exception as e:
            print(f"Exception during evaluation at epoch {epoch + 1}: {e}")
            self.model.stop_training = True  # Stop training on error
            raise e  # Re-raise the exception to see the traceback

# Update the callbacks list to include the updated EvaluationCallback
callbacks = [
    early_stop,
    reduce_lr,
    checkpoint,
    tensorboard_callback,
    batch_logging,
    EvaluationCallback()  # Add the updated callback here
]

# Remove validation_data and validation_steps from model.fit()
epochs_initial = 20
# print(f"Starting initial training for {epochs_initial} epochs.")

history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs_initial,
    callbacks=callbacks,
    verbose=1  # Keras' built-in progress bar
)


Epoch 1/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - color_accuracy: 0.2567 - loss: 5.5573 - neck_accuracy: 0.4222 - pattern_accuracy: 0.4139 - print_or_pattern_type_accuracy: 0.4225 - sleeve_length_accuracy: 0.7893
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 512ms/step - color_accuracy: 0.2306 - loss: 7.0706 - neck_accuracy: 0.3764 - pattern_accuracy: 0.3834 - print_or_pattern_type_accuracy: 0.3208 - sleeve_length_accuracy: 0.6131 - learning_rate: 0.0010 - val_loss: 5.5936 - val_color_accuracy: 0.2696 - val_neck_accuracy: 0.4140 - val_pattern_accuracy: 0.4202 - val_print_or_pattern_type_accuracy: 0.4182 - val_sleeve_length_accuracy: 0.7827
Epoch 2/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 59ms/step - color_accuracy: 0.2567 - loss: 5.5573 - neck_accuracy: 0.4222 - pattern_accuracy: 0.4139 - print_or_pattern_type_accuracy: 0.4225 - sleeve_length_accuracy: 0.7893
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [16]:
# Fine-Tune the Model

logger.info("Starting fine-tuning of the model.")
base_model.trainable = True  # Unfreeze the base model

# Freeze the first 100 layers
for layer in base_model.layers[:100]:
    layer.trainable = False
logger.info("Unfroze the top layers of the base model for fine-tuning.")

# Recompile the model with a lower learning rate
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5),
    loss=losses,
    metrics=metrics
)
logger.info("Recompiled the model with a lower learning rate for fine-tuning.")

# Fine-tuning training
epochs_fine = 10
print(f"Starting fine-tuning for {epochs_fine} epochs.")

history_fine = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs_fine,
    callbacks=callbacks,
    verbose=1  # Keras' built-in progress bar
)


Starting fine-tuning for 10 epochs.
Epoch 1/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 109ms/step - color_accuracy: 0.2489 - loss: 4.6152 - neck_accuracy: 0.6761 - pattern_accuracy: 0.6744 - print_or_pattern_type_accuracy: 0.6281 - sleeve_length_accuracy: 0.7786
[1m364/364[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m335s[0m 561ms/step - color_accuracy: 0.2911 - loss: 5.9456 - neck_accuracy: 0.5827 - pattern_accuracy: 0.5343 - print_or_pattern_type_accuracy: 0.4841 - sleeve_length_accuracy: 0.6846 - learning_rate: 3.0000e-05 - val_loss: 4.6533 - val_color_accuracy: 0.2593 - val_neck_accuracy: 0.6664 - val_pattern_accuracy: 0.6651 - val_print_or_pattern_type_accuracy: 0.6162 - val_sleeve_length_accuracy: 0.7662
Epoch 2/10
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 66ms/step - color_accuracy: 0.2489 - loss: 4.6152 - neck_accuracy: 0.6761 - pattern_accuracy: 0.6744 - print_or_pattern_type_accuracy: 0.6281 - sleeve_length_accuracy: 0.7786


In [17]:
# Save the Final Model and Encoders

# Save the final trained model
final_model_path = f'{category}_model_final.h5'
model.save(final_model_path)
logger.info(f"Final model saved as {final_model_path}.")

# Save label encoders for future use
label_encoders_path = f'{category}_label_encoders.pkl'
with open(label_encoders_path, 'wb') as f:
    pickle.dump(label_encoders, f)
logger.info(f"Label encoders saved as {label_encoders_path}.")

logger.info("Training process completed successfully.")


In [18]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import pickle


In [19]:
# Parameters
IMG_SIZE = (300, 300)  # Should match the size used during training
category = 'mens_tshirts'  # Replace with your category name if different

# Paths to the saved model and label encoders
model_path = f'/kaggle/working/{category}_model_final.h5'
label_encoders_path = f'/kaggle/working/{category}_label_encoders.pkl'


In [20]:
# Load the trained model
model = load_model(model_path)
print("Model loaded successfully.")

# Load label encoders
with open(label_encoders_path, 'rb') as f:
    label_encoders = pickle.load(f)
print("Label encoders loaded successfully.")


Model loaded successfully.
Label encoders loaded successfully.


In [21]:
def preprocess_image(image_path):
    """
    Preprocesses the image for prediction.
    """
    # Load the image
    img = load_img(image_path, target_size=IMG_SIZE)
    # Convert to array
    img_array = img_to_array(img)
    # Rescale (same as during training)
    img_array = img_array / 255.0
    # Expand dimensions to match the input shape (1, IMG_SIZE[0], IMG_SIZE[1], 3)
    img_array = np.expand_dims(img_array, axis=0)
    return img_array


In [22]:
def predict_attributes(image_path, model, label_encoders):
    """
    Predicts the attributes of an image using the trained model and label encoders.
    """
    # Preprocess the image
    img_array = preprocess_image(image_path)
    
    # Make predictions
    predictions = model.predict(img_array)
    
    # If predictions is a list of arrays (one per attribute)
    # Decode the predictions using the label encoders
    decoded_predictions = {}
    for idx, attr in enumerate(label_encoders.keys()):
        # Get the predicted class index (the one with the highest probability)
        pred_class_index = np.argmax(predictions[idx], axis=1)[0]
        # Decode the class index back to the original label
        pred_label = label_encoders[attr].inverse_transform([pred_class_index])[0]
        decoded_predictions[attr] = pred_label
    
    return decoded_predictions


In [None]:
# Path to the new image you want to predict
new_image_path = '/path/to/your/new/image.jpg'  # Update with your image path

# Check if the image exists
if not os.path.exists(new_image_path):
    print(f"Image not found at path: {new_image_path}")
else:
    # Make predictions
    result = predict_attributes(new_image_path, model, label_encoders)
    
    # Display the predictions
    print("Predicted attributes:")
    for attr, value in result.items():
        print(f"{attr}: {value}")


In [25]:
import os
import pandas as pd
import tensorflow as tf  # Import TensorFlow to control logging if you are using TensorFlow or Keras

# Suppress TensorFlow logging
tf.get_logger().setLevel('ERROR')

# Path to your images folder
images_folder_path = '/kaggle/input/m1dataset/test_images'  # Update this to your images folder path
csv_file_path = '/kaggle/input/m1dataset/test.csv'  # Update this to your CSV file path

# Load the CSV file
data = pd.read_csv(csv_file_path)

# Initialize or load the output DataFrame if it already exists
output_file = 'output_predictions_efficient_net_mens_tshirt.csv'
if os.path.exists(output_file):
    output_df = pd.read_csv(output_file)
else:
    output_df = pd.DataFrame(columns=['id', 'Category', 'len', 'response'])

count = 0

# Loop through the images
for index, row in data.iterrows():
    # Check if the row is already processed
    if row['id'] in output_df['id'].values:
        continue  # Skip if this row was processed before

    if count % 100 == 0:
        print(count)
    
    image_id = str(row['id']).zfill(6)  # Assuming image names are zero-padded (e.g., 000000.jpg)
    image_path = os.path.join(images_folder_path, f"{image_id}.jpg")
    
    if not os.path.exists(image_path):
        print(f"Image not found at path: {image_path}")
        response = "Image not found"
    else:
        # Make predictions
        try:
            result = predict_attributes(image_path, model, label_encoders)  # Replace with your prediction logic
            response = result
        except Exception as e:
            print(f"Error processing image {image_path}: {e}")
            response = "Error in prediction"

    # Append the new row to the DataFrame
    new_row = pd.DataFrame([{
        'id': row['id'],
        'Category': row['Category'],
        'len': 5,
        'response': response
    }])
    
    # Append to the main DataFrame and save to CSV immediately
    output_df = pd.concat([output_df, new_row], ignore_index=True)
    output_df.to_csv(output_file, index=False)
    count += 1

print("Processing completed. Output saved to output_predictions.csv")


0
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

KeyboardInterrupt: 

In [29]:
import os
import pandas as pd
import tensorflow as tf
import sys
from contextlib import redirect_stdout

# Suppress TensorFlow logging
tf.get_logger().setLevel('ERROR')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow warnings

# Path to your images folder
images_folder_path = '/kaggle/input/m1dataset/test_images'  # Update this to your images folder path
csv_file_path = '/kaggle/input/m1dataset/test.csv'  # Update this to your CSV file path

# Load the CSV file
data = pd.read_csv(csv_file_path)

# Initialize or load the output DataFrame if it already exists
output_file = 'output_predictions_efficient_net_mens_tshirt.csv'
if os.path.exists(output_file):
    output_df = pd.read_csv(output_file)
else:
    output_df = pd.DataFrame(columns=['id', 'Category', 'len', 'response'])

count = 0

# Loop through the images
for index, row in data.iterrows():
    # Check if the row is already processed
    if row['id'] in output_df['id'].values:
        continue  # Skip if this row was processed before

    if count % 100 == 0:
        print(count)
    
    image_id = str(row['id']).zfill(6)  # Assuming image names are zero-padded (e.g., 000000.jpg)
    image_path = os.path.join(images_folder_path, f"{image_id}.jpg")
    print(image_path)
    
    if not os.path.exists(image_path):
        print(f"Image not found at path: {image_path}")
        response = "Image not found"
    else:
        # Make predictions with output suppression
        try:
            with open(os.devnull, "w") as f, redirect_stdout(f):  # Redirect stdout to silence output
                result = predict_attributes(image_path, model, label_encoders)  # Replace with your prediction logic
            response = result
        except Exception as e:
            print(f"Error processing image {image_path}: {e}")
            response = "Error in prediction"

    # Append the new row to the DataFrame
    new_row = pd.DataFrame([{
        'id': row['id'],
        'Category': row['Category'],
        'len': 5,
        'response': response
    }])
    
    # Append to the main DataFrame and save to CSV immediately
    output_df = pd.concat([output_df, new_row], ignore_index=True)
    output_df.to_csv(output_file, index=False)
    count += 1

print("Processing completed. Output saved to output_predictions.csv")


0
/kaggle/input/m1dataset/test_images/000863.jpg
/kaggle/input/m1dataset/test_images/000864.jpg
/kaggle/input/m1dataset/test_images/000865.jpg
/kaggle/input/m1dataset/test_images/000866.jpg
/kaggle/input/m1dataset/test_images/000867.jpg
/kaggle/input/m1dataset/test_images/000868.jpg
/kaggle/input/m1dataset/test_images/000869.jpg
/kaggle/input/m1dataset/test_images/000870.jpg
/kaggle/input/m1dataset/test_images/000871.jpg
/kaggle/input/m1dataset/test_images/000872.jpg
/kaggle/input/m1dataset/test_images/000873.jpg
/kaggle/input/m1dataset/test_images/000874.jpg
/kaggle/input/m1dataset/test_images/000875.jpg
/kaggle/input/m1dataset/test_images/000876.jpg
/kaggle/input/m1dataset/test_images/000877.jpg
/kaggle/input/m1dataset/test_images/000878.jpg
/kaggle/input/m1dataset/test_images/000879.jpg
/kaggle/input/m1dataset/test_images/000880.jpg
/kaggle/input/m1dataset/test_images/000881.jpg
/kaggle/input/m1dataset/test_images/000882.jpg
/kaggle/input/m1dataset/test_images/000883.jpg
/kaggle/inp

KeyboardInterrupt: 

In [27]:
rm -rf /kaggle/working/output_predictions_efficient_net_mens_tshirt.csv