In [1]:
# Step 1: Load the CSV file
import pandas as pd

# Load the CSV file
csv_file_path = "D:/AI Plan/Projects/Dataset/images.csv"
dataset_info = pd.read_csv(csv_file_path)

# Display basic information about the CSV file
print("Dataset Info:")
print(dataset_info.info())
print("\nSample of the Dataset:")
print(dataset_info.head())


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5403 entries, 0 to 5402
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   image      5403 non-null   object
 1   sender_id  5403 non-null   int64 
 2   label      5403 non-null   object
 3   kids       5403 non-null   bool  
dtypes: bool(1), int64(1), object(2)
memory usage: 132.0+ KB
None

Sample of the Dataset:
                                  image  sender_id     label   kids
0  4285fab0-751a-4b74-8e9b-43af05deee22        124  Not sure  False
1  ea7b6656-3f84-4eb3-9099-23e623fc1018        148   T-Shirt  False
2  00627a3f-0477-401c-95eb-92642cbe078d         94  Not sure  False
3  ea2ffd4d-9b25-4ca8-9dc2-bd27f1cc59fa         43   T-Shirt  False
4  3b86d877-2b9e-4c8b-a6a2-1d87513309d0        189     Shoes  False


In [2]:
# Step 2: Load and Preprocess the Images
import os
import cv2

# Define the paths to the image folders
train_folder_path = "D:/AI Plan/Projects/Dataset/train"
valid_folder_path = "D:/AI Plan/Projects/Dataset/valid"
test_folder_path = "D:/AI Plan/Projects/Dataset/test"

# Function to load images from a folder
def load_images_from_folder(folder_path):
    images = []
    for filename in os.listdir(folder_path):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is not None:
            images.append(img)
    return images

# Load images from the training, validation, and testing folders
train_images = load_images_from_folder(train_folder_path)
valid_images = load_images_from_folder(valid_folder_path)
test_images = load_images_from_folder(test_folder_path)

# Display the number of images loaded from each folder
print("Number of Training Images:", len(train_images))
print("Number of Validation Images:", len(valid_images))
print("Number of Testing Images:", len(test_images))


Number of Training Images: 573
Number of Validation Images: 23
Number of Testing Images: 25


In [3]:
# Step 3: Preprocess the Dataset
import numpy as np

# Define function to resize images
def resize_images(images, size):
    resized_images = []
    for img in images:
        resized_img = cv2.resize(img, size)
        resized_images.append(resized_img)
    return resized_images

# Define image size for resizing
image_size = (224, 224)  # You can adjust the size as needed

# Resize the images
train_images_resized = resize_images(train_images, image_size)
valid_images_resized = resize_images(valid_images, image_size)
test_images_resized = resize_images(test_images, image_size)

# Convert the resized images to numpy arrays
X_train = np.array(train_images_resized)
X_valid = np.array(valid_images_resized)
X_test = np.array(test_images_resized)

# Display the shape of the resized image arrays
print("Shape of Training Images Array:", X_train.shape)
print("Shape of Validation Images Array:", X_valid.shape)
print("Shape of Testing Images Array:", X_test.shape)


Shape of Training Images Array: (573, 224, 224, 3)
Shape of Validation Images Array: (23, 224, 224, 3)
Shape of Testing Images Array: (25, 224, 224, 3)


In [5]:
# Filter labels to match the images used for training, validation, and testing
filtered_labels = labels[:len(train_images)]  # Filter labels for training images

# Encode filtered labels into numerical format
y_train = label_encoder.fit_transform(filtered_labels)

# Split the dataset into train, validation, and test sets
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_valid, y_valid, test_size=0.5, random_state=42)

# Display the shapes of the split datasets
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_valid:", X_valid.shape)
print("Shape of y_valid:", y_valid.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)


Shape of X_train: (515, 224, 224, 3)
Shape of y_train: (515,)
Shape of X_valid: (29, 224, 224, 3)
Shape of y_valid: (29,)
Shape of X_test: (29, 224, 224, 3)
Shape of y_test: (29,)


In [6]:
# Step 5: Build and Train a CNN Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Define the CNN model architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax')  # Assuming 10 clothing categories
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Display the model summary
model.summary()


  super().__init__(


In [7]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [13]:
from keras.utils import to_categorical

# Encode target labels
num_classes = len(label_encoder.classes_)
y_train_encoded = to_categorical(y_train, num_classes)
y_valid_encoded = to_categorical(y_valid, num_classes)


In [15]:
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense

# Define the input layer
input_layer = Input(shape=(224, 224, 3))

# Add convolutional layers
conv1 = Conv2D(32, kernel_size=(3, 3), activation='relu')(input_layer)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(64, kernel_size=(3, 3), activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

# Flatten the output from convolutional layers
flatten = Flatten()(pool2)

# Add a dense layer
dense = Dense(128, activation='relu')(flatten)

# Define the output layer with appropriate number of units (num_classes)
output_layer = Dense(num_classes, activation='softmax')(dense)

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [16]:
output_layer = Dense(num_classes, activation='softmax')(dense)


In [18]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train_encoded, epochs=10, batch_size=32, validation_data=(X_valid, y_valid_encoded))


Epoch 1/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 385ms/step - accuracy: 0.0625 - loss: 1186.1874 - val_accuracy: 0.0345 - val_loss: 6.5231
Epoch 2/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 384ms/step - accuracy: 0.1369 - loss: 4.3396 - val_accuracy: 0.1034 - val_loss: 3.0229
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 399ms/step - accuracy: 0.2331 - loss: 2.7828 - val_accuracy: 0.2414 - val_loss: 2.9114
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 438ms/step - accuracy: 0.2844 - loss: 2.5889 - val_accuracy: 0.1724 - val_loss: 2.8919
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 391ms/step - accuracy: 0.4378 - loss: 2.1770 - val_accuracy: 0.1724 - val_loss: 3.3578
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 386ms/step - accuracy: 0.6356 - loss: 1.3887 - val_accuracy: 0.1379 - val_loss: 4.1391
Epoch 7/10
[1m17/17[0m 

In [20]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the model architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Data augmentation
train_datagen = ImageDataGenerator(rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode='nearest')

# Fit the model
history = model.fit(train_datagen.flow(X_train, y_train_encoded, batch_size=32),
                    epochs=10,
                    validation_data=(X_valid, y_valid_encoded))


Epoch 1/10


  self._warn_if_super_not_called()


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.0725 - loss: 166.5773 - val_accuracy: 0.0690 - val_loss: 2.9756
Epoch 2/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 427ms/step - accuracy: 0.0340 - loss: 2.9981 - val_accuracy: 0.1034 - val_loss: 2.9460
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 447ms/step - accuracy: 0.0934 - loss: 2.9307 - val_accuracy: 0.2414 - val_loss: 2.9531
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 440ms/step - accuracy: 0.1207 - loss: 2.9066 - val_accuracy: 0.2069 - val_loss: 2.9005
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 435ms/step - accuracy: 0.1470 - loss: 2.9201 - val_accuracy: 0.2069 - val_loss: 2.8691
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 430ms/step - accuracy: 0.1082 - loss: 2.9063 - val_accuracy: 0.2069 - val_loss: 2.7123
Epoch 7/10
[1m17/17[0m [32m━━━━━━

In [40]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Assuming you have your features stored in X_all and target labels stored in y_all
# Assuming you have X_valid and y_valid defined for validation data

# Fit label encoder on entire dataset
label_encoder = LabelEncoder()
label_encoder.fit(y_all)

# Encode target labels
num_classes = len(label_encoder.classes_)
y_all_encoded = to_categorical(label_encoder.transform(y_all), num_classes)

# Split the data into training and test sets
X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(X_all, y_all_encoded, test_size=0.2, random_state=42)

# Now you can proceed with the rest of your code


NameError: name 'y_all' is not defined

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Define the model architecture
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model with adjusted learning rate
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model with data augmentation and early stopping
history = model.fit(datagen.flow(X_train, y_train_encoded, batch_size=32),
                    steps_per_epoch=len(X_train) // 32,
                    epochs=20,
                    validation_data=(X_valid, y_valid_encoded),
                    callbacks=[early_stopping])


In [None]:
# Encode test labels
y_test_encoded = label_encoder.transform(y_test)
y_test_encoded = to_categorical(y_test_encoded, num_classes)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test_encoded)
