In [8]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# Initialize lists to store image data and labels
X_train = []
Y_train = []

# Define the path to your dataset directory
dataset_dir = r'C:\Users\sadia_tisha1\Desktop\2014'


In [9]:

# Iterate through each subfolder in the dataset directory
for label, class_folder in enumerate(os.listdir(dataset_dir)):
    class_folder_path = os.path.join(dataset_dir, class_folder)

    # Check if it's a directory
    if os.path.isdir(class_folder_path):
        for image_file in os.listdir(class_folder_path):
            image_path = os.path.join(class_folder_path, image_file)
            
            # Check if the file is a valid image (e.g., skip Thumbs.db)
            if image_file.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp')):
                # Open the image using PIL and convert to RGB
                image = Image.open(image_path).convert("RGB")
                
                # Resize the image to (32, 32, 3)
                image = image.resize((32, 32))
                
                # Convert PIL image to numpy array
                image_array = np.array(image)
                
                # Append the image data to X_train
                X_train.append(image_array)
                
                # Append the label to Y_train
                Y_train.append(label)

# Convert X_train and Y_train to numpy arrays
X_train = np.array(X_train)
Y_train = np.array(Y_train)

print("X_train shape:", X_train.shape)



X_train shape: (329832, 32, 32, 3)


In [11]:
X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)

num_classes = len(np.unique(Y_train))
print("Number of unique classes:", num_classes)



Number of unique classes: 91


In [13]:
unique_classes_train = np.unique(Y_train)
unique_classes_test = np.unique(Y_test)

print("Unique classes in Y_train:", unique_classes_train)
print("Unique classes in Y_test:", unique_classes_test)


Unique classes in Y_train: [  1   2   3   5   6   9  10  11  12  13  14  15  16  17  18  19  20  21
  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39
  40  41  42  43  45  46  47  48  50  51  53  55  56  57  59  60  61  62
  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80
  81  82  83  84  85  86  87  88  89  90  91  92  93  95  96  97  99 101
 102]
Unique classes in Y_test: [  1   2   3   5   6   9  10  11  12  13  16  17  18  19  20  22  23  24
  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  43
  45  46  47  48  49  50  51  53  55  57  58  60  61  62  63  64  65  67
  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85
  91  92  93  95  96  97  99 102]


In [16]:
# Adjust class labels to start from 0 and ensure they are within the range 0 to 90
Y_train_adjusted = np.clip(Y_train - 1, 0, 90)
Y_test_adjusted = np.clip(Y_test - 1, 0, 90)

# Convert Y_train and Y_test to categorical one-hot encoding
num_classes = len(np.unique(Y_train))
Y_train_categorical = to_categorical(Y_train_adjusted, num_classes=num_classes)
Y_test_categorical = to_categorical(Y_test_adjusted, num_classes=num_classes)


In [17]:

# Print the shapes of the arrays
print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train_categorical.shape)
print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test_categorical.shape)

X_train shape: (211092, 32, 32, 3)
Y_train shape: (211092, 91)
X_test shape: (52773, 32, 32, 3)
Y_test shape: (52773, 91)


In [18]:
# Create the ResNet model without the top (fully connected) layers
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(32, 32, 3))
    
# Add the top layers for classification
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(num_classes, activation='softmax'))

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Split your data into training and validation sets
X_train, X_valid, Y_train_categorical, Y_valid_categorical = train_test_split(X_train, Y_train_categorical, test_size=0.1, random_state=42)

# Create tf.data.Dataset for training and validation data
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train_categorical))
valid_dataset = tf.data.Dataset.from_tensor_slices((X_valid, Y_valid_categorical))

# Define batch size and shuffle the datasets
batch_size = 64
train_dataset = train_dataset.shuffle(buffer_size=len(X_train)).batch(batch_size)
valid_dataset = valid_dataset.batch(batch_size)

In [21]:
# Training the model
epochs = 50
model.fit(train_dataset, epochs=epochs, validation_data=valid_dataset)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x22a2f4ea700>

In [22]:

from sklearn.metrics import accuracy_score, f1_score

# Evaluate the model on the test dataset
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test_categorical))
test_dataset = test_dataset.batch(batch_size)

# Use the model to make predictions on the test dataset
y_pred = model.predict(test_dataset)

# Convert one-hot encoded predictions back to class labels
y_pred_labels = np.argmax(y_pred, axis=1)

# Calculate accuracy
accuracy = accuracy_score(Y_test, y_pred_labels)

# Calculate F1 score
f1 = f1_score(Y_test, y_pred_labels, average='weighted')  # You can specify the averaging method

# Print accuracy and F1 score
print("Accuracy:", accuracy)
print("F1 Score:", f1)

Accuracy: 0.000625319765789324
F1 Score: 0.0010614965105134187
