In [2]:
#Importing the necessary libraries
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input
import os
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D, InputLayer

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# Function to load and preprocess images
def load_and_preprocess_image(img_path, target_size=(224, 224)):
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array_expanded_dims = np.expand_dims(img_array, axis=0)
    return preprocess_input(img_array_expanded_dims)

In [4]:
# Load the Dataset 
data_path = 'Example Data-20240208T214429Z-001/Example Data/ImageLabels.xlsx'
images_folder = 'Example Data-20240208T214429Z-001/Example Data/exported'
df = pd.read_excel(data_path)

In [5]:
# Preprocess the DataFrame
df['Aliases'].fillna('None', inplace=True)
df['Parents'].fillna('None', inplace=True)
df['preprocessed_image'] = df['Image Name'].apply(lambda x: load_and_preprocess_image(os.path.join(images_folder, x)))

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Aliases'].fillna('None', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Parents'].fillna('None', inplace=True)


In [6]:
# Encode labels
label_encoder = LabelEncoder()
df['label_encoded'] = label_encoder.fit_transform(df['Label'])
num_classes = df['label_encoded'].nunique()
y = to_categorical(df['label_encoded'], num_classes=num_classes)

In [7]:
# Prepare the dataset
X = np.array(df['preprocessed_image'].tolist())
y = df['label_encoded'].values
weights = df['Confidence'] / 100  # Normalize confidence scores to be between 0 and 1

In [8]:
# Split dataset
X_train, X_test, y_train, y_test, weights_train, weights_test = train_test_split(X, y, weights, test_size=0.2, random_state=42)

In [9]:
y_train = to_categorical(y_train, num_classes)


In [10]:
# Define the CNN model
model = Sequential([
    InputLayer(input_shape=(224, 224, 3)),  # Adjust input shape based on  image preprocessing
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')  # 'num_classes' should match the number of categories
])



In [11]:
#compiling the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [12]:
# Ensure X_train is a numpy array and has the correct shape
X_train = np.squeeze(X_train)

# Check the shape of X_train
print("Shape of X_train:", X_train.shape)

Shape of X_train: (318, 224, 224, 3)


In [13]:

model.fit(X_train, y_train, sample_weight=weights_train, epochs=50, validation_split=0.1)


Epoch 1/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 414ms/step - accuracy: 0.0062 - loss: 735.5385 - val_accuracy: 0.0000e+00 - val_loss: 4.6890
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 408ms/step - accuracy: 0.0512 - loss: 4.7372 - val_accuracy: 0.1250 - val_loss: 4.6202
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 446ms/step - accuracy: 0.0733 - loss: 4.6867 - val_accuracy: 0.1250 - val_loss: 4.6179
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 462ms/step - accuracy: 0.0607 - loss: 4.6901 - val_accuracy: 0.1250 - val_loss: 4.6324
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 465ms/step - accuracy: 0.0705 - loss: 4.6631 - val_accuracy: 0.1250 - val_loss: 4.6296
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 458ms/step - accuracy: 0.0493 - loss: 4.6244 - val_accuracy: 0.1250 - val_loss: 4.6271
Epoch 7/50
[1m9/9[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x14d531d7b60>