In [1]:
import os
import cv2
import numpy as np
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.models import Model

In [2]:
# Initialize VGG16 model for feature extraction (without the top layer)
base_model = VGG16(weights='imagenet', include_top=False)
model = Model(inputs=base_model.input, outputs=base_model.output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 0us/step


In [3]:
def preprocess_image(image_path, target_size=(224, 224)):
    # Load and resize image
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB
    img = cv2.resize(img, target_size)
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    return img

In [4]:
def extract_features(image_path):
    # Extract features from image using VGG16
    processed_image = preprocess_image(image_path)
    features = model.predict(processed_image)
    return features.flatten()

In [5]:
# feature extraction

train_folder = '../data/raw_data/train'

features_list = []
labels_list = []

# Iterate over all subfolders (mood categories) in the train folder
for mood in os.listdir(train_folder):
    mood_folder = os.path.join(train_folder, mood)
    
    # Check if it's a directory
    if os.path.isdir(mood_folder):
        # Iterate over all images in the mood subfolder
        for image_name in os.listdir(mood_folder):
            image_path = os.path.join(mood_folder, image_name)
            
            # Extract features from the image
            features = extract_features(image_path)
            
            # Append the features and corresponding mood label
            features_list.append(features)
            labels_list.append(mood)

FileNotFoundError: [WinError 3] The system cannot find the path specified: '../data/raw_data/train'

In [None]:
# Convert features and labels to NumPy arrays
features_array = np.array(features_list)
labels_array = np.array(labels_list)

# Save the features and labels to a .npz file
np.savez('../data/features_and_labels.npz', features=features_array, labels=labels_array)

print("Features and labels saved successfully!")