In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; every dataset path used further down
# in this notebook lives under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

## Dataset

## Video to image

In [None]:
! pip install python-magic

In [None]:
import magic
from PIL import Image, UnidentifiedImageError
import os
import cv2

def delete_file(file_path):
    """Remove ``file_path`` from disk, then log that it was deleted.

    Any error raised by the removal (for example a missing file) propagates
    to the caller and nothing is logged in that case.
    """
    os.unlink(file_path)
    print(f"Deleted file {file_path}")

def extract_first_frame(video_path, output_path):
    """Save the first frame of a video as an image file.

    Args:
        video_path: Path of the video to read.
        output_path: Destination path of the image; OpenCV picks the image
            format from the file extension.

    Returns:
        bool: True when a frame was decoded and written to ``output_path``,
        False when the video could not be read or the image could not be
        written.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        ret, frame = cap.read()
    finally:
        # Release the capture handle even if reading raises.
        cap.release()

    # cv2.imwrite signals a failed write through its return value, so a
    # successful read alone does not mean the image exists on disk.
    if ret and cv2.imwrite(output_path, frame):
        print(f"Extracted first frame from {video_path} to {output_path}")
        return True

    print(f"Failed to extract frame from {video_path}")
    return False

def process_folder(folder_path):
    """Clean one folder in place, leaving only files that verify as images.

    For every regular file directly inside ``folder_path``:

    * unrecognised type (detector returned ``None``) -> deleted;
    * ``text/plain`` -> deleted;
    * ``video/*`` -> first frame saved next to it as ``<name>_f.jpg``; the
      video is deleted only if that image was actually written;
    * anything else -> opened and verified with Pillow; deleted if that fails.

    Entries that are not regular files are reported and skipped.

    Args:
        folder_path: Directory whose direct children are processed.
    """
    # One detector for the whole folder; it does not depend on the file.
    mime = magic.Magic(mime=True)

    # Iterate through each file in the folder
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        if not os.path.isfile(file_path):
            print(f'{file_path} is not a file, skipping...')
            continue

        file_type = mime.from_file(file_path)

        if file_type is None:
            print(f"File {file_path} is not a recognized file.")
            os.remove(file_path)
        elif file_type == 'text/plain':
            delete_file(file_path)
        elif file_type.startswith('video'):
            output_image_path = os.path.splitext(file_path)[0] + "_f.jpg"
            extract_first_frame(file_path, output_image_path)
            # Only drop the source video once the frame really exists on
            # disk; otherwise a failed extraction would lose the data.
            if os.path.isfile(output_image_path) and os.path.getsize(output_image_path) > 0:
                os.remove(file_path)
            else:
                print(f"Keeping {file_path}: no frame was written to {output_image_path}")
        else:
            try:
                # The context manager closes the file handle before any
                # deletion below and avoids leaking one handle per image.
                with Image.open(file_path) as image:
                    image.verify()  # Verify if it's a valid image
                print(f"Verified image file {file_path}")
            except UnidentifiedImageError:
                print(f"Cannot identify image file {file_path}")
                delete_file(file_path)
            except Exception as e:
                print(f"An error occurred while processing image {file_path}: {e}")
                delete_file(file_path)



def process_users_in_chunks(folder_path, chunk_size=10):
    """Run ``process_folder`` on every sub-directory of ``folder_path``.

    The directory listing is walked in consecutive slices of ``chunk_size``
    entries; each slice is printed before its entries are handled. Entries
    that are not directories are ignored.

    Args:
        folder_path: Directory that contains one sub-directory per user.
        chunk_size: Number of listing entries grouped into one printed chunk.
    """
    users = os.listdir(folder_path)
    total = len(users)

    for start in range(0, total, chunk_size):
        chunk = users[start:start + chunk_size]
        print(f"Processing chunk: {chunk}")

        for name in chunk:
            candidate = os.path.join(folder_path, name)
            if not os.path.isdir(candidate):
                continue
            process_folder(candidate)

# Root folder on the mounted drive; each sub-directory of it is processed as
# one user's folder.
folder_path = '/content/drive/Shareddrives/PFA_Dataset/images'

# Clean every user folder in place.
# NOTE: this is destructive - files detected as plain text, videos (after
# their first frame is extracted) and files that fail image verification are
# deleted from the drive.
process_users_in_chunks(folder_path)


---


## Preprocess images

In [0]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from PIL import UnidentifiedImageError

class DataGenerator(Sequence):
    """Keras ``Sequence`` that loads images from disk one batch at a time.

    Every batch is a pair ``(X, y)``: ``X`` holds the images resized to
    ``img_size`` and scaled to [0, 1], ``y`` holds the matching label rows.
    Only complete batches are produced, so up to ``batch_size - 1`` samples
    are left out of each epoch.

    Args:
        image_paths: Indexable collection of image file paths.
        labels: Indexable collection of label rows aligned with
            ``image_paths``; each row must provide ``n_classes`` values.
        batch_size: Number of samples per batch.
        img_size: ``(height, width)`` handed to the image loader as
            ``target_size``.
        n_classes: Number of label values per sample.
        shuffle: Whether to reshuffle the sample order at construction time
            and after every epoch.
        **kwargs: Forwarded unchanged to the ``Sequence`` base constructor.
    """

    def __init__(self, image_paths, labels, batch_size=32, img_size=(150, 150), n_classes=5, shuffle=True, **kwargs):
        # Newer Keras releases expect the base-class constructor to run.
        super().__init__(**kwargs)
        self.image_paths = image_paths
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of complete batches per epoch."""
        return len(self.image_paths) // self.batch_size

    def __getitem__(self, index):
        """Build and return batch number ``index`` as ``(X, y)``."""
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        image_paths_temp = [self.image_paths[k] for k in indexes]
        labels_temp = [self.labels[k] for k in indexes]

        X, y = self.__data_generation(image_paths_temp, labels_temp)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.image_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, image_paths_temp, labels_temp):
        """Load the given images and labels into freshly allocated arrays.

        Returns:
            tuple: ``X`` of shape ``(n, *img_size, 3)`` and ``y`` of shape
            ``(n, n_classes)``, both float32, where ``n`` is the number of
            paths received.
        """
        # Size the buffers from the actual slice so no row is ever left
        # uninitialised, and use float32 to halve the memory of each batch.
        n_samples = len(image_paths_temp)
        X = np.zeros((n_samples, *self.img_size, 3), dtype=np.float32)
        # A float buffer keeps fractional label values intact; an int buffer
        # would silently truncate them on assignment.
        y = np.zeros((n_samples, self.n_classes), dtype=np.float32)

        for i, (img_path, label) in enumerate(zip(image_paths_temp, labels_temp)):
            try:
                img = tf.keras.preprocessing.image.load_img(img_path, target_size=self.img_size)
                img = tf.keras.preprocessing.image.img_to_array(img)
                X[i] = img / 255.0  # Normalize the image to [0, 1]
            except (UnidentifiedImageError, IOError) as e:
                print(f"Error loading image {img_path}: {e}")
                X[i] = 0.0  # Placeholder image; the label below is still kept

            y[i] = label

        return X, y

In [0]:
import os
import numpy as np
import pandas as pd


path = '/content/drive/MyDrive/PFA-Dataset/datas/final_data.csv'
# NOTE(review): the cleaning cell earlier in this notebook works on
# '/content/drive/Shareddrives/PFA_Dataset/images' - confirm that both
# locations refer to the same image set.
images_path = '/content/drive/MyDrive/PFA-Dataset/Images'

# CSV columns that hold the five label values of a user.
label_columns = ['O', 'C', 'E', 'A', 'N']

# Load the labels CSV file
labels_df = pd.read_csv(path)

# Create lists of image paths and corresponding labels
image_paths = []
labels = []
not_found_users = []

# Iterate column-wise instead of with iterrows(): iterrows() builds one Series
# per row and can upcast the username when all columns are numeric.
for user_id, user_labels in zip(labels_df['username'], labels_df[label_columns].to_numpy()):
    user_folder = os.path.join(images_path, str(user_id))

    # isdir (not exists): a plain file with the user's name is not a folder.
    if not os.path.isdir(user_folder):
        not_found_users.append(user_id)
        continue

    # Keep regular files only and sort them so the sample order is the same
    # on every run (directory listing order is arbitrary).
    with os.scandir(user_folder) as entries:
        image_names = sorted(entry.name for entry in entries if entry.is_file())

    for image_name in image_names:
        image_paths.append(os.path.join(user_folder, image_name))
        labels.append(user_labels)

labels = np.array(labels)

In [0]:
# Quick sanity check: number of users without an image folder, then the first
# ten image paths and their label rows, one item per line.
print(
    len(not_found_users),
    image_paths[:10],
    labels[:10],
    sep="\n",
)