In [7]:
import cv2
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [2]:
def preprocess_frame(frame, target_size=(100, 100)):
    """
    Preprocess a single frame: resize it, then reduce it to one grayscale channel.

    Args:
        frame: Image array as produced by ``cv2.imread`` (BGR channel order),
            or an image that is already single-channel.
        target_size: Output size handed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.

    Returns:
        The resized grayscale image.

    Raises:
        ValueError: If ``frame`` is None (e.g. an image that failed to load).
    """
    if frame is None:
        raise ValueError("preprocess_frame received None instead of an image")

    # Resize first so the colour conversion runs on the smaller image
    frame_resized = cv2.resize(frame, target_size)

    # A 2-D result is already grayscale; BGR2GRAY would raise on it.
    if getattr(frame_resized, "ndim", None) == 2:
        return frame_resized

    # Convert to grayscale
    frame_gray = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY)
    return frame_gray

In [14]:
# Define a mapping from label names to numerical values
label_map = {'HAS': 0, 'LAS': 1, 'Plug': 2}


def load_frames_from_folder(folder_path):
    """
    Load every readable image in a folder, preprocess it and label it.

    The label is looked up in ``label_map`` using the folder's own name (the
    last component of ``folder_path``), so all frames from one folder share
    the same label.

    Args:
        folder_path: Path to a directory of image files. A trailing path
            separator is tolerated.

    Returns:
        A ``(frames, labels)`` tuple of two equal-length lists: the output of
        ``preprocess_frame`` for each readable image, and the integer label
        for each of those frames. Both lists are empty when the folder name
        is not a key of ``label_map``.
    """
    frames = []
    labels = []

    # Get list of image files in the folder. Sorted because os.listdir returns
    # entries in arbitrary order, which would make the sample order (and any
    # seeded split built on it) differ between machines.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, f))
    )

    # The label depends only on the folder, so resolve it once up front.
    # normpath drops a trailing separator that would make basename return ''.
    label_name = os.path.basename(os.path.normpath(folder_path))
    label = label_map.get(label_name)
    if label is None:
        # Nothing in this folder can be labelled; say so instead of silently
        # decoding every image and throwing the result away.
        print(f"Skipping folder '{folder_path}': no label defined for '{label_name}'")
        return frames, labels

    for image_file in image_files:
        try:
            # Read image
            image_path = os.path.join(folder_path, image_file)
            frame = cv2.imread(image_path)
            if frame is None:
                # cv2.imread returns None for files it cannot decode
                continue

            # Preprocess frame, then record it together with its label
            frames.append(preprocess_frame(frame))
            labels.append(label)
        except Exception as e:
            print(f"Error loading image '{image_file}': {e}")

    return frames, labels

In [4]:
# Entry point: gather frames and labels from every folder in a list
def load_frames_main(data_folders):
    """
    Load and concatenate the frames and labels of several folders.

    Args:
        data_folders: Iterable of folder paths, each passed to
            ``load_frames_from_folder``.

    Returns:
        A ``(frames, labels)`` tuple of NumPy arrays built from the combined
        per-folder lists, in the order the folders were given.
    """
    # Load each folder in turn, keeping its (frames, labels) pair together
    per_folder = [load_frames_from_folder(path) for path in data_folders]

    # Flatten the per-folder lists into one list of frames and one of labels
    frame_list = [item for folder_frames, _ in per_folder for item in folder_frames]
    label_list = [item for _, folder_labels in per_folder for item in folder_labels]

    return np.array(frame_list), np.array(label_list)

In [15]:
# One source directory per class; each directory's name is the key that
# load_frames_from_folder looks up in label_map.
data_folder_HAS = '../../Two Phase Data/HAS'
data_folder_LAS = '../../Two Phase Data/LAS'
data_folder_Plug = '../../Two Phase Data/Plug'
data_folders = [data_folder_HAS, data_folder_LAS, data_folder_Plug]

# Read and preprocess every frame, collecting the matching labels
frames, labels = load_frames_main(data_folders)

# Sanity checks: dimensions of the stacked frames and the label values present
print(f"Shape of frames array: {frames.shape}")
print(f"Unique labels: {np.unique(labels)}")

Shape of frames array: (12589, 100, 100)
Unique labels: [0 1 2]


In [18]:
# Hold out 20% of all frames as the test set. Stratifying on the labels keeps
# the class proportions the same in every split instead of leaving them to
# chance.
# NOTE(review): if these frames are consecutive frames of the same recordings,
# a per-frame random split can put near-duplicates in different splits —
# confirm whether a per-recording split is needed.
X_train, X_test, y_train, y_test = train_test_split(
    frames, labels, test_size=0.2, random_state=42, stratify=labels
)

# Carve the validation set out of the remaining 80% (20% of it), giving
# roughly 64% train / 16% validation / 20% test overall.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

# Convert numpy arrays to TensorFlow Datasets
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

# Batch all three datasets; only the training data is shuffled, using a
# buffer as large as the training set.
batch_size = 32
train_dataset = train_dataset.shuffle(buffer_size=len(X_train)).batch(batch_size)
val_dataset = val_dataset.batch(batch_size)
test_dataset = test_dataset.batch(batch_size)

# Print the number of samples in each dataset
print("Number of samples in training set:", len(X_train))
print("Number of samples in validation set:", len(X_val))
print("Number of samples in test set:", len(X_test))

Number of samples in training set: 8056
Number of samples in validation set: 2015
Number of samples in test set: 2518


In [19]:
# Define file paths to save the preprocessed data
train_data_path = '../data/train_data.npz'
val_data_path = '../data/val_data.npz'
test_data_path = '../data/test_data.npz'

# np.savez does not create missing directories, so make sure the target
# folder exists before writing (e.g. on a fresh checkout).
for _path in (train_data_path, val_data_path, test_data_path):
    os.makedirs(os.path.dirname(_path), exist_ok=True)

# Save each split as an .npz archive holding a 'frames' and a 'labels' array
np.savez(train_data_path, frames=X_train, labels=y_train)
np.savez(val_data_path, frames=X_val, labels=y_val)
np.savez(test_data_path, frames=X_test, labels=y_test)

print("Preprocessed data saved successfully!")

Preprocessed data saved successfully!
