## Data Preprocessing

In [10]:
import cv2
import os

In [8]:
# Extract frames from video file
def extract_frames(video_path, frame_directory):
    os.makedirs(frame_directory, exist_ok=True)

    # Read video
    cap = cv2.VideoCapture(video_path)
    print(f"Processing {video_path}... Total Frames: {int(cap.get(cv2.CAP_PROP_FRAME_COUNT))}")

    # Check if video opened successfully
    if not cap.isOpened():
        print(f"Error: Could not open video {video_path}.")
        return

    frame_count = 0
    while True:
        # Read a frame
        success, frame = cap.read()
        
        # If read was successful, save the frame
        if success:
            frame_filename = os.path.join(frame_directory, f"frame_{frame_count:04d}.jpg")
            cv2.imwrite(frame_filename, frame)
            frame_count += 1
        else:
            # No more frames to be read
            break

    # Release the video capture object
    cap.release()
    print(f"Frames extracted for {video_path}: {frame_count}")

# Adjust this path to where your videos are stored
## video_folder_path = '/Users/lyricli/Documents/Visualization for Machine Learning/final project/Normal'
## video_folder_path = '/Users/lyricli/Documents/Visualization for Machine Learning/final project/Crash'
# video_folder_path = 'data/raw/Normal'
video_folder_path = 'data/raw/Crash'


# Adjust this path to where you want to save the frames
# frames_save_path = 'data/frames/Normal'
frames_save_path = 'data/frames/Crash'

# Get a list of all video files
video_files = [f for f in os.listdir(video_folder_path) if f.endswith('.mp4')]

# Process each video file to save them as frames
for video_file in video_files:
    video_path = os.path.join(video_folder_path, video_file)
    video_name = video_file.split('.')[0]  # Assuming the file name is the video name
    frame_directory = os.path.join(frames_save_path, video_name)
    extract_frames(video_path, frame_directory)

## Feature Extraction (CNN Feature Map Creation for Each Frame)

### Using TensorFlow

In [4]:
import cv2
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image

In [2]:
# Loads and returns frames as np.array in RGB or Grayscale format
# :param frame_directory: String path to find all frames of one video
# :return np.array(frames): A numpy.array of tensor frame-arrays of pixel values

def load_frames(frame_directory):
    
    frames = []
    
    for frame in sorted(os.listdir(frame_directory)):
        img = cv2.imread(os.path.join(frame_directory, frame))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # load RGB img
        # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # load Grayscale img
        frames.append(img)
        
    return np.array(frames)

In [3]:
# Create a dataset of video frames for the CNN model
# :param video_folder_path: path for storing "Normal" and "Crash" video frames
# :param video_number: number of videos one desires to create a dataset
# :return np.array(dataset): datasets of videos
# :return np.array(labels): labels corresponding to each of the video in videos

def create_dataset(video_folder_path, video_number):
    
    categories = ["Normal", "Crash"]
    
    dataset = [] # stores loaded video frames, i.e., [[frame1, frame2, ...], [frame1, frame2, ...], ...]
    labels = []  # 0 - Normal, 1 - Crash
    
    for category in categories:
        
        path = os.path.join(video_folder_path, category) # e.g. "data/frames/Normal"
        video_directories = os.listdir(path)[:video_number] # e.g. [000023, ..., 000022] not in order
        
        for video_dir in video_directories:
            full_video_path = os.path.join(path, video_dir) # e.g. "data/frames/Normal/000023"
            frames = load_frames(full_video_path) # see func `load_frames` above
            dataset.append(frames)
            labels.append(1 if category == "Crash" else 0)
    
    # dataset shape: (video_number * 2, frame number 50, frame height 720, frame width 1280, color channel 3)
    # labels shape:  (video_number * 2, )
    return np.array(dataset), np.array(labels)

#### Load dataset

**video_number** = How many videos per ["Normal", "Crash"] folder do you want to extract features from

if video_number = 3, then you will extract features of 3 normal and 3 crash videos

In [None]:
## create_dataset() inputs: video_folder_path, video_number

# Path name for storing the "Normal" and "Crash" video frames
video_folder_path = 'data/frames'

# Number of processed video-frames for each category of "Normal" and "Crash"
video_number = 3


## create_dataset() outputs: Output data and labels

data, labels = create_dataset(video_folder_path, video_number)

####  Load a pre-trained model

**Pre-trained Model?** We used pre-trained model for the baseline performance and for efficiency

**Is it good?** Not sure, since ResNet50 is trained on general images, but let's try, it saves time

**Why ResNet50?**: ResNet50 was pre-trained on general image classification task and it is fast to retrieve

In [5]:
# Load a pre-trained ResNet50 model

model = ResNet50(include_top=False, weights='imagenet')

# from tensorflow.keras.models import Model
# model = Model(inputs=resnet_model.input, outputs=resnet_model.output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m243s[0m 3us/step


In [7]:
# Feature extraction
# :param: data
# :return: np.array(features)

def extract_features(data):
    features = []  # List to hold all feature vectors
    
    for video_frames in data:
        video_features = []  # List to hold feature vectors for one video
        
        # Process each frame in the video
        for frame in video_frames:
            img_tensor = image.img_to_array(frame)  # Convert frame to a numpy array
            img_tensor = np.expand_dims(img_tensor, axis=0)  # Add batch dimension
            img_tensor = tf.keras.applications.resnet.preprocess_input(img_tensor)  # Preprocess the frame
            
            feature = model.predict(img_tensor)  # Extract features using ResNet50
            video_features.append(feature)
            
        features.append(np.array(video_features))
        
    return np.array(features)

features = extract_features(data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 887ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 379ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 372ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 375ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 376ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 371ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 377ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 374ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 380ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 370ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 376ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 403ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 419ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 386ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 415ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 402ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 404ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 393ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 380ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 388ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 379ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 383ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 389ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 382ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 386ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 383ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [8]:
print(features.shape) # (video_number * 2, frame number 50, layer depth 1, height 23, width 40, depth 2048)

(6, 50, 1, 23, 40, 2048)


### Train-Test Split

In [11]:
from sklearn.model_selection import train_test_split

# Assuming 'features' is your array of feature maps and 'labels' is your array of labels

features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.20, random_state=123
)

### Using Pytorch

In [None]:
import torch
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import numpy as np

In [17]:
# Custom dataset 还没写完！现在是用两个video做个试验
class DrivingFramesDataset(Dataset):
    def __init__(self, frame_dirs, labels, transform=None):
        """
        Args:
            frame_dirs (list): List of directories with frames.
            labels (list): List of labels for the directories.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.frame_dirs = frame_dirs
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.frame_dirs)

    def __getitem__(self, idx):
        frames = []
        for frame_filename in os.listdir(self.frame_dirs[idx]):
            img_path = os.path.join(self.frame_dirs[idx], frame_filename)
            image = Image.open(img_path)
            if self.transform:
                image = self.transform(image)
            frames.append(image)
        frames = torch.stack(frames)  # Convert list of frames to a tensor
        return frames, self.labels[idx]

# Define your transforms
transform = transforms.Compose([
    transforms.ToTensor(),  # This also scales pixel values to [0, 1]
])

# Create your dataset
dataset = DrivingFramesDataset(frame_dirs=['data/frames/Crash/000001', 'data/frames/Normal/000001'],
                              labels=[1, 0],  # 1 for crash, 0 for normal
                              transform=transform)

# Data loader
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Now you can iterate over the data_loader during training
for frames, labels in data_loader:
    print(frames,labels)
    # Your training loop here
    pass


tensor([[[[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

          [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

          [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
           [0.0000, 0.0000