# The aim of this notebook: 
# `Videos -> DataFrames -> CSV -> Plot -> Processing -> LSTM Model`

In [1]:
!pip install mediapipe==0.8.9.1 --quiet
!pip install tensorflow --quiet
#!pip install imageio --quiet
#pip install 'imageio[ffmpeg]' --quiet

In [2]:
import os
import cv2
# from google.colab.patches import cv2_imshow ## NOT FOR JUPYTER NOTEBOOKS
import mediapipe as mp
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

import tensorflow
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Masking


2023-09-03 19:31:38.948268: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-03 19:31:38.993596: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-09-03 19:31:38.994846: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
DIRECTORY_PATH = "videos_fold_1"
CSV_DIRECTORY = "csv_data_output" # NOTE this folder will be nested inside DIRECTORY_PATH
PLOT_PNG_DIRECTORY = "png_plots"
NO_NANS_CSV_DIRECTORY = "csv_data_no_nans" # this is where the no nan value csv files will go.
NO_NANS_TO_PLOT = "csv_data_no_nans_plots"
LOG_PATH = "tensorflow_logs"
DIRECTORY_TO_CSV = f"{DIRECTORY_PATH}/{CSV_DIRECTORY}"
DIRECTORY_TO_PNG = f"{DIRECTORY_PATH}/{PLOT_PNG_DIRECTORY}"
DIRECTORY_TO_NO_NANS_CSV = f"{DIRECTORY_PATH}/{NO_NANS_CSV_DIRECTORY}"
DIRECTORY_TO_NO_NANS_TO_PLOT = f"{DIRECTORY_PATH}/{NO_NANS_TO_PLOT}"
DIRECTORY_TO_LOG = f"{DIRECTORY_PATH}/{NO_NANS_CSV_DIRECTORY}/{LOG_PATH}"

VIDEO_TIME_MINS = 10
TRIM_TIME = 10 # Seconds
FPS_GUESS = 30
TRIM_ROWS = TRIM_TIME * FPS_GUESS # This is the number of rows that will be trimmed from the beginning and end of the data.
MAX_ROWS = FPS_GUESS*VIDEO_TIME_MINS*60
print(f"All dataframes must be {MAX_ROWS} rows before trimming. This means padding files where needed.\nThen be trimmed by {TRIM_ROWS} from the beginning and end.")

All dataframes must be 18000 rows before trimming. This means padding files where needed.
Then be trimmed by 300 from the beginning and end.


# Generate Video Data Dictionary

In [4]:
def generate_video_list_of_dictionaries(directory):
    videos_dictionary = {}   
    for root, dirs, files in os.walk(directory):
        for file in files:
            # Check if the file has a video extension (you can add more extensions)
            if file.lower().endswith(('.mp4', '.MP4', '.avi', '.mkv', '.mov','.MOV', '.m4v')):
                person_id = os.path.basename(root)     # Folder Name
                class_id = os.path.splitext(file)[0]   # File name with no extension
                video_path_and_filename = os.path.join(root, file)
                person_id_class_key = f"{person_id}_{class_id}"
                filename_ext = f"{class_id}{os.path.splitext(file)[1]}"
                csv_filename = f"data_person_{person_id}_class_{class_id}.csv"
                
                cap = cv2.VideoCapture(video_path_and_filename)
                fps = round(cap.get(cv2.CAP_PROP_FPS), 2)
                total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                time_in_seconds_derived = total_frames / fps
                
                combined_name = f"{person_id}_{class_id}"
                
                video_data = {
                    "person_id_class_key": person_id_class_key,
                    "video_path_and_filename": video_path_and_filename,
                    "filename_ext": filename_ext,
                    "person_id": person_id,
                    "class_id": class_id,
                    "csv_filename": csv_filename,
                    "fps": fps,
                    "total_frames": total_frames,
                    "time_in_seconds_derived": time_in_seconds_derived
                }
                
                videos_dictionary[combined_name] = video_data
                
    return videos_dictionary

VIDEO_DICT = generate_video_list_of_dictionaries(DIRECTORY_PATH)
VIDEO_DICT_df = pd.DataFrame(VIDEO_DICT)

print(VIDEO_DICT)
print("")
print(f">>> You have {len(VIDEO_DICT)} videos in your list!\n")

# VIDEO_DICT["4_0"]

{'27_0': {'person_id_class_key': '27_0', 'video_path_and_filename': 'videos_fold_1/27/0.mov', 'filename_ext': '0.mov', 'person_id': '27', 'class_id': '0', 'csv_filename': 'data_person_27_class_0.csv', 'fps': 30.02, 'total_frames': 19727, 'time_in_seconds_derived': 657.128580946036}, '27_10': {'person_id_class_key': '27_10', 'video_path_and_filename': 'videos_fold_1/27/10.mov', 'filename_ext': '10.mov', 'person_id': '27', 'class_id': '10', 'csv_filename': 'data_person_27_class_10.csv', 'fps': 30.02, 'total_frames': 18627, 'time_in_seconds_derived': 620.4863424383744}, '27_5': {'person_id_class_key': '27_5', 'video_path_and_filename': 'videos_fold_1/27/5.mov', 'filename_ext': '5.mov', 'person_id': '27', 'class_id': '5', 'csv_filename': 'data_person_27_class_5.csv', 'fps': 30.02, 'total_frames': 18774, 'time_in_seconds_derived': 625.3830779480346}, '20_0': {'person_id_class_key': '20_0', 'video_path_and_filename': 'videos_fold_1/20/0.mp4', 'filename_ext': '0.mp4', 'person_id': '20', 'clas

In [13]:
VIDEO_DICT_df.head()

Unnamed: 0,person_id_class_key,video_path_and_filename,filename_ext,person_id,class_id,csv_filename,fps,total_frames,time_in_seconds_derived
27_0,27_0,videos_fold_1/27/0.mov,0.mov,27,0,data_person_27_class_0.csv,30.02,19727,657.128581
27_10,27_10,videos_fold_1/27/10.mov,10.mov,27,10,data_person_27_class_10.csv,30.02,18627,620.486342
27_5,27_5,videos_fold_1/27/5.mov,5.mov,27,5,data_person_27_class_5.csv,30.02,18774,625.383078
20_0,20_0,videos_fold_1/20/0.mp4,0.mp4,20,0,data_person_20_class_0.csv,29.97,19585,653.48682
20_10,20_10,videos_fold_1/20/10.mp4,10.mp4,20,10,data_person_20_class_10.csv,29.95,21527,718.764608


In [12]:
VIDEO_DICT_df.to_csv('all_video_information.csv')

# Face Coordinates

In [5]:
### EYES ######################################################################
# Caution with end points, there is overlap! Read images left to right.
LEFT_EYELID = [33, 246, 161, 160, 159, 158, 157, 173, 133]       # 9 points
LEFT_EYE_BOTTOM = [33, 7, 163, 144, 145, 153, 154, 155, 133]     # 9 points
RIGHT_EYELID = [362, 398, 384, 385, 386, 387, 388, 466, 263]     # 9 points
RIGHT_EYE_BOTTOM = [362, 382, 381, 380, 374, 373, 390, 249, 263] # 9 points
EYES = LEFT_EYELID + LEFT_EYE_BOTTOM + RIGHT_EYELID + RIGHT_EYE_BOTTOM

### EYES BASIC ################################################################
LEFT_EYE_TOP_INDEX = 159
LEFT_EYE_BOTTOM_INDEX = 145
LEFT_EYE_LEFT_SIDE_INDEX = 33
LEFT_EYE_RIGHT_SIDE_INDEX = 133

RIGHT_EYE_TOP_INDEX = 386
RIGHT_EYE_BOTTOM_INDEX = 374
RIGHT_EYE_LEFT_SIDE_INDEX = 362
RIGHT_EYE_RIGHT_SIDE_INDEX = 263

LEFT_EYE_BASIC_INDEXES = [LEFT_EYE_TOP_INDEX, LEFT_EYE_BOTTOM_INDEX, LEFT_EYE_LEFT_SIDE_INDEX, LEFT_EYE_RIGHT_SIDE_INDEX]
RIGHT_EYE_BASIC_INDEXES = [RIGHT_EYE_TOP_INDEX, RIGHT_EYE_BOTTOM_INDEX, RIGHT_EYE_LEFT_SIDE_INDEX, RIGHT_EYE_RIGHT_SIDE_INDEX]
EYES_BASIC_INDEXES = LEFT_EYE_BASIC_INDEXES + RIGHT_EYE_BASIC_INDEXES

### MOUTH #####################################################################
# Caution with end points, there is overlap! Read images left to right.
MOUTH_UPPER_OUTLINE = [61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291]
MOUTH_LOWER_OUTLINE = [61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291]
MOUTH_FULL_OUTLINE = MOUTH_UPPER_OUTLINE + MOUTH_LOWER_OUTLINE

# The upper lip lower line that runs along the mouth opening
UPPER_LIP_LOWER = [61, 191, 80, 81, 82, 13, 312, 311, 310, 415, 291]
# The lower lip upper line that runs along the mouth opening
LOWER_LIP_UPPER = [61, 95, 88, 178, 87, 14, 317, 402, 318, 324, 291]

UPPER_LIP_OUTLINE = MOUTH_UPPER_OUTLINE + UPPER_LIP_LOWER
LOWER_LIP_OUTLINE = MOUTH_LOWER_OUTLINE + LOWER_LIP_UPPER
LIPS_FULL = UPPER_LIP_OUTLINE + LOWER_LIP_OUTLINE

MOUTH_TOP_INDEX = 0
MOUTH_BOTTOM_INDEX = 17
MOUTH_LEFT_SIDE_INDEX = 61
MOUTH_RIGHT_SIDE_INDEX = 291
LIP_UPPER_INDEX = 13
LIP_LOWER_INDEX = 14
MOUTH_BASIC_INDEXES = [MOUTH_TOP_INDEX, MOUTH_BOTTOM_INDEX, MOUTH_LEFT_SIDE_INDEX, MOUTH_RIGHT_SIDE_INDEX]
MOUTH_LIP_BASIC_INDEXES = [MOUTH_TOP_INDEX, MOUTH_BOTTOM_INDEX, MOUTH_LEFT_SIDE_INDEX, MOUTH_RIGHT_SIDE_INDEX, LIP_UPPER_INDEX, LIP_LOWER_INDEX]
# print(MOUTH_BASIC_INDEXES)

### FEATURE SETS ##############################################################
MOUTH_EYES_FULL = MOUTH_FULL_OUTLINE + EYES
MOUTH_EYES_BASIC = MOUTH_BASIC_INDEXES + EYES_BASIC_INDEXES
MOUTH_LIPS_EYES_FULL = MOUTH_FULL_OUTLINE + LIPS_FULL + EYES
MOUTH_LIPS_EYES_BASIC = MOUTH_LIP_BASIC_INDEXES + EYES_BASIC_INDEXES # inner and outer
# print(MOUTH_BASIC_AND_EYES_BASIC_INDEXES)

## Function to Calculate the distances between facial features

In [6]:
def euclidean_distance(point1, point2):
    """
    Calculates the absolute distance between 2 points, regardles of orientation.
    point1 and point2 are tuples of (x,y) coordinates
    """
    x1, y1 = point1
    x2, y2 = point2
    distance = ((x2 - x1)**2 + (y2 - y1)**2)**0.5
    return distance

## Function to track coordinates and return a DataFrame

In [7]:
def video_tracking_to_df(video_input_file):
    # Face Mesh
    mp_face_mesh = mp.solutions.face_mesh
    face_mesh = mp_face_mesh.FaceMesh()

    # Video Initialisation and parameters
    cap = cv2.VideoCapture(video_input_file)
    fps = round(cap.get(cv2.CAP_PROP_FPS), 2)

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    time_in_seconds_derived = total_frames / fps

    left_eye_aperture_measurements = []
    right_eye_aperture_measurements = []
    mouth_top_bottom_aperture_measurements = []
    mouth_left_right_aperture_measurements = []    

    while cap.isOpened():
        # Input (video)

        ret, frame = cap.read() # ret is short for return, but we cannot use return as a variable name.
        if not ret: # This will stop the video when the frames run out!
            break

        height, width, channels = frame.shape
        # print("Images width x height", width, "x", height)

        # Correct the input channel order:
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        try:
            # Here the face coordinates will be calculated only if ALL values are present in the result.
            # Facial Landmarks
            result = face_mesh.process(rgb_frame)
            # print(result.multi_face_landmarks.eye_landmarks.left_eye_bottom_y)

            ### RECORDING EYES #######################################################
            LETx = result.multi_face_landmarks[0].landmark[LEFT_EYE_TOP_INDEX].x
            LETy = result.multi_face_landmarks[0].landmark[LEFT_EYE_TOP_INDEX].y
            LEBx = result.multi_face_landmarks[0].landmark[LEFT_EYE_BOTTOM_INDEX].x
            LEBy = result.multi_face_landmarks[0].landmark[LEFT_EYE_BOTTOM_INDEX].y

            LET = (LETx, LETy)
            LEB = (LEBx, LEBy)

            LEFT_EYE_APERTURE_MEASUREMENT = euclidean_distance(LET, LEB)
            left_eye_aperture_measurements.append(LEFT_EYE_APERTURE_MEASUREMENT)

            RETx = result.multi_face_landmarks[0].landmark[RIGHT_EYE_TOP_INDEX].x
            RETy = result.multi_face_landmarks[0].landmark[RIGHT_EYE_TOP_INDEX].y
            REBx = result.multi_face_landmarks[0].landmark[RIGHT_EYE_BOTTOM_INDEX].x
            REBy = result.multi_face_landmarks[0].landmark[RIGHT_EYE_BOTTOM_INDEX].y

            RET = (RETx, RETy)
            REB = (REBx, REBy)

            RIGHT_EYE_APERTURE_MEASUREMENT = euclidean_distance(RET, REB)
            right_eye_aperture_measurements.append(RIGHT_EYE_APERTURE_MEASUREMENT)

            #### RECORDING MOUTH & LIPS ###############################################
            MTx = result.multi_face_landmarks[0].landmark[MOUTH_TOP_INDEX].x
            MTy = result.multi_face_landmarks[0].landmark[MOUTH_TOP_INDEX].y
            MBx = result.multi_face_landmarks[0].landmark[MOUTH_BOTTOM_INDEX].x
            MBy = result.multi_face_landmarks[0].landmark[MOUTH_BOTTOM_INDEX].y
            MLx = result.multi_face_landmarks[0].landmark[MOUTH_LEFT_SIDE_INDEX].x
            MLy = result.multi_face_landmarks[0].landmark[MOUTH_LEFT_SIDE_INDEX].y
            MRx = result.multi_face_landmarks[0].landmark[MOUTH_RIGHT_SIDE_INDEX].x
            MRy = result.multi_face_landmarks[0].landmark[MOUTH_RIGHT_SIDE_INDEX].y

            MT = (MTx, MTy)
            MB = (MBx, MBy)
            ML = (MLx, MLy)
            MR = (MRx, MRy)

            MOUTH_TOP_BOTTOM_APERTURE_MEASUREMENT = euclidean_distance(MT, MB)
            mouth_top_bottom_aperture_measurements.append(MOUTH_TOP_BOTTOM_APERTURE_MEASUREMENT)

            MOUTH_LEFT_RIGHT_APERTURE_MEASUREMENT = euclidean_distance(ML, MR)
            mouth_left_right_aperture_measurements.append(MOUTH_LEFT_RIGHT_APERTURE_MEASUREMENT)
            
            ## SHOWING VIDEO / FRAMES AND PRINTING POINTS ##########################################
#             for facial_landmarks in result.multi_face_landmarks:
#                 for i in MOUTH_LIPS_EYES_BASIC: # here we are only scanning the first 20 features, we can go up to 450+
#                     point = facial_landmarks.landmark[i]
#                     x = int(point.x * width) # this converts the scaled coordinate to absolute pixel value.
#                     y = int(point.y * height)

#                     cv2.circle(frame, (x,y), 2, (255,0,0), -1) # draws a circle on the frame.
#                             #frame, coord, radius, color, fillcolor

#                     cv2.putText(frame, str(i), (x, y), 0, 0.5, (0,0,0)) # writes a label i index on the frame.
#                             #frame, string of index, coord, font, scale, color.

#                   # cv2_imshow(frame) ## THIS ONLY WORKS IN GOOGLE COLAB. USE NORMAL CV2.
#                   # cv2.imshow("here_is_your_image", frame)  
#                   # cv2.imshow(frame)

#             key = cv2.waitKey(1)
#             if key == ord('q'):
#                 break

        except Exception as e:
            # Append the lists with numpy NaNs and continue running the code.
            # We can deal with this in pre-processing the data later.
            right_eye_aperture_measurements.append(np.nan)
            left_eye_aperture_measurements.append(np.nan)
            mouth_top_bottom_aperture_measurements.append(np.nan)
            mouth_left_right_aperture_measurements.append(np.nan)

            # show the input frame with no annotations because the face mesh failed:
            # cv2_imshow(rgb_frame)
            # cv2.imshow("here_is_your_image", rgb_frame)
            # cv2.imshow(rgb_frame)
          
    # cv2.destroyAllWindows()   # Close any open windows // This doesn't work in this environment.
    cap.release()             # Release the video capture object
            
            ### GENERATE APERTURE MEASUREMENT DATAFRAME #################################
    eyes_mouth_aperture_measurements = {
        'left_eye_aperture_measurements': left_eye_aperture_measurements,
        'right_eye_aperture_measurements': right_eye_aperture_measurements,
        'mouth_top_bottom_aperture_measurements': mouth_top_bottom_aperture_measurements,
        'mouth_left_right_aperture_measurements': mouth_left_right_aperture_measurements,
    }
    eyes_mouth_aperture_measurements_df = pd.DataFrame(eyes_mouth_aperture_measurements)
    eyes_mouth_aperture_measurements_df['time_derived_from_fps'] = eyes_mouth_aperture_measurements_df.index / fps

    return eyes_mouth_aperture_measurements_df

# Video -> DataFrame -> CSV
### ***LOOPs all videos! (many hours)***

In [None]:
# Here, I will load each video into the analysis code. Then save the dataframe as a csv file.
def video_to_df_to_csv(video_input_data):
    """
    This function is designed to be used in a loop. It will take a video dictionary in, locate the video, process the landmark
    features into a dataframe, then write that csv file. 
    """
    if not os.path.exists(DIRECTORY_TO_CSV): # if the directory doesn't exist, make it.
            os.makedirs(DIRECTORY_TO_CSV)
            print(f"Directory {DIRECTORY_TO_CSV} created.")

    save_location_and_name = f"{DIRECTORY_TO_CSV}/{video_input_data['csv_filename']}"
    
    if os.path.exists(save_location_and_name): # if the csv file already exists, skip, else continue.
        print(f"CSV file already exists at: {save_location_and_name}.\n>> Skipping  {video_input_data['video_path_and_filename']}")
    else:
        # Process the video, export the dataframe. Report number of NaNs found.
        print(f"Processing video, generating dataframe: {video_input_data['video_path_and_filename']}")
        df = video_tracking_to_df(video_input_data['video_path_and_filename'])      
        number_of_nans = df.isna().sum().sum()
        print(f"Number of rows containing NaN values: {number_of_nans}")
        
        # Save the dataframe to csv.
        print(f"Saving df data to csv: {save_location_and_name}")
        df.to_csv(save_location_and_name)
        
        # Print success statement.
        print(f"Video {video_input_data['video_path_and_filename']} face features were successfully saved in a dataframe!")


start_time_total = time.time()
video_count = 1
num_videos = len(VIDEO_DICT.items())

for video_key, video_data in VIDEO_DICT.items():
    print(f"==== PROCESSING ==== ({video_count}/{num_videos}) ==== ({round((video_count/num_videos)*100, 1)}%) =============================================")
    
    start_time = time.time()
    video_to_df_to_csv(video_data)
    
    end_time = time.time()
    execution_time = end_time - start_time
    print(f">> Execution time for this video: {execution_time:.1f} seconds.\n")
    video_count += 1

end_time_total = time.time()
execution_time_total = end_time_total - start_time_total
print(f"Congratulations, all finished in {round(execution_time_total/60, 2)} minutes.")

# CSV data -> Plots -> PNG Images!

##### --> Assumes ALL videos in the dictionary have a csv in the csv directory!

In [None]:
def csv_to_plot(video_input_data):
    """
    This function is designed to be used in a loop. It will take a video dictionary in, locate the video file, corresponding csv file
    then process it into a graph.
    """
    csv_file = os.path.join(DIRECTORY_TO_CSV, f"{video_input_data['csv_filename']}")
    if os.path.exists(csv_file):                    # IF there is a csv file, load it.
        # load the csv into a dataframe         
        df = pd.read_csv(csv_file)
        
        # before generating the plot, check if the plot directory/plot images exist. 
        if not os.path.exists(DIRECTORY_TO_PNG):  # IF there is no plot directory, make it.
            print(f">> Discovered that directory '{DIRECTORY_TO_PNG}' doesn't exist. Creating directory...") 
            os.makedirs(DIRECTORY_TO_PNG)
            print(f">> Directory '{DIRECTORY_TO_PNG}' created successfully! Continue...\n")
            # For demonstration only, remove this from the final code as it will definitely exist.
            # Calculate time in seconds based on the frame count and frame rate    
                
        csv_filename_only = video_input_data['csv_filename'].replace('.csv', '')
        # num_rows_with_nan = df.isna().any(axis=1).sum()       # only counts full rows.
        number_of_nans = df.isna().sum().sum()                   # counts total NaNs 
        png_filename = f"{csv_filename_only}__({number_of_nans}-NaNs).png"
            
        # Now proceed to save the plot within the directory
        png_directory_and_filename = os.path.join(DIRECTORY_TO_PNG, png_filename)
        if not os.path.exists(png_directory_and_filename):   # checking if the image exists
            # Now that we know the csv exists and the image does not exist, we can create the image from the csv. 
            
# ############ This code below to generate the time column will be removed once all CSVs are made with it. ###############################
#             if 'time_derived_from_fps' not in df.columns:
#                 print("No time data, so I will generate it.")
#                 df['time_derived_from_fps'] = df.index / video_input_data['fps']                
# #######################################################################################################################################                                
            # Create a figure and axis with adjusted figure size
            fig, ax = plt.subplots(figsize=(21,12))

            # Plotting apertures
            ax.plot(df['time_derived_from_fps'], df['left_eye_aperture_measurements'], label='Left Eye Aperture', color='red')
            ax.plot(df['time_derived_from_fps'], df['right_eye_aperture_measurements'], label='Right Eye Aperture', color='blue')
            ax.plot(df['time_derived_from_fps'], df['mouth_top_bottom_aperture_measurements'], label='Mouth Top-Bottom Aperture', color='green')
            ax.plot(df['time_derived_from_fps'], df['mouth_left_right_aperture_measurements'], label='Mouth Left-Right Aperture', color='orange')

            # Add labels and legend
            ax.set_xlabel('Time derived from FPS (seconds)', fontsize=14, fontweight='bold')
            time_in_seconds = df['time_derived_from_fps']
            time_labels = np.arange(0, time_in_seconds.max() + 1, 20)  # Show labels every 10 seconds
            ax.set_xticks(time_labels)
            ax.set_ylabel('Aperture Measurement', fontsize=14, fontweight='bold')
            title = f"{csv_filename_only} ({number_of_nans} NaNs) - Eyes and Mouth Aperture Measurements vs. FPS Derived Time"
            ax.set_title(title, fontsize=18, fontweight='bold')
            ax.legend()

            # Add gridlines in soft gray
            ax.grid(color='gray', linestyle=':', linewidth=0.5)

            # Display the plot
            # plt.show()

            # save the plot and close the memory.
            plt.savefig(png_directory_and_filename)
            plt.close()                   
        else:
            print(f">> Skipping file as '{png_directory_and_filename}' already exists in directory.")
    else:        
        print(f">> Skipping file as '{csv_file}' does not exist in directory.")

    # df = "reducing memory usage"
    
    
start_time_total = time.time()
video_count = 1                       # counting csv files
num_videos = len(VIDEO_DICT.items())  # counting csv files

for video_key, video_data in VIDEO_DICT.items():
    print(f"==== PROCESSING ==== ({video_count}/{num_videos}) ==== ({round((video_count/num_videos)*100, 1)}%) ======================================================================")
    
    start_time = time.time()
    csv_to_plot(video_data)
    
    end_time = time.time()
    execution_time = end_time - start_time
    print(f">> Time to plot csv for this video: {execution_time:.1f} seconds.\n")
    video_count += 1

end_time_total = time.time()
execution_time_total = end_time_total - start_time_total
print(f"Congratulations, all finished in {round(execution_time_total/60, 2)} minutes.")


consider changing the floating point values to save memory
surely 4-5 floating point values are sufficient?
--> Use float32 for max 7 decimal places.

# 2. Data Pre-Processing

## 2.1 Import DataFrames --> Impute

### 2.1.1 First, make a copy of the original csv files for backup.

In [None]:
if os.path.exists(DIRECTORY_TO_NO_NANS_CSV):
    print(f">> Oh no, the path ~/{DIRECTORY_TO_NO_NANS_CSV} already exists.\n>>>> Maybe you already made the csv files?")
else:
    print(f">> ~/{DIRECTORY_TO_NO_NANS_CSV} doesn't exist.\n>>>> Creating and copying files now...")
    shutil.copytree(DIRECTORY_TO_CSV, DIRECTORY_TO_NO_NANS_CSV)
    print(f">> Successfully copied directory ~/{DIRECTORY_TO_NO_NANS_CSV}.\n")
    print(">>>> CONFIRM YOUR FOLDER. THERE COULD BE CACHED FILES COPIED IN IT.")

### 2.1.2 Make a list of the csv files for iteration

In [None]:
def generate_csv_list_for_impute(directory):
    """
    Takes a directory with csv files then generates a list containing their directory paths that can be iterated over.
    """
    csv_list_for_impute = []  
    for root, dirs, files in os.walk(DIRECTORY_TO_NO_NANS_CSV):
        for file in files:
            # Check if the file has a video extension (you can add more extensions)
            if file.lower().endswith(('.csv')):
                csv_path_and_filename = os.path.join(root, file)
                csv_list_for_impute.append(csv_path_and_filename)
    
    print(csv_list_for_impute)
    print(f"\nThere are {len(csv_list_for_impute)} csv files in your list!")
    return csv_list_for_impute

# Generate a list of all CSVs for ALL CSVs in the directory
list_of_csv_files_for_impute = generate_csv_list_for_impute(DIRECTORY_TO_NO_NANS_CSV)

### 2.1.3 Load CSV (convert floats, remove time column), Impute NaNs, return to CSV

#### **WARNING**: This function will **OVERWRITE** csv files with NaNs with imputed csv files. Make sure you are working on copies!

In [None]:
from sklearn.impute import SimpleImputer

def impute_nans(list_of_csv_files):
    """
    Takes in a list of csv files. Each csv is opened as a df, then checked for NaNs, then imputed, then saved as csv again.
    The saved csv has the same filename, so it's impotant that you are iterating over a list of copied csv files.
    """
    # List of columns we can impute. TIME COLUMN IS NOT FOR IMPUTING!!! think of the time as the index :)
    df_list_of_measured_cols = [
        'left_eye_aperture_measurements',
        'right_eye_aperture_measurements', 
        'mouth_top_bottom_aperture_measurements',
        'mouth_left_right_aperture_measurements'
    ]
    # keep track of how many files have nans.
    number_of_files_with_nans = 0
    
    # Iterate over the list of csv
    for csv_file in list_of_csv_files:
        # make a dataframe from each csv file
        df = pd.read_csv(csv_file)
        # count NaNs
        number_of_nans = df.isna().sum().sum()   
        if number_of_nans > 0:
            number_of_files_with_nans += 1
            print(f">> ~/{csv_file} has NaNs. Preparing to impute...")
            # iterate over each column in the df to impute nans.
            for column in df_list_of_measured_cols:
                # Instantiate a SimpleImputer object with your strategy of choice
                imputer = SimpleImputer(strategy="mean") 
                # Call the "fit" method on the object 
                imputer.fit(df[[column]])
                # Call the "transform" method on the object
                df[column] = imputer.transform(df[[column]])
                print(f">>>> '{column}' column imputed.")
            print(f">> ~/{csv_file}: All columns imputed.\n")
            # Overwrites the original csv file.
            df.to_csv(csv_file, index=False) 
        else:
            print(f">> {csv_file} has NO NaN values. Skipping file with no modifications.\n")
    print(f"Success! Directory had >> {number_of_files_with_nans} << csv files with NaNs corrected.")


impute_nans(list_of_csv_files_for_impute)

#### (optional) Plot the new graphs to see if they look OK. Just open and inspect is OK, too.

# 3. LSTM, BABY!

### Step 1A: Sort Lists of Classified CSV files

In [8]:
# Initialize lists to hold the paths of CSV files for each class
drowsy_csv_files = []
normal_csv_files = []
alert_csv_files = []

# Iterate over the files in the CSV folder
for filename in os.listdir(DIRECTORY_TO_NO_NANS_CSV):
    if filename.endswith(".csv"):
        filepath = os.path.join(DIRECTORY_TO_NO_NANS_CSV, filename)
        if "class_0" in filename:  # Assuming filenames contain 'class_0' for alert videos
            alert_csv_files.append(filepath)
        elif "class_5" in filename:  # Assuming filenames contain 'class_5' for normal videos
            normal_csv_files.append(filepath)
        elif "class_10" in filename:  # Assuming filenames contain 'class_10' for drowsy videos
            drowsy_csv_files.append(filepath)

In [9]:
# Verify sorting:
print(f"Drowsy csv files: {len(drowsy_csv_files)}\n {drowsy_csv_files}\n")
print(f"Normal csv files: {len(normal_csv_files)}\n {normal_csv_files}\n")
print(f"Alert csv files: {len(alert_csv_files)}\n {alert_csv_files}\n")

Drowsy csv files: 48
 ['videos_fold_1/csv_data_no_nans/data_person_19_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_4_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_46_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_14_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_36_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_8_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_39_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_41_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_45_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_35_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_27_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_5_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_38_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_7_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_37_class_10.csv', 'videos_fold_1/csv_data_no_nans/data_person_13_clas

### Step 1B: Load and Pre-process each dataframe to be the same length.

In [10]:
# If you want to see any of the data, use this to get many decimal places.
# The full length values are still there, up to 18dp, it just limits what it shows you.
# pd.set_option('display.float_format', '{:.8f}')  # Display up to 8 decimal places

def convert_csv_to_df(csv_file):
    """
    Here each csv file will be stored as a dataframe.
    If memory becomes a problem, load the frames using float32.
    If memory is still an issue, you can drop one of the eye columns as the left and right eye are basically the same.
    """
    df = pd.read_csv(csv_file, index_col=0)
    # Process and impute data as needed
    return df


def preprocessing_on_dataframe(df, MAX_ROWS, TRIM_ROWS):
    """
    Here we will load the dataframes, then trim and pad it as needed.
    Returns a preprocessed dataframe!
    """
    # Checking if the df is long enough
    if len(df) > MAX_ROWS:
        trimmed_df = df.iloc[TRIM_ROWS:-TRIM_ROWS]
        if len(trimmed_df) > MAX_ROWS: # If the trimmed df is STILL longer, then just cut it down.
            trimmed_df = trimmed_df.iloc[0:MAX_ROWS]
    else:
        trimmed_df = df
    # Determine how many extra rows to add to make the dataframe MAX_ROWS length.
    padding_length = MAX_ROWS - len(trimmed_df)
    padded_df = pd.concat([trimmed_df, pd.DataFrame(0, index=range(padding_length), columns=trimmed_df.columns)]).reset_index(drop=True)
    return padded_df


drowsy_dataframes_trimmed = []
normal_dataframes_trimmed = []
alert_dataframes_trimmed = []


for csv_file in drowsy_csv_files:
    df = convert_csv_to_df(csv_file)
    df_preprocessed = preprocessing_on_dataframe(df, MAX_ROWS=MAX_ROWS, TRIM_ROWS=TRIM_ROWS)
    drowsy_dataframes_trimmed.append(df_preprocessed)

for csv_file in normal_csv_files:
    df = convert_csv_to_df(csv_file)
    df_preprocessed = preprocessing_on_dataframe(df, MAX_ROWS=MAX_ROWS, TRIM_ROWS=TRIM_ROWS)
    normal_dataframes_trimmed.append(df_preprocessed)

for csv_file in alert_csv_files:
    df = convert_csv_to_df(csv_file)
    df_preprocessed = preprocessing_on_dataframe(df, MAX_ROWS=MAX_ROWS, TRIM_ROWS=TRIM_ROWS)
    alert_dataframes_trimmed.append(df_preprocessed)

In [14]:
# Verify dataframe shapes
for dataframe in drowsy_dataframes_trimmed:
    print(dataframe.shape)

(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)
(18000, 5)


### XXXXXX Step 2: Data Labeling


### Step 3: Data Splitting

In this code, we're first combining all the preprocessed data from different classes into a single list called all_data. Then, we create corresponding labels for each data instance based on the class. Finally, we use the train_test_split function to split both the data and labels into train and test sets. The test_size parameter controls the proportion of data that goes into the test set (in this case, 20%). This way, you'll have a train set (train_data and train_labels) and a test set (test_data and test_labels) that you can use for training your LSTM model and evaluating its performance.

Setting `random_state` to a specific value (e.g., 42) means that every time you run your code with the same value of random_state, you will get the same split of data into train and test sets. This can be very useful for debugging, testing, and sharing code, as it ensures that others can reproduce the same results as you. In other words, if you set random_state=42 and someone else runs the code with the same parameter value, they will get the exact same train and test split that you got. This helps maintain consistency and reproducibility in your experiments.

## **CAUTION - HERE YOU WILL DECIDE WHICH DATA TO INCLUDE IN THE TRAINING!**

In [15]:
# Combine data from all classes
all_data = drowsy_dataframes_trimmed + alert_dataframes_trimmed # + normal_dataframes_trimmed
labels_all_data = [2] * len(drowsy_dataframes_trimmed) + [0] * len(alert_dataframes_trimmed) # + [1] * len(normal_dataframes_and_labels)

binary_data = drowsy_dataframes_trimmed + alert_dataframes_trimmed
labels_binary_data = [1] * len(drowsy_dataframes_trimmed) + [0] * len(alert_dataframes_trimmed)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(binary_data, labels_binary_data, test_size=0.20, random_state=42)

In [17]:
# Verify
print(len(X_train))
print(len(X_test))
print(X_train[0].shape)
y_test[7]
print(len(y_train))
print(len(y_test))

76
20
(18000, 5)
76
20


### Step 4: Data Normalization (Scaling) [**TIME COLS REMOVED**]

In [18]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Normalize the training and test data:
X_train_normalized = []
for df in X_train:
    # time_column = df["time_derived_from_fps"]  # Extract time column / exclude the time column
    df_features = df.drop(["time_derived_from_fps"], axis=1)  # Exclude the time column
    normalized_features = scaler.fit_transform(df_features)
    normalized_df = pd.DataFrame(normalized_features, columns=df_features.columns)
    # normalized_df["time_derived_from_fps"] = time_column  # Reinsert the time column / exclude the time column
    X_train_normalized.append(normalized_df)

X_test_normalized = []
for df in X_test:
    # time_column = df["time_derived_from_fps"]  # Extract time column / exclude the time column
    df_features = df.drop(["time_derived_from_fps"], axis=1)  # Exclude the time column
    normalized_features = scaler.transform(df_features)
    normalized_df = pd.DataFrame(normalized_features, columns=df_features.columns)
    # normalized_df["time_derived_from_fps"] = time_column  # Reinsert the time column / exclude the time columns
    X_test_normalized.append(normalized_df)

print("training:")
print(X_train_normalized)
print("test:")
print(X_test_normalized)

training:
[       left_eye_aperture_measurements  right_eye_aperture_measurements  \
0                           -0.125338                        -0.193958   
1                            0.427337                         0.271098   
2                            0.712606                         0.532782   
3                            0.691571                         0.517007   
4                            0.650017                         0.499594   
...                               ...                              ...   
17995                       -2.139748                        -2.141273   
17996                       -2.139748                        -2.141273   
17997                       -2.139748                        -2.141273   
17998                       -2.139748                        -2.141273   
17999                       -2.139748                        -2.141273   

       mouth_top_bottom_aperture_measurements  \
0                                    0.954268   
1 

In [19]:
print(len(X_train_normalized))
print(len(X_test_normalized))

76
20


### Step 5A: Build LSTM Model

The choice between using 'sparse_categorical_crossentropy' vs. 'categorical_crossentropy' and 'accuracy' vs. 'categorical_accuracy' depends on the label representation and the activation function used in the output layer.

Loss Function ('sparse_categorical_crossentropy' vs. 'categorical_crossentropy'):

If your labels are integers representing class indices (e.g., 0, 1, 2), then you should use 'sparse_categorical_crossentropy'.
If your labels are one-hot encoded (binary representation) for each class, then you would use 'categorical_crossentropy'.
Metrics ('accuracy' vs. 'categorical_accuracy'):

If you're using 'sparse_categorical_crossentropy' as the loss function, then 'accuracy' is used to compute the accuracy metric.
If you're using 'categorical_crossentropy' and your labels are one-hot encoded, then you would use 'categorical_accuracy'.
In your case, since you're using integer class indices and 'sparse_categorical_crossentropy' as the loss function, 'accuracy' is the correct metric to use.

In [20]:
X_train_normalized[0].shape

(18000, 4)

## Slim Down The Dataframes and Keep Target Data for each

In [36]:
def split_my_dataframe(x_set, y_set, row_splits: int):
    """
    Pass in a list of corresponding dataframes (x_set) and target labels (y_set), e.g. X_train and y_train, equal length lists.
    Function will split each df inside the X_set, and create the same number of targets as the number of splits.
    This function assumes all dataframes inside the x_set are the same length i.e. same number of rows.
    e.g. a df with 500 rows and row_chunks 50 will return 10 dataframes of length 50 each.
    !!!!!! Only pass in values that will result in an integer ratio of len(df) // row_chunks!!!!!
    Returns the new split df X_set and y_set to pass through to the model.
    """
    len_x = len(x_set)
    len_y = len(y_set)

    if len_x == len_y and len_x != 0:                       # This will only run if the lists contain data and are the same length.
        original_df_length = len(x_set[0])                  # 18,000 rows
        num_splits = original_df_length // row_splits       # 18,000 rows / 150 rows (so about 5 seconds of frame data 30fps per chunk) = 150 chunks
        print(f">> Performed {num_splits} splits on {len_x} dataframes with {row_splits} rows each.")
        
        list_of_split_dfs = []
        list_of_targets_expanded = []        
        
        for i in range(len_x):
            df = x_set[i]                    # gets the df in the i position in x_set
            target_to_expand = y_set[i]      # gets the target value in the i position in y_set, corresponding to x_set.

            split_count = 0

            for j in range(num_splits):
                # split and append the list_of_split_dfs
                split_df = df.iloc[split_count * row_splits : (split_count + 1) * row_splits]
                list_of_split_dfs.append(split_df)
                # multiply the class/target value by the number of splits and append the list with the target value num_split times.
                list_of_targets_expanded.append(target_to_expand) 
                
                split_count += 1
    
    return list_of_split_dfs, list_of_targets_expanded
    

x_train_normalized_split, y_train_split = split_my_dataframe(X_train_normalized, y_train, 150)
print(f"---- len(x_train_normalized_split) = {len(x_train_normalized_split)}. len(y_train_split) = {len(y_train_split)}.")
x_test_normalized_split, y_test_split = split_my_dataframe(X_test_normalized, y_test, 150)
print(f"---- len(x_test_normalized_split) = {len(x_test_normalized_split)}. len(y_test_split) = {len(y_test_split)}.")

>> Performed 120 splits on 76 dataframes with 150 rows each.
---- len(x_train_normalized_split) = 9120. len(y_train_split) = 9120.
>> Performed 120 splits on 20 dataframes with 150 rows each.
---- len(x_test_normalized_split) = 2400. len(y_test_split) = 2400.


In [37]:
# Define input shape (number of time steps and number of features)
input_shape = (x_train_normalized_split[0].shape[0], x_train_normalized_split[0].shape[1])
print("Input Shape:", input_shape)
# Define number of classes
num_classes = 1  # Using 3 classes: alert 0, normal 1, drowsy 2.
                 # If you are doing binary, use 1.

Input Shape: (150, 4)


In [47]:
def initialize_model(input_shape, num_classes):
    """
    First try - relu activation, sparse_categorical_crossentropy, Adam, softmax.
    Second try - tahn activation, binary_crossentropy
    """
    # Set up a sequential model
    model = Sequential()
    # Catch and remove the padded data with a masking layer:
    model.add(Masking(mask_value=0, input_shape=input_shape))
    # LSTM layer 1
    model.add(LSTM(units=64, return_sequences=False, activation='tanh'))
    # LSTM layer 2
    # model.add(LSTM(units=128, return_sequences=False, activation='tanh'))
    # model.add(Dropout(0.2))
    # Dense layer
    model.add(Dense(64, activation='tanh'))
    # model.add(Dropout(0.2))
    # Dense layer
    model.add(Dense(32, activation='tanh'))
    
    # Output layer
    model.add(Dense(num_classes, activation='sigmoid'))
    
    return model


# Initialize the LSTM model
model = initialize_model(input_shape, num_classes)

# Custom optimiser to tackle the loss nan problem:
# opt = tensorflow.keras.optimizers.Adam(learning_rate=0.01)

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
# Print model summary
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking_3 (Masking)         (None, 150, 4)            0         
                                                                 
 lstm_6 (LSTM)               (None, 150, 64)           17664     
                                                                 
 lstm_7 (LSTM)               (None, 150, 128)          98816     
                                                                 
 lstm_8 (LSTM)               (None, 128)               131584    
                                                                 
 dense_9 (Dense)             (None, 64)                8256      
                                                                 
 dense_10 (Dense)            (None, 32)                2080      
                                                                 
 dense_11 (Dense)            (None, 1)                

### Step 5B: Train LSTM Model

#### Data needs to be "reshaped" before it can be passed in to the fit function.

In [39]:
# Convert lists to numpy arrays
X_train_fit = np.array(x_train_normalized_split)
X_test_fit = np.array(x_test_normalized_split)
y_train_fit = np.array(y_train_split)
y_test_fit = np.array(y_test_split)

# Re-size the y output to match the dimensions of the input (a,b,c...)
y_train_fit_resized = y_train_fit.reshape(-1, 1)
y_test_fit_resized = y_test_fit.reshape(-1, 1)

In [41]:
print(X_train_fit.shape)
print(X_test_fit.shape)
print(y_train_fit_resized.shape)
print(y_test_fit_resized.shape)

(9120, 150, 4)
(2400, 150, 4)
(9120, 1)
(2400, 1)


In [42]:
# sess.graph contains the graph definition; that enables the Graph Visualizer.
file_writer = tensorflow.summary.create_file_writer(DIRECTORY_TO_LOG)

In [48]:
es = EarlyStopping(
    patience = 50,
    restore_best_weights = True
)

tb_callback = TensorBoard(log_dir=DIRECTORY_TO_LOG)
# ADD CODE HERE TO MAKE / CHECK THE DIRECTORY AUTOMATICALLY

history = model.fit(
    X_train_fit, 
    y_train_fit_resized, 
    batch_size=32, 
    epochs=300,
    validation_data=(X_test_fit, y_test_fit_resized),
    callbacks = [es, tb_callback], 
    verbose=1
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300

KeyboardInterrupt: 

In [None]:
# model.evaluate(X_test, y_test)