In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use SVG as the format of inline figures.
%config InlineBackend.figure_formats = ['svg']

'1.10.0'

# Feature Extraction

In [None]:
def eye_aspect_ratio(eye):
    """Return the eye aspect ratio (EAR) of a sequence of landmark points.

    Only positions 0-5 of ``eye`` are read. The ratio is the sum of the
    distances between points 1-5 and 2-4, divided by twice the distance
    between points 0-3.
    """
    span_1_5 = distance.euclidean(eye[1], eye[5])
    span_2_4 = distance.euclidean(eye[2], eye[4])
    span_0_3 = distance.euclidean(eye[0], eye[3])
    return (span_1_5 + span_2_4) / (2.0 * span_0_3)

In [None]:
def mouth_aspect_ratio(mouth):
    """Return the mouth aspect ratio (MAR) of a sequence of landmark points.

    The ratio is the distance between points 14 and 18 divided by the
    distance between points 12 and 16, so ``mouth`` must hold at least
    19 points.
    """
    numerator = distance.euclidean(mouth[14], mouth[18])
    denominator = distance.euclidean(mouth[12], mouth[16])
    return numerator / denominator

In [None]:
def circularity(eye):
    """Return the circularity ``4 * pi * area / perimeter ** 2`` of an eye.

    ``area`` is the area of a circle whose diameter is the distance between
    points 1 and 4; ``perimeter`` is the length of the closed polygon
    0-1-2-3-4-5-0. Only positions 0-5 of ``eye`` are read.
    """
    diameter = distance.euclidean(eye[1], eye[4])
    radius = diameter/2.0
    area = math.pi * (radius ** 2)

    # Walk the six polygon edges in order, closing the loop back to point 0.
    perimeter = 0
    for start, end in ((0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 0)):
        perimeter += distance.euclidean(eye[start], eye[end])

    return 4 * math.pi * area / (perimeter**2)

In [None]:
def mouth_over_eye(eye):
    """Return the ratio of the mouth aspect ratio to the eye aspect ratio.

    The same landmark sequence ``eye`` is handed to both
    ``eye_aspect_ratio`` and ``mouth_aspect_ratio``.
    """
    eye_ratio = eye_aspect_ratio(eye)
    mouth_ratio = mouth_aspect_ratio(eye)
    return mouth_ratio / eye_ratio

In [10]:
def frame_extraction(folder = "Fold5_part2", participants = [55, 60], MAX_FRAMES = 240):
    """Extract landmark-based features from each participant's videos.

    For every participant and every state in 0, 5, 10 the matching ``.mov``
    file is opened and frames are sampled once per second, starting at the
    3-minute mark. Each sampled frame is written to disk as a JPEG and passed
    to ``extract_face_landmarks``. Once ``MAX_FRAMES`` frames with a detected
    face have been collected, the per-frame features and the labels are
    written to two CSV files.

    Args:
        folder: Name of the fold folder that contains the participant folders.
        participants: Participant identifiers (sub-folder names). The
            sequence is only iterated, never modified.
        MAX_FRAMES: Number of frames with a detected face to collect per video.

    Returns:
        None. Results are written to disk.
    """
    def captureFrame(sec):
        """Seek to ``sec`` seconds past the start offset, read and save one frame."""
        # setting the starting time at 3 minutes
        start = 180000
        # setting the timestamp to get the frame at that particular second
        vidcap.set(cv2.CAP_PROP_POS_MSEC, start + sec*1000)
        # gets the frame from the specified second
        ret_frame, image = vidcap.read()

        if ret_frame:
            # makedirs also creates missing parents and tolerates an existing folder
            os.makedirs(r"D:\\Downloads\\img\\" + str(parti_num), exist_ok=True)
            # saving the frame
            cv2.imwrite(r"D:\\Downloads\\img\\" + str(parti_num) + "\\p" + str(parti_num) + "_s" + str(state) +
                        "_" + str(frame_count) + "sec.jpg", image)
        return ret_frame, image

    # Loop through all videos in a specific folder. Best results with .mov files.
    for j in participants:
        for i in np.arange(0, 11, 5):
            # clearing the output at every frame otherwise it will create flickering
            clear_output(wait=True)
            print(f'Starting with participant {j} and state {i}')
            values = []
            labels = []
            # captureFrame reads these names from the enclosing scope, so they
            # must describe the current video BEFORE the first frame is
            # captured; otherwise that frame is filed under the previous video.
            parti_num = j
            state = i
            sec = 0
            frameRate = 1
            frame_count = 0
            # reading the video
            vidcap = cv2.VideoCapture(r'D:\\Downloads\\' + str(folder) + '\\' + str(j) +'\\' + str(i) + '.mov')
            try:
                # printing the number of frames in the video
                print(f"Total Frames in selected Video {int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))}")
                acquire_frame, image = captureFrame(sec)
                # Extract frames (per video), the more frames the longer the extraction takes
                while acquire_frame and frame_count < MAX_FRAMES:
                    # function from mlxtend library to detect and extract face shape
                    landmarks = extract_face_landmarks(image)
                    if landmarks is not None and sum(sum(landmarks)) != 0:
                        frame_count += 1
                        values.append(landmarks)
                        labels.append([i])
                        status = frame_count
                    else:
                        # if face not detected
                        status = "not detected"
                    # advance by one sampling step and capture the next frame
                    sec = round(sec + frameRate, 2)
                    acquire_frame, image = captureFrame(sec)
                    print(status)
            finally:
                # always give the video handle back, even if extraction failed
                vidcap.release()

            # acquire_frame is still True only if the loop stopped because
            # MAX_FRAMES was reached; a video that ran out of readable frames
            # first produces no CSV output.
            if acquire_frame:
                values = np.array(values)
                labels = np.array(labels)
                print(f"Data {values.shape}")
                print(f"Labels {labels.shape}")
                interested_features = []
                # eye and mouth feature extraction referred from https://github.com/sandyying/APM-Drowsiness-Detection/blob/master/Feature%20Extraction.ipynb
                for k in values:
                    # extracting only the interested features i.e., eye and mouth
                    key_landmark = k[36:68]
                    # here we create our features for our base classifier
                    ear = eye_aspect_ratio(key_landmark)
                    mar = mouth_aspect_ratio(key_landmark)
                    cir = circularity(key_landmark)
                    mouth_eye = mouth_over_eye(key_landmark)
                    interested_features.append([int(parti_num), ear, mar, cir, mouth_eye])
                interested_features = np.array(interested_features)
                print(f"Features {interested_features.shape}")
                # saving captured features in csv
                os.makedirs('D:\\Downloads\\data_drowsiness\\', exist_ok=True)
                np.savetxt('D:\\Downloads\\data_drowsiness\\' + str(folder) +'_features_'+str(parti_num)+'_'+str(state)+'.csv', interested_features, delimiter = ",")
                np.savetxt('D:\\Downloads\\data_drowsiness\\' + str(folder) +'_labels_'+str(parti_num)+'_'+str(state)+'.csv', labels, delimiter = ",")

## Extracting Frames from Video

In [None]:
%%time
# The video dataset is stored under D:\Downloads, one "Fold*" folder per fold,
# each holding one sub-folder per participant.
downloads_dir = 'D:\\Downloads\\'
for fold_name in os.listdir(downloads_dir):
    fold_path = os.path.join(downloads_dir, fold_name)
    # Skip plain files and the output folders (img, data_drowsiness) that
    # frame_extraction itself creates next to the folds.
    if not (os.path.isdir(fold_path) and fold_name.startswith("Fold")):
        continue
    participant_dirs = [
        entry for entry in os.listdir(fold_path)
        if os.path.isdir(os.path.join(fold_path, entry))
    ]
    frame_extraction(folder=fold_name, participants=participant_dirs)

In [12]:
# Preview one saved feature file; the CSV has no header row, so the column
# names are supplied here.
pd.read_csv(
    r"D:\\Downloads\\data_drowsiness\Fold1_part1_features_1_5.csv",
    header=None,
    names=["Participant", "EAR", "MAR", "Circularity", "MOE"],
)

Unnamed: 0,Participant,EAR,MAR,Circularity,MOE
0,1.0,0.306011,0.850631,0.439164,2.779737
1,1.0,0.300157,0.883182,0.430257,2.942402
2,1.0,0.318335,0.875474,0.466570,2.750171
3,1.0,0.302996,0.887505,0.454279,2.929104
4,1.0,0.340870,0.890336,0.496024,2.611952
...,...,...,...,...,...
235,1.0,0.309938,0.816165,0.432415,2.633318
236,1.0,0.298477,0.787879,0.436982,2.639667
237,1.0,0.260300,0.841359,0.421348,3.232268
238,1.0,0.299461,0.810135,0.419005,2.705307


In [None]:
# NOTE(review): `image` and `device` are not assigned at notebook scope in any
# cell visible here, and this cell carries no execution count — it looks like a
# leftover from another notebook. Confirm whether it can be removed.
import torch
# Multiplies `image` by three per-channel factors and adds three per-channel
# offsets, each reshaped to (3, 1, 1) and moved to `device`; presumably this
# undoes a mean/std normalisation of a channels-first image — TODO confirm.
image = image * torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1).to(device) + torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1).to(device)

In [13]:
# reading all the features and labels and combining them together
feature_columns = ["Participant", "EAR", "MAR", "Circularity", "MOE"]

# glob returns files in arbitrary order, so two independent globs could pair a
# feature file with the wrong label file. Sort one listing and derive every
# label path from its feature path instead.
feature_files = sorted(glob.glob("D:\\Downloads\\data_drowsiness\\Fold*_features_*.csv"))
label_files = ["_labels_".join(path.rsplit("_features_", 1)) for path in feature_files]

df_features = [pd.read_csv(path, header=None, names=feature_columns) for path in feature_files]
df_labels = [pd.read_csv(path, header=None, names=["Y"]) for path in label_files]

# Each feature file must have exactly one label per row.
mismatched = [
    path for path, features, labels in zip(feature_files, df_features, df_labels)
    if len(features) != len(labels)
]
assert not mismatched, f"feature/label row counts differ for: {mismatched}"

df = pd.concat([pd.concat(df_features,ignore_index=True), pd.concat(df_labels,ignore_index=True)], axis=1)
print(df.shape)
# The merged frame is written to disk in the following cell.


(18480, 6)


In [15]:
# Write the merged features and labels to CSV without the index column, then
# show the first rows.
df.to_csv(
    r'D:\\Downloads\\data_drowsiness\merged\combined_feature_label.csv',
    index=False,
)
df.head()

Unnamed: 0,Participant,EAR,MAR,Circularity,MOE,Y
0,1.0,0.312688,0.792397,0.432896,2.534145,0.0
1,1.0,0.32194,0.766199,0.486923,2.379947,0.0
2,1.0,0.331216,0.688449,0.456029,2.07855,0.0
3,1.0,0.309246,0.776136,0.452655,2.509767,0.0
4,1.0,0.204691,0.734189,0.361123,3.586821,0.0


# Standardization

In [19]:
# Reload the merged CSV written earlier.
df_combined = pd.read_csv(r'D:\\Downloads\\data_drowsiness\\merged\\combined_feature_label.csv')

# Rearrange the columns: the last column comes first, then the fifth column,
# then the first four columns in their existing order.
all_columns = df_combined.columns.tolist()
last_part, fifth_part, leading_part = all_columns[-1:], all_columns[4:5], all_columns[:4]
column = last_part + fifth_part + leading_part
df_combined = df_combined[column]
df_combined

Unnamed: 0,Y,MOE,Participant,EAR,MAR,Circularity
0,0.0,2.534145,1.0,0.312688,0.792397,0.432896
1,0.0,2.379947,1.0,0.321940,0.766199,0.486923
2,0.0,2.078550,1.0,0.331216,0.688449,0.456029
3,0.0,2.509767,1.0,0.309246,0.776136,0.452655
4,0.0,3.586821,1.0,0.204691,0.734189,0.361123
...,...,...,...,...,...,...
18475,5.0,3.011821,60.0,0.348480,1.049561,0.490477
18476,5.0,4.016572,60.0,0.262352,1.053755,0.395392
18477,5.0,3.212684,60.0,0.319431,1.026230,0.490811
18478,5.0,4.230846,60.0,0.256146,1.083715,0.378786


In [20]:
# Count the missing values in each column of df_combined.
df_combined.isna().sum()

Y              0
MOE            0
Participant    0
EAR            0
MAR            0
Circularity    0
dtype: int64

In [21]:
# Count the rows that repeat an earlier row in every column.
df_combined.duplicated().sum()

13

In [22]:
# This cell does not drop duplicated rows. It removes every participant whose
# first row in df_combined has Y > 0, i.e. whose data does not start with Y == 0.

# First row of each participant.
t = df_combined.drop_duplicates("Participant")
# Participants whose first row has Y > 0.
participants_to_drop = t.loc[t["Y"] > 0, "Participant"]
keep_mask = ~df_combined["Participant"].isin(participants_to_drop)
df_combined = df_combined[keep_mask]
df_combined.shape


(17280, 6)

In [None]:
class Standardization:
    """Per-participant standardization of the EAR, MAR, Circularity and MOE features.

    The mean and standard deviation of each feature are estimated per
    participant from a few baseline rows of the "Alert" state (``Y == 0``),
    and every row of that participant is then z-scored with them.
    """

    def __init__(self, df_combined):
        """Store a private copy of ``df_combined``.

        Copying keeps the caller's frame unmodified and makes the column
        assignments below safe even when ``df_combined`` is a filtered slice
        of another frame, so no pandas warning option has to be changed.
        """
        self.df_combined = df_combined.copy()

    def calculate_Standardization(self, frames_per_video=240, baseline_frames=3):
        """Add per-participant mean, std and normalized columns and return the frame.

        Args:
            frames_per_video: Number of consecutive "Alert" rows that make up
                one block (one video).
            baseline_frames: Number of leading rows of every block that are
                used to estimate the mean and standard deviation.

        Returns:
            The stored DataFrame extended with ``<feature>_mean``,
            ``<feature>_std`` and ``<feature>_N`` columns for EAR, MAR,
            Circularity and MOE.

        Raises:
            KeyError: If a participant has no baseline rows.
        """
        features = ["EAR", "MAR", "Circularity", "MOE"]

        # Separating the rows which are "Alert" only
        df_state = self.df_combined[self.df_combined["Y"] == 0]

        # Keep the first `baseline_frames` rows of every block of
        # `frames_per_video` consecutive "Alert" rows.
        baseline_positions = [
            position for position in range(len(df_state))
            if position % frames_per_video < baseline_frames
        ]
        df_baseline = df_state.iloc[baseline_positions]

        # calculating per participant the mean and std for each feature
        grouped = df_baseline.groupby("Participant")[features]
        df_means = grouped.mean()
        df_std = grouped.std()

        # Fail loudly instead of silently producing NaN statistics.
        participants = self.df_combined["Participant"]
        unknown = participants[~participants.isin(df_means.index)].unique().tolist()
        if unknown:
            raise KeyError(f"No alert (Y == 0) baseline rows for participant(s): {unknown}")

        # Adding participant wise mean and std for each feature to each row
        for feature in features:
            self.df_combined[feature + "_mean"] = participants.map(df_means[feature])
        for feature in features:
            self.df_combined[feature + "_std"] = participants.map(df_std[feature])
        print(self.df_combined.shape)

        # Normalizing the values
        for feature in features:
            centered = self.df_combined[feature] - self.df_combined[feature + "_mean"]
            self.df_combined[feature + "_N"] = centered / self.df_combined[feature + "_std"]
        return self.df_combined

### Passing merged dataframe to Standardization class

In [25]:
# NOTE(review): this import rebinds the name `Standardization`, so the class
# used below comes from the drowsiness_standardisation module, not from a class
# defined in a notebook cell — confirm that the two definitions match.
from drowsiness_standardisation import Standardization
# Switch off pandas' chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None

# Standardize the features and show the first rows of the result.
df_combined = Standardization(df_combined).calculate_Standardization()
df_combined.head()

(17280, 14)


Unnamed: 0,Y,MOE,Participant,EAR,MAR,Circularity,EAR_mean,MAR_mean,Circularity_mean,MOE_mean,EAR_std,MAR_std,Circularity_std,MOE_std,EAR_N,MAR_N,Circularity_N,MOE_N
0,0.0,2.534145,1.0,0.312688,0.792397,0.432896,0.321948,0.749015,0.458616,2.330881,0.009264,0.054062,0.027106,0.231727,-0.999551,0.802437,-0.948848,0.877172
1,0.0,2.379947,1.0,0.32194,0.766199,0.486923,0.321948,0.749015,0.458616,2.330881,0.009264,0.054062,0.027106,0.231727,-0.000897,0.317858,1.044306,0.211742
2,0.0,2.07855,1.0,0.331216,0.688449,0.456029,0.321948,0.749015,0.458616,2.330881,0.009264,0.054062,0.027106,0.231727,1.000448,-1.120295,-0.095458,-1.088914
3,0.0,2.509767,1.0,0.309246,0.776136,0.452655,0.321948,0.749015,0.458616,2.330881,0.009264,0.054062,0.027106,0.231727,-1.371052,0.501666,-0.21992,0.77197
4,0.0,3.586821,1.0,0.204691,0.734189,0.361123,0.321948,0.749015,0.458616,2.330881,0.009264,0.054062,0.027106,0.231727,-12.657205,-0.274242,-3.596686,5.419921


In [26]:
# Save every column of the standardized frame to CSV.
df_combined.to_csv(r'KNN_Dataset/whole_info.csv',index=False)

# Save a reduced CSV that leaves out the per-participant mean and std columns.
helper_columns = [
    f"{feature}_{stat}"
    for stat in ("mean", "std")
    for feature in ("EAR", "MAR", "Circularity", "MOE")
]
df_main = df_combined.drop(columns=helper_columns)
df_main.to_csv(r'KNN_Dataset/normalized_data.csv',index=False)