In [196]:
import librosa
import numpy as np
import pandas as pd
from scipy import interpolate

In [197]:
# Read the head-pose cheat-detection table into a DataFrame.
# The path is relative, so it is resolved against the kernel's working directory.
head_pose_df = pd.read_csv(filepath_or_buffer='head_pose_cheat_detection.csv')

In [198]:
# Decode the audio recording into a sample array plus the sampling rate used.
# NOTE(review): the path is an absolute, machine-specific location, and no
# `sr` is passed, so the returned rate is librosa's default rather than
# necessarily the file's native rate — confirm both are intended.
audio_file = 'G:/Exam-Proctoring-System/huangpi2.wav'
audio_data, sampling_rate = librosa.load(path=audio_file)

In [199]:
# Analysis window of 40 ms, expressed in samples at the loaded sampling rate.
frame_length = int(0.04 * sampling_rate)
# The hop equals the window, so consecutive frames do not overlap.
hop_length = frame_length

In [200]:
# Compute one RMS (root-mean-square) amplitude value per analysis frame,
# then show the resulting array.
amplitudes = librosa.feature.rms(
    y=audio_data,
    frame_length=frame_length,
    hop_length=hop_length,
)
print(amplitudes)

[[0.00819698 0.01091784 0.01082183 ... 0.00778981 0.00919653 0.0090942 ]]


In [201]:
# Loudness cut-off applied to the per-frame amplitudes: frames whose RMS
# value exceeds it are flagged as containing sound.
# NOTE: this is a linear RMS amplitude (same scale as the values printed
# above), not a decibel level.
threshold = 0.011

In [202]:
# Flag each frame: 1 where its RMS amplitude is above the threshold, else 0.
is_loud = amplitudes > threshold
binary_representation = is_loud.astype(int)

In [203]:
# Enumerate the frames 0..N-1 (N is the size of the second axis).
# These are plain indices; they are not yet times.
num_frames = binary_representation.shape[1]
frame_indices = np.arange(num_frames)

In [204]:
# Map each frame index to a time in seconds: one hop lasts
# hop_length / sampling_rate seconds.
seconds_per_hop = hop_length / sampling_rate
timestamps_sec = frame_indices * seconds_per_hop


In [205]:
# Add a placeholder 'sound_binary' column to head_pose_df, filled with NaN
# until the real per-row values are computed.
head_pose_df['sound_binary'] = float('nan')

In [206]:
# Align the per-frame audio flags with the rows of head_pose_df.
# 'Timestamp' is divided by 1000 to obtain seconds (i.e. it is assumed to be
# in milliseconds), putting it on the same scale as timestamps_sec.
video_timestamps = head_pose_df['Timestamp'].values / 1000

# Nearest-neighbour lookup: each video timestamp takes the flag of the
# closest audio frame. fill_value='extrapolate' makes timestamps that fall
# outside the audio range reuse the first/last frame's flag instead of
# raising an error.
interpolator = interpolate.interp1d(
    timestamps_sec,
    binary_representation.flatten(),
    kind='nearest',
    fill_value='extrapolate',
)

# interp1d stores and returns floating-point values, so the raw result is
# 0.0/1.0. With kind='nearest' every output is an element of the input flags,
# so casting back to int is exact and keeps the column genuinely binary (0/1).
interpolated_binary = interpolator(video_timestamps).astype(int)

In [207]:
# Attach the per-row sound flags to the head-pose table as 'sound_binary',
# replacing any existing column of that name.
head_pose_df = head_pose_df.assign(sound_binary=interpolated_binary)

In [211]:
# Persist the combined table as CSV in the working directory, leaving out
# the DataFrame index.
head_pose_df.to_csv(path_or_buf='combined_data.csv', index=False)