In [7]:
from data_load.loader import ResultsLoader, TextLoader, AudioLoader, FaceLoader

# Initialize one loader per data source
results_loader = ResultsLoader()
text_loader = TextLoader()
audio_loader = AudioLoader()
face_loader = FaceLoader()

# Sampling settings passed to every get_data() call in the cells below.
# NOTE: 0.02 selects 2% of the data (not 20%) for the balanced subset;
# the results cell returns only 2 rows with this value.
percentage = 0.02
# Seed forwarded to the loaders — presumably keeps the sampled subset reproducible.
random_state = 42


In [8]:

# Fetch the results table for the sampled subset, then report its
# dimensions followed by the first rows.
results_df = results_loader.get_data(
    percentage=percentage,
    random_state=random_state,
)
print("\nResults data shape:", results_df.shape)
# A single print with a newline separator emits the heading and the preview
# exactly as two consecutive print calls would.
print("\nResults data preview:", results_df.head(), sep="\n")



Results data shape: (2, 1)

Results data preview:
     PHQ_Binary
ID             
386           1
391           0


In [9]:

# Fetch the transcript text for the same sampled subset, then report its
# dimensions followed by the first rows.
text_df = text_loader.get_data(
    percentage=percentage,
    random_state=random_state,
)
print("\nText features shape:", text_df.shape)
# A single print with a newline separator emits the heading and the preview
# exactly as two consecutive print calls would.
print("\nText features preview:", text_df.head(), sep="\n")



Text features shape: (2, 1)

Text features preview:
                                       TRANSCRIPT_text
ID                                                    
386  synch introv4confirmation hi im ellie thanks f...
391  sync introv4confirmation hi im ellie thanks fo...


In [10]:

# Load audio features with 10 s downsampling (ds_freq) and a 10 s rolling
# window (rw_size). The preview printed below has one row per 10 s timestamp
# for each participant ID.
audio_df = audio_loader.get_data(
    percentage=percentage,
    random_state=random_state,
    ds_freq="10s",
    rw_size="10s",
)
print("\nAudio features shape:", audio_df.shape)
print("\nAudio features preview:")
print(audio_df.head())



Audio features shape: (173, 80)

Audio features preview:
                     AUDIO_AMPLITUDE  FORMANT_F1  FORMANT_F2  FORMANT_F3  \
ID  TIMESTAMP                                                              
386 0 days 00:00:00        -0.000048  616.206600  1571.05937  2554.39041   
    0 days 00:00:10        -0.000021  719.887378  1726.74306  2604.00720   
    0 days 00:00:20        -0.000019  612.623780  1657.20697  2569.88200   
    0 days 00:00:30        -0.000018  536.712790  1657.41013  2599.18730   
    0 days 00:00:40        -0.000022  470.127492  1633.53475  2605.33350   

                     FORMANT_F4  FORMANT_F5  COVAREP_F0  COVAREP_VUV  \
ID  TIMESTAMP                                                          
386 0 days 00:00:00   3445.9205   4352.0802  245.137862     0.135864   
    0 days 00:00:10   3483.4309   4403.4083  250.681000     0.045000   
    0 days 00:00:20   3456.0944   4344.0295  248.715000     0.447000   
    0 days 00:00:30   3511.1177   4367.7521  234.

In [12]:

# Load face features with 10 s downsampling (ds_freq) and a 10 s rolling
# window (rw_size). The preview printed below has one row per 10 s timestamp
# for each participant ID.
face_df = face_loader.get_data(
    percentage=percentage,
    random_state=random_state,
    ds_freq="10s",
    rw_size="10s",
)
print("\nFace features shape:", face_df.shape)
print("\nFace features preview:")
print(face_df.head())



Face features shape: (173, 4858)

Face features preview:
                     CLNFgaze_frame  CLNFgaze_confidence  CLNFgaze_success  \
ID  TIMESTAMP                                                                
386 0 days 00:00:00           150.5             0.843113          0.823333   
    0 days 00:00:10           450.5             0.862236          0.853333   
    0 days 00:00:20           750.5             0.976928          1.000000   
    0 days 00:00:30          1050.5             0.980110          1.000000   
    0 days 00:00:40          1350.5             0.981177          1.000000   

                     CLNFgaze_x_0  CLNFgaze_y_0  CLNFgaze_z_0  CLNFgaze_x_1  \
ID  TIMESTAMP                                                                 
386 0 days 00:00:00      0.020882      0.114932     -0.985159     -0.187369   
    0 days 00:00:10      0.060876      0.158891     -0.979911     -0.185456   
    0 days 00:00:20      0.048942      0.159756     -0.984069     -0.185223   
