In [7]:
import os
import pandas as pd

# Directory paths
# NOTE(review): both paths are absolute and machine-specific; adjust them when
# running this notebook in a different environment.
# Root of the tree that is searched recursively for '*_app.csv' files.
raw_data_dir = '/home/vault/empkins/tpD/D02/RCT/raw_data'
# Directory in which each patient ID is looked up as a direct child entry and
# below which the facial action-unit ('*_facau.csv') files are searched.
processed_data_dir = '/home/vault/empkins/tpD/D02/processed_data/SonyCam_processed_data'

def find_csv_files(directory, suffix='_app.csv'):
    """Recursively collect files below ``directory`` whose name ends with ``suffix``.

    Args:
        directory: Root directory to walk with ``os.walk``.
        suffix: Required file-name ending (default ``'_app.csv'``).

    Returns:
        A list of paths (each ``os.path.join`` of the containing directory and
        the file name) in the order ``os.walk`` yields them. The list is empty
        when nothing matches or when ``directory`` does not exist.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith(suffix)
    ]

def check_patient_presence(patient_id, directory):
    """Report whether an entry named ``patient_id`` exists directly inside ``directory``.

    Args:
        patient_id: Name of the entry to look for.
        directory: Directory expected to contain the entry.

    Returns:
        ``True`` if ``directory/patient_id`` exists (``os.path.exists``, so a
        file or a directory both count), otherwise ``False``.
    """
    patient_path = os.path.join(directory, patient_id)
    return os.path.exists(patient_path)

# Collect every '*_app.csv' file below the raw-data root and pair it with the
# patient ID, taken as the part of the file name before the first underscore.
app_csv_files = find_csv_files(raw_data_dir)
app_csv_data = [
    (os.path.basename(csv_path).partition('_')[0], csv_path)
    for csv_path in app_csv_files
]
app_df = pd.DataFrame(app_csv_data, columns=['Patient ID', 'App CSV Path'])

# Flag whether an entry named after the patient exists in the processed-data directory.
app_df['Presence'] = app_df['Patient ID'].apply(
    lambda pid: check_patient_presence(pid, processed_data_dir)
)

def find_facemo_csv_files(patient_id, directory):
    """Locate a facial action-unit CSV (``*_facau.csv``) for one patient.

    Candidate locations are
    ``<directory>/<patient_id>/raw_variables/<entry>/facial/face_au`` for every
    ``<entry>`` in ``raw_variables`` whose name contains ``'_t2_'``. Each
    candidate is searched recursively with ``find_csv_files``.

    Only ``raw_variables`` is listed, because the candidate path is always
    built below it; walking the whole patient tree would only rebuild the same
    candidates repeatedly.

    Args:
        patient_id: Name of the patient's directory inside ``directory``.
        directory: Root directory holding one sub-directory per patient.

    Returns:
        The path of one matching ``*_facau.csv`` file, or ``None`` when no such
        file exists. Entries are visited in sorted order and the
        lexicographically smallest path within the first matching entry is
        returned, so the result does not depend on filesystem listing order.
    """
    raw_variables_dir = os.path.join(directory, patient_id, 'raw_variables')

    try:
        entry_names = sorted(os.listdir(raw_variables_dir))
    except OSError:
        # Missing or unreadable directory: treat as "nothing found" rather than
        # failing, which matches os.walk's default of ignoring listing errors.
        return None

    for dir_name in entry_names:
        if '_t2_' not in dir_name:
            continue
        facemo_csv_dir = os.path.join(raw_variables_dir, dir_name, 'facial', 'face_au')
        facemo_csv_files = find_csv_files(facemo_csv_dir, suffix='_facau.csv')
        if facemo_csv_files:
            # Pick deterministically when several files match.
            return min(facemo_csv_files)
    return None  # No '_facau.csv' file was found for this patient

# Look up the facial action-unit CSV once per distinct patient ID. A patient
# can appear several times in app_df (one row per '*_app.csv' file); iterating
# over the raw column would repeat the filesystem search and put duplicate keys
# into facemo_df, which makes the merge below multiply rows (k app files -> k*k rows).
facemo_csv_data = [
    (patient_id, find_facemo_csv_files(patient_id, processed_data_dir))
    for patient_id in app_df['Patient ID'].drop_duplicates()
]
facemo_df = pd.DataFrame(facemo_csv_data, columns=['Patient ID', 'Facemo CSV Path'])

# True where a facial action-unit CSV was found for the patient.
facemo_df['Presence'] = facemo_df['Facemo CSV Path'].notnull()

# Both frames carry a 'Presence' column, so the merged frame exposes them as
# 'Presence_x' (from app_df) and 'Presence_y' (from facemo_df).
# validate='many_to_one' asserts that facemo_df has at most one row per patient.
final_df = pd.merge(
    app_df,
    facemo_df,
    on='Patient ID',
    how='outer',
    validate='many_to_one',
)

In [8]:
# Write the merged table to 'patient_id_paths_au.csv' (a relative path, so it is
# resolved against the current working directory) without the DataFrame index.
final_df.to_csv('patient_id_paths_au.csv', index=False)