In [None]:
import os
import pandas as pd
from tqdm import tqdm
import re

class TextFileProcessor:
    """Parses report text files found in one directory.

    For every ``.txt`` file the processor extracts a subject ID from the
    filename, up to two percentage values from the lines following the
    'Effects of Stimulation' heading, and the anode/cathode numbers.

    NOTE(review): a later cell of this notebook defines another class that is
    also called ``TextFileProcessor``. Once that cell has run, this name no
    longer refers to this class, so re-running the cell that calls
    ``TextFileProcessor(path)`` requires re-running this definition first.
    """

    # A percentage in parentheses such as "(12.34%)". Whitespace is tolerated
    # after the decimal point and before the percent sign. The integer and
    # fractional digits are captured separately so that no whitespace
    # (tab, non-breaking space, ...) ever reaches float().
    _PERCENT_RE = re.compile(r'\((\d+)\.\s*(\d+)\s*%\)')

    # The "... Stim. Anode-Cathode #: <anode>-<cathode>" line.
    _ELECTRODE_RE = re.compile(r'e Stim\.\s+Anode-Cathode #:\s+(\d+)-(\d+)')

    def __init__(self, path):
        """Remembers the directory whose ``.txt`` files will be processed.

        Args:
            path: Directory passed to ``os.listdir`` by ``process_files``.
        """
        self.path = path

    def extract_subject_id(self, filename):
        """Extracts the subject ID from the filename.

        The filename (without its extension) is split on ``_``; the token that
        follows the first token starting with ``FR1`` or ``FR2`` is the ID.

        Args:
            filename: File name such as ``'RAM_FR1_<subject>_report.txt'``.

        Returns:
            The subject ID string, or ``None`` when no ``FR1``/``FR2`` token
            exists or when that token is the last one.
        """
        # Drop the extension first so an ID in the last position does not
        # come back as e.g. 'R1002M.txt'.
        stem = os.path.splitext(filename)[0]
        parts = stem.split('_')
        for i, part in enumerate(parts):
            if part.startswith(('FR1', 'FR2')):
                return parts[i + 1] if i + 1 < len(parts) else None
        return None

    def extract_stimulation_values(self, lines):
        """Extracts stimulation values from the text.

        Only the first line containing 'Effects of Stimulation' is considered;
        the two lines after it are each searched for one parenthesised
        percentage.

        Args:
            lines: The lines of one report.

        Returns:
            A list with zero, one or two floats, in line order. Lines without
            a percentage contribute nothing.
        """
        values = []
        for i, line in enumerate(lines):
            if 'Effects of Stimulation' in line:
                # Slicing clamps at the end of the list, so a heading on the
                # last or second-to-last line is handled without index checks.
                for follower in lines[i + 1:i + 3]:
                    match = self._PERCENT_RE.search(follower)
                    if match:
                        values.append(float(f"{match.group(1)}.{match.group(2)}"))
                break
        return values

    def extract_electrode_numbers(self, lines):
        """Extracts the anode and cathode numbers from the text.

        Args:
            lines: The lines of one report.

        Returns:
            ``(anode, cathode)`` as ints taken from the first matching line,
            or ``(None, None)`` when no line matches.
        """
        for line in lines:
            match = self._ELECTRODE_RE.search(line)
            if match:
                return int(match.group(1)), int(match.group(2))
        return None, None

    def read_lines_from_file(self, filepath):
        """Reads lines from a text file.

        Args:
            filepath: Path of the file to read.

        Returns:
            The list returned by ``readlines()``, or ``[]`` after printing a
            message when reading fails.
        """
        try:
            with open(filepath, 'r') as file:
                return file.readlines()
        except Exception as e:
            # Best effort: one unreadable report must not abort the whole run.
            print(f"Error reading file {filepath}: {e}")
            return []

    def process_files(self):
        """Processes all text files in the directory and extracts information.

        Files are visited in sorted name order so that the row order of the
        result is reproducible (``os.listdir`` returns names in arbitrary
        order). A file contributes a row only if it has more than one line, a
        subject ID was found and at least one stimulation value was found.

        Returns:
            A DataFrame with the columns ``Subject_ID``,
            ``Stimulation_Value_1``, ``Stimulation_Value_2``, ``Anode`` and
            ``Cathode``. Missing second values and missing electrode numbers
            are ``None``.
        """
        data = []
        for filename in tqdm(sorted(os.listdir(self.path))):
            if not filename.endswith('.txt'):
                continue

            lines = self.read_lines_from_file(os.path.join(self.path, filename))
            if len(lines) <= 1:
                continue

            subject_id = self.extract_subject_id(filename)
            stimulation_values = self.extract_stimulation_values(lines)
            if not (subject_id and stimulation_values):
                continue

            anode, cathode = self.extract_electrode_numbers(lines)
            data.append([
                subject_id,
                stimulation_values[0],
                stimulation_values[1] if len(stimulation_values) > 1 else None,
                anode,
                cathode
            ])

        # Create a DataFrame with extracted data
        df = pd.DataFrame(data, columns=['Subject_ID', 'Stimulation_Value_1', 'Stimulation_Value_2', 'Anode', 'Cathode'])
        return df

In [None]:
# Directory containing the preprocessed report text files.
reports_dir = '/Volumes/OneTouch/datasets/Kahana_Epilepsy_iEEG/metadata/reports/data10_pdfs/RAM/review_pyper/pdf_txt_preprocessed'

# Parse every report in that directory into one DataFrame and show it.
processor = TextFileProcessor(reports_dir)
df = processor.process_files()
display(df)

In [None]:
import os
import pandas as pd
from tqdm import tqdm
import re

class TextFileProcessor:
    """Adds anode/cathode coordinates read from ``RAW_coords.txt.mni`` files.

    NOTE(review): this definition replaces the ``TextFileProcessor`` class
    defined in an earlier cell of this notebook (the one whose constructor
    takes a ``path``). After this cell has run, that earlier class is no
    longer reachable under this name.
    """

    def find_and_read_files(self, root, subject_id):
        """Finds and reads the RAW_coords.txt.mni file for the given subject ID.

        Args:
            root: Directory that contains the ``subjects`` folder.
            subject_id: Name of the subject's folder below ``subjects``.

        Returns:
            The lines of ``<root>/subjects/<subject_id>/tal/RAW_coords.txt.mni``,
            or ``[]`` when the file does not exist or cannot be read (a
            message is printed in the latter case).
        """
        raw_coords_path = os.path.join(root, 'subjects', subject_id, 'tal', 'RAW_coords.txt.mni')

        if not os.path.exists(raw_coords_path):
            return []

        try:
            with open(raw_coords_path, 'r') as file:
                return file.readlines()
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole run.
            print(f"Error reading RAW_coords.txt.mni for subject {subject_id}: {e}")
        return []

    def extract_coordinates(self, raw_coords, contact_number):
        """Extracts the x, y, z coordinates for the given contact number.

        Only lines with exactly four whitespace-separated fields are
        considered. Lines whose first field is not an integer (for example a
        header row) are skipped instead of raising.

        Args:
            raw_coords: Lines of a coordinates file.
            contact_number: Contact number compared against the first field.

        Returns:
            ``(x, y, z)`` as floats from the first matching line, or
            ``(None, None, None)`` when no line matches.

        Raises:
            ValueError: If the matching line has non-numeric coordinates.
        """
        for line in raw_coords:
            parts = line.split()
            if len(parts) != 4:
                continue
            try:
                contact = int(parts[0])
            except ValueError:
                # Not a data row (e.g. a header line); keep searching.
                continue
            if contact == contact_number:
                x, y, z = map(float, parts[1:])
                return x, y, z
        return None, None, None

    def process_raw_coords(self, root, df):
        """Processes RAW_coords.txt.mni files for each subject and adds coordinates to the DataFrame.

        Each subject's file is read at most once per call, even when the
        subject appears in several rows.

        Args:
            root: Directory that contains the ``subjects`` folder.
            df: DataFrame with the columns ``Subject_ID``, ``Anode`` and
                ``Cathode``. It is modified in place.

        Returns:
            The same ``df`` object with two added columns, ``Anode_Coords``
            and ``Cathode_Coords``, each holding an ``(x, y, z)`` tuple
            (``(None, None, None)`` when no coordinates were found).
        """
        anode_coords = []
        cathode_coords = []
        lines_by_subject = {}  # subject ID -> lines of its coordinates file

        for _, row in tqdm(df.iterrows(), total=len(df)):
            subject_id = row['Subject_ID']
            if subject_id not in lines_by_subject:
                lines_by_subject[subject_id] = self.find_and_read_files(root, subject_id)
            raw_coords = lines_by_subject[subject_id]

            anode_coords.append(self.extract_coordinates(raw_coords, row['Anode']))
            cathode_coords.append(self.extract_coordinates(raw_coords, row['Cathode']))

        # Add the extracted coordinates to the DataFrame
        df['Anode_Coords']   = anode_coords
        df['Cathode_Coords'] = cathode_coords

        return df


In [None]:
# Root directory that holds the per-subject coordinate files.
coords_root = '/Volumes/OneTouch/datasets/Kahana_Epilepsy_iEEG/metadata/reports/data10_coord_mni/RAM'

# Attach anode/cathode coordinates to the DataFrame built in the earlier cell.
processor = TextFileProcessor()
df = processor.process_raw_coords(coords_root, df)
df

In [None]:
# Write the combined table to disk as CSV.
master_list_csv = '/Volumes/OneTouch/datasets/Kahana_Epilepsy_iEEG/metadata/master_list_stim_effect.csv'
df.to_csv(master_list_csv)