# 3/4: Data preprocessing 2
By Niloufar Shahdoust (niloufar.shahdoust@utah.edu)

In [1]:
import os
import mat73
import numpy as np
import pandas as pd
import random
from matplotlib import cm
from matplotlib import colormaps
from ast import literal_eval
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from visbrain.objects import BrainObj, SceneObj, SourceObj
from matplotlib.colors import to_hex
from PIL import Image

## reading data

In [2]:
input_folder = '2_brain_visualization_preProcessing_1'
output_folder = '3_brain_visualization_preProcessing_2'

# Collect all CSV files from the input folder.
# os.listdir returns entries in arbitrary order, so sort them: the region -> color
# assignment below depends on the order in which areas are first encountered,
# and sorting keeps that mapping reproducible across machines and re-runs.
csv_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.csv'))
if not csv_files:
    # Fail early with a clear message instead of an opaque np.concatenate error below.
    raise FileNotFoundError(f"No CSV files found in '{input_folder}'.")

# Load data into DataFrames (one DataFrame per CSV file, in csv_files order)
df_patients = []
for file_name in csv_files:
    file_path = os.path.join(input_folder, file_name)
    df = pd.read_csv(file_path)
    df_patients.append(df)

# Collect the distinct 'area' values across all files, keeping first-seen order
area_per_patient = [df['area'].unique() for df in df_patients]
area_all_patients = pd.unique(np.concatenate(area_per_patient)).tolist()

# Generate one color per area by sampling matplotlib's 'hsv' colormap at evenly
# spaced positions i / num_colors (the position 1.0 itself is never sampled).
num_colors = len(area_all_patients)
colors = [plt.cm.hsv(i / num_colors) for i in range(num_colors)]  # Evenly spaced hues

# Map each area name to its hex color string
region_colors = {region: to_hex(colors[i]) for i, region in enumerate(area_all_patients)}

## taking a look at areas

In [3]:
# Display the area -> hex color mapping built in the previous cell.
region_colors

{'TrIFG triangular part of the inferior frontal gyrus': '#ff0000',
 'MFG middle frontal gyrus': '#ff1800',
 'MTG middle temporal gyrus': '#ff2f00',
 'ACgG anterior cingulate gyrus': '#ff4d00',
 'FO frontal operculum': '#ff6400',
 'LOrG lateral orbital gyrus': '#ff8200',
 'Hippocampus': '#ff9a00',
 'Ent entorhinal area': '#ffb100',
 'FuG fusiform gyrus': '#ffcf00',
 'PHG parahippocampal gyrus': '#ffe600',
 'AOrG anterior orbital gyrus': '#f8fd00',
 'MCgG middle cingulate gyrus': '#e2ff00',
 'AIns anterior insula': '#cbff00',
 'Amygdala': '#adff00',
 'Inf Lat Vent': '#96ff00',
 'OrIFG orbital part of the inferior frontal gyrus': '#78ff00',
 'SFG superior frontal gyrus': '#61ff00',
 'MOrG medial orbital gyrus': '#43ff00',
 'ITG inferior temporal gyrus': '#2bff00',
 'LiG lingual gyrus': '#14ff00',
 'POrG posterior orbital gyrus': '#02ff0c',
 'PP planum polare': '#00ff21',
 'STG superior temporal gyrus': '#00ff3f',
 'CO central operculum': '#00ff57',
 'PrG precentral gyrus': '#00ff6e',
 'MS

## preprocessing 2:
I want to keep ONLY a selected set of brain areas and, for each patient, save the rows belonging to those areas, adding `left`/`right` flags derived from the `Nmm_atlas` label.

In [4]:
# Preview the first rows of the first loaded DataFrame to inspect its columns.
df_patients[0].head()

Unnamed: 0,Channel_Name,Nmm_atlas,area,coordinate_x,coordinate_y,coordinate_z
0,LOFC1,Right TrIFG triangular part of the inferior fr...,TrIFG triangular part of the inferior frontal ...,37.71687,32.353153,13.918516
1,LOFC2,Left MFG middle frontal gyrus,MFG middle frontal gyrus,-34.6146,33.788637,28.812653
2,LOFC3,Right MFG middle frontal gyrus,MFG middle frontal gyrus,49.31526,21.86168,33.689576
3,LOFC5,Left MTG middle temporal gyrus,MTG middle temporal gyrus,-63.272704,-14.200921,-18.179159
4,LOFC7,Right MFG middle frontal gyrus,MFG middle frontal gyrus,38.234987,30.466426,19.42425


In [5]:

input_folder = '2_brain_visualization_preProcessing_1'
output_folder = '3_brain_visualization_preProcessing_2'

# List of specific areas to keep (matched exactly against the 'area' column).
# To restrict the export to fewer regions, shorten this list, e.g. ["Hippocampus"].
target_areas = [
    "Hippocampus",
    "PHG parahippocampal gyrus",
    "FuG fusiform gyrus",
    "MTG middle temporal gyrus",
    "AIns anterior insula",
    "Amygdala",
    "Caudate",
    "SFG superior frontal gyrus",
    "MFG middle frontal gyrus",
    "ACgG anterior cingulate gyrus",
    "PP planum polare"
]

# Make sure the destination exists; to_csv fails when the directory is missing.
os.makedirs(output_folder, exist_ok=True)

# Get list of CSV files in the input folder (sorted so the processing order is stable)
csv_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.csv'))

# Process each CSV file: filter rows by area, flag left/right, save if non-empty
for file_name in csv_files:
    file_path = os.path.join(input_folder, file_name)

    # Load the CSV into a DataFrame
    df = pd.read_csv(file_path)

    # Without an 'area' column there is nothing to filter on
    if 'area' not in df.columns:
        print(f"Column 'area' not found in {file_name}. Skipping file.")
        continue

    # Keep only rows whose area is one of the targets; copy so the column
    # assignments below do not trigger SettingWithCopyWarning
    filtered_df = df[df['area'].isin(target_areas)].copy()

    # Add 'left' and 'right' flag columns, defaulting to 0
    filtered_df['left'] = 0
    filtered_df['right'] = 0

    # Set a flag to 1 where the 'Nmm_atlas' text contains the word
    # (case-insensitive substring match; missing values count as no match)
    if 'Nmm_atlas' in filtered_df.columns:
        atlas_label = filtered_df['Nmm_atlas']
        filtered_df.loc[atlas_label.str.contains('left', case=False, na=False), 'left'] = 1
        filtered_df.loc[atlas_label.str.contains('right', case=False, na=False), 'right'] = 1

    # Do not write a file when no row matched any target area
    if filtered_df.empty:
        print(f"Filtered DataFrame for {file_name} is empty. Skipping save.")
        continue

    # Save under the same file name in the output folder
    output_path = os.path.join(output_folder, file_name)
    filtered_df.to_csv(output_path, index=False)
    print(f"Filtered data saved to: {output_path}")

Filtered data saved to: 3_brain_visualization_preProcessing_2\201810.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201811.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201901.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201902.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201902r.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201903.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201905.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201909.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201910.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201911.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201913.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201914.csv
Filtered data saved to: 3_brain_visualization_preProcessing_2\201915.csv
Filtered data saved to: 3_brain_visualization_preP