In [9]:
import os
import pandas as pd
import glob
import warnings

# Suppress warnings that are not important
warnings.filterwarnings("ignore")

# Define the folder where your data files are located
folder_path = r'C:\Users\fids\OneDrive - H. Lundbeck A S\Documents\GitHub\FIDS-analysis\Test folder'

# Create 'csv_files' directory for where we want to save our files if it doesn't exist
csv_files_dir = os.path.join(folder_path, 'csv_files')
if not os.path.exists(csv_files_dir):
    os.mkdir(csv_files_dir)

# Get a list of all .xlsx files in the folder
xlsx_files = glob.glob(os.path.join(folder_path, '*.xlsx'))

#Print the files for analysis
print(xlsx_files)

['C:\\Users\\fids\\OneDrive - H. Lundbeck A S\\Documents\\GitHub\\FIDS-analysis\\Test folder\\m74n2.xlsx', 'C:\\Users\\fids\\OneDrive - H. Lundbeck A S\\Documents\\GitHub\\FIDS-analysis\\Test folder\\m74n3.xlsx', 'C:\\Users\\fids\\OneDrive - H. Lundbeck A S\\Documents\\GitHub\\FIDS-analysis\\Test folder\\~$m74n1.xlsx']


In [15]:
# Loop over each .xlsx file in the folder
for file_path in xlsx_files:
    # Load the data
    df = pd.read_excel(file_path)
    
    # Check if the 'Inst. Freq. (Hz)' column exists in the DataFrame
    if 'Peak-to-Peak Frequency (Hz)' not in df.columns:
        print(f"Skipping {file_path} as it doesn't contain 'Peak-to-Peak Frequency (Hz)' column.")
        continue  # Skip this file and move to the next one
        
    # Create 'ten_min_bin' DataFrame
    ten_min_bin = pd.DataFrame() 
    
    # Calculate the number of events, mean_freq, and CV_freq for each 600,000 bin
    #find max number for rows to determine the range
    max_time = int(df['Time of Peak (ms)'].max())
    #Add the max needed number to the range to ensure we don't cut the bin and get an error
    bins = range(0, max_time + 600000, 600000)
    #Setting the conditions for the for loop
    for bin_start, bin_end in zip(bins[:-1], bins[1:]):
        #defining the mask, which are the boundaries for the bins to be looped over and updated for each calcualtd row
        mask = (df['Time of Peak (ms)'] >= bin_start) & (df['Time of Peak (ms)'] < bin_end)
        #Find the sum of rows in the mask
        n_of_events = mask.sum()
        #Adding mean frequency
        mean_freq = df[mask]['Peak-to-Peak Frequency (Hz)'].mean()
        #Applying the CV formula within the bounderies for the mask
        cv_freq = (df[mask]['Peak-to-Peak Frequency (Hz)'].std() / df[mask]['Peak-to-Peak Frequency (Hz)'].mean()) * 100
        #Making everything into a dataframe
        ten_min_bin = ten_min_bin.append({'n_of_events': n_of_events, 'mean_freq': mean_freq, 'CV_freq': cv_freq}, ignore_index=True)

   # Handle the last ten-minute bin (if it doesn't contain a full 600,000 milliseconds)
    #Find the last bin
    last_bin_start = bins[-1]
    # Extend it to be over 10 min to ensure it becomes included
    last_bin_end = max_time + 600000
    #Add the same components to the dataframe IF the last bin doesn't already exist (this is only to combat cases where the last bin isn't added as there is too little information)
    if last_bin_end <= max_time:
        mask = (df['Time of Peak (ms)'] >= last_bin_start) & (df['Time of Peak (ms)'] < last_bin_end)
        n_of_events = mask.sum()
        mean_freq = df[mask]['Peak-to-Peak Frequency (Hz)'].mean()
        cv_freq = (df[mask]['Peak-to-Peak Frequency (Hz)'].std() / df[mask]['Peak-to-Peak Frequency (Hz)'].mean()) * 100
        ten_min_bin = ten_min_bin.append({'n_of_events': n_of_events, 'mean_freq': mean_freq, 'CV_freq': cv_freq}, ignore_index=True)

        
### One min bin section ###

    # Create 'one_min_bins' DataFrame
    one_min_bins = pd.DataFrame()

    # Calculate for 1-minute bins
    bins = range(0, max_time + 60000, 60000)
    for bin_start, bin_end in zip(bins[:-1], bins[1:]):
        mask = (df['Time of Peak (ms)'] >= bin_start) & (df['Time of Peak (ms)'] < bin_end)
        n_of_events = mask.sum()
        mean_freq = df[mask]['Peak-to-Peak Frequency (Hz)'].mean()
        cv_freq = (df[mask]['Peak-to-Peak Frequency (Hz)'].std() / df[mask]['Peak-to-Peak Frequency (Hz)'].mean()) * 10
        one_min_bins = one_min_bins.append({'n_of_events': n_of_events, 'mean_freq': mean_freq, 'CV_freq': cv_freq}, ignore_index=True)

    # Handle the last one-minute bin (if it doesn't contain a full 60,000 milliseconds)
    last_bin_start = bins[-1]
    last_bin_end = max_time + 60000  # Extend it to the maximum time in the data
    if last_bin_end <= max_time:
        mask = (df['Time of Peak (ms)'] >= last_bin_start) & (df['Time of Peak (ms)'] < last_bin_end)
        n_of_events = mask.sum()
        mean_freq = df[mask]['Peak-to-Peak Frequency (Hz)'].mean()
        cv_freq = (df[mask]['Peak-to-Peak Frequency (Hz)'].std() / df[mask]['Peak-to-Peak Frequency (Hz)'].mean()) * 10
        one_min_bins = one_min_bins.append({'n_of_events': n_of_events, 'mean_freq': mean_freq, 'CV_freq': cv_freq}, ignore_index=True)

### Saving section ####

    # Get the base file name (without extension)
    base_file_name = os.path.splitext(os.path.basename(file_path))[0]

   # Create '10_min_bins' and '1_min_bins' subfolders in 'csv_files'
    ten_min_bins_dir = os.path.join(csv_files_dir, '10_min_bins')
    one_min_bins_dir = os.path.join(csv_files_dir, '1_min_bins')
    
    if not os.path.exists(ten_min_bins_dir):
        os.mkdir(ten_min_bins_dir)
    
    if not os.path.exists(one_min_bins_dir):
        os.mkdir(one_min_bins_dir)

    # Save the 10 min bin DataFrame as a .csv in '10_min_bins' subfolder
    save_path_10_min_bins = os.path.join(ten_min_bins_dir, f'{base_file_name}_10_min_bins.csv')
    ten_min_bin.to_csv(save_path_10_min_bins)
    print(f'{base_file_name}_10_min_bins has been saved here: {save_path_10_min_bins}')

    # Save the 1 min bin DataFrame as a .csv in '1_min_bins' subfolder
    save_path_1_min_bins = os.path.join(one_min_bins_dir, f'{base_file_name}_1_min_bins.csv')
    one_min_bins.to_csv(save_path_1_min_bins)
    print(f'{base_file_name}_1_min_bins has been saved here: {save_path_1_min_bins}')

AttributeError: 'DataFrame' object has no attribute 'append'

In [None]:
#m46n4_0-20min.xlsx and m53n1_0-37min.xlsx skipped due to the 'Inst. Freq. (Hz)' column not existing

In [20]:
print(pd.__version__)

2.0.3
