In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import seaborn as sns

### Function that normalizes the date format used in the file names


In [2]:
def normalize_date(date_str):
    """Convert a filename date fragment into an ISO ``YYYY-MM-DD`` string.

    Parameters
    ----------
    date_str : str
        Date fragment including its leading underscore, e.g.
        ``'_2024_03_05'`` (four-digit year) or ``'_24_03_05'``
        (two-digit year).

    Returns
    -------
    str or None
        The date formatted as ``'%Y-%m-%d'``, or ``None`` when ``date_str``
        matches none of the supported formats or is not a string.
    """
    from datetime import datetime

    # Four-digit year is tried first; the two-digit form is the fallback.
    date_formats = ('_%Y_%m_%d', '_%y_%m_%d')
    for fmt in date_formats:
        try:
            return datetime.strptime(date_str, fmt).strftime('%Y-%m-%d')
        except ValueError:
            continue  # Not this format; try the next one
        except TypeError as e:
            # strptime rejects non-string input; report it and give up
            print(f"Error occurred while parsing date: {e}")
            return None
    # None of the formats matched
    return None


### Function that will read the .dat files


In [3]:
def read_dat_file(file_path):
    """Parse a scoring ``.dat`` file into one row per completed trial.

    Only lines after the first line containing ``'#BEGIN DATA'`` are read.
    Each data line is split on commas and its second field is the event code:

    * a number -- a zone; the first number of a trial is its start zone and
      the next number is its end zone
    * ``'s'`` / ``'p'`` -- increments the stop / prod counter of the most
      recently started trial
    * ``'EOF'`` -- stops parsing
    * ``'n'``, ``'f'`` and any other code -- ignored

    Parameters
    ----------
    file_path : str
        Path of the ``.dat`` file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``Start_Zone``, ``End_Zone``, ``Stops`` and ``Prods``, one
        row per trial that has both a start and an end zone.

    Raises
    ------
    ValueError
        If the file contains no ``'#BEGIN DATA'`` line.
    """
    with open(file_path, 'r') as file:
        lines = file.readlines()

    # Locate the marker; a default avoids leaking a bare StopIteration
    data_start_index = next(
        (i for i, line in enumerate(lines) if '#BEGIN DATA' in line),
        None,
    )
    if data_start_index is None:
        raise ValueError(f"No '#BEGIN DATA' marker found in {file_path}")

    start_zones = []
    end_zones = []
    stops = []
    prods = []

    for raw_line in lines[data_start_index + 1:]:
        fields = raw_line.strip().split(',')
        if len(fields) < 2:
            continue  # Blank or malformed line

        code = fields[1].strip()

        if code == 'EOF':
            break
        if code.isdigit():
            zone = int(code)
            if len(start_zones) == len(end_zones):
                # No trial is open: this zone starts a new one
                start_zones.append(zone)
                stops.append(0)
                prods.append(0)
            else:
                # A trial is open: this zone closes it
                end_zones.append(zone)
        elif code == 's':
            # Ignore a stop logged before any trial has started
            if stops:
                stops[-1] += 1
        elif code == 'p':
            # Ignore a prod logged before any trial has started
            if prods:
                prods[-1] += 1
        # NOTE(review): 'n' was commented as "skip trial", but only the
        # line itself has ever been skipped; the trial's zones are kept.
        # Confirm whether the whole trial should be discarded.

    # A file that ends mid-trial has a start zone without an end zone; drop
    # that unfinished trial so every column has the same length.
    n_complete = len(end_zones)
    trial_data = pd.DataFrame({
        'Start_Zone': start_zones[:n_complete],
        'End_Zone': end_zones,
        'Stops': stops[:n_complete],
        'Prods': prods[:n_complete]
    })

    return trial_data


In [None]:
# NOTE: ".dat file_path" looks like a placeholder -- replace it with the path to a real .dat file before running
print(read_dat_file(".dat file_path"))

## Here we will input the rewarded zone.
### With the function ErrorScore we will determine the difference between the end zone (second in each pair) and the Rewarded Zone (RewZone)


In [None]:
# NOTE: X is not defined anywhere visible in this notebook -- presumably a placeholder;
# replace it with the number of the rewarded zone before running this cell
RewZone = X

In [5]:
def ErrorScore(data_path, RewZone, n_zones=8):
    """Score each trial by the signed zone distance from the rewarded zone.

    The trials are read with ``read_dat_file`` and an ``Error`` column is
    added. The error is ``End_Zone - RewZone`` wrapped around a ring of
    ``n_zones`` zones so that it is the shorter way round: with the default
    8 zones the values lie between -3 and 4, and 0 means the trial ended on
    the rewarded zone.

    Parameters
    ----------
    data_path : str
        Path of the ``.dat`` file to score.
    RewZone : int
        Number of the rewarded zone.
    n_zones : int, optional
        How many zones the ring has (default 8).

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``read_dat_file`` with an added ``Error``
        column.
    """
    trial_data = read_dat_file(data_path)

    # Offset in the range 0 .. n_zones-1, measured in one direction
    offset = (trial_data['End_Zone'] - RewZone) % n_zones

    # Offsets past the half-way point are shorter the other way round,
    # so express them as negative distances instead
    half = n_zones // 2
    trial_data['Error'] = offset.where(offset <= half, offset - n_zones)
    return trial_data


In [None]:
# Score a single file: ErrorScore returns the trial table with an added 'Error' column.
# NOTE: ".dat file_path" looks like a placeholder path -- replace it before running.
ErrorScoreData = ErrorScore(".dat file_path", RewZone)
print(ErrorScoreData)

### Iteration function
#### This function will iterate through a folder of your choice and look for .dat files whose name starts with the string you also input, which should be the ID of the animal. 
#### The function writes a single CSV file (`MEA_Results_<folder name>.csv`) in that folder containing the ErrorScore results for all matching files


In [6]:
def process_files_in_folder(folder_path, RewZone):
    file_prefix = 'RMB'
    # Find all .dat files starting with the given prefix in the specified folder
    search_pattern = os.path.join(folder_path, f'{file_prefix}*.dat')
    dat_files = glob.glob(search_pattern)

    all_results = []

    for dat_file in dat_files:
        # Read data from the .dat file
        trial_data = read_dat_file(dat_file)
        
        if not trial_data.empty:
            # Calculate error scores and create DataFrame
            df = ErrorScore(dat_file, RewZone)
            
            # Extract the date from the filename
            base_name = os.path.basename(dat_file)
            date_part = base_name.replace(file_prefix, '').replace('.dat', '')
            normalized_day = normalize_date(date_part)
            # Add Date column to DataFrame
            df.insert(0, 'Date', normalized_day)
            
            # Append DataFrame to all_results
            all_results.append(df)

    # Concatenate all DataFrames into one
    result_df = pd.concat(all_results, ignore_index=True)

    # Add Date and Trial indices
    result_df['Date_Index'] = result_df.groupby('Date').ngroup() + 1
    result_df['Trial_Index'] = result_df.groupby('Date').cumcount() + 1
    
    # Save the DataFrame to a CSV file
    output_filename = os.path.join(folder_path, f'MEA_Results_{os.path.basename(folder_path)}.csv')
    result_df.to_csv(output_filename, index=False)
    print(f"Results saved to {output_filename}")
    
    classify_errors(output_filename)


### Use example

In [None]:
folder_path = 'X:\\MATT_SCORING'  # Replace with the path to your folder
file_prefix = 'RMB4'  # Example file prefix to look for
RewZone = 5  # Example value for RewZone
process_files_in_folder(folder_path, file_prefix, RewZone)


## Combine CSV files from subfolders

In [7]:
def combine_csv_files(root_folder, output_name='combined_data.csv'):
    """Merge every CSV found under ``root_folder`` into one CSV file.

    The folder tree is walked recursively. Each CSV is loaded and given an
    ``Animal_ID`` column holding the part of its folder's name before the
    first underscore. The merged table is written to
    ``root_folder/output_name``.

    The output file itself is skipped while walking, so running the
    function again does not feed the previous result back in and duplicate
    every row.

    Parameters
    ----------
    root_folder : str
        Top of the folder tree to search; also where the output is saved.
    output_name : str, optional
        File name of the merged CSV (default ``'combined_data.csv'``).

    Returns
    -------
    None
    """
    combined_filename = os.path.join(root_folder, output_name)
    # Normalised form of the output path, used to recognise it in the walk
    output_key = os.path.normcase(os.path.abspath(combined_filename))

    all_data = []

    for subdir, dirs, files in os.walk(root_folder):
        dirs.sort()  # Visit sub-folders in a reproducible order
        animal_id = os.path.basename(subdir).split('_')[0]

        for file in sorted(files):
            if not file.lower().endswith('.csv'):
                continue

            file_path = os.path.join(subdir, file)
            if os.path.normcase(os.path.abspath(file_path)) == output_key:
                continue  # Our own output from an earlier run

            df = pd.read_csv(file_path)
            df['Animal_ID'] = animal_id
            all_data.append(df)

    if all_data:
        combined_df = pd.concat(all_data, ignore_index=True)
        combined_df.to_csv(combined_filename, index=False)
        print(f"Combined data saved to {combined_filename}")
    else:
        print("No CSV files found in the specified folder.")

# combine_csv_files walks root_folder recursively and writes combined_data.csv into it
root_folder = 'X:/MATT_SCORING'  # Replace with the root path to your folder containing all the CSVs
combine_csv_files(root_folder)


Combined data saved to X:/MATT_SCORING\combined_data.csv


## Fit a linear mixed model to determine whether there are differences between groups in the types of errors

In [None]:
# Fit a linear mixed model of Error on Age and Group, with trials grouped by animal
# NOTE(review): `sm` is imported but not used in this cell.
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Load the combined dataset
# NOTE(review): nothing visible in this notebook writes 'Age' or 'Group' columns
# into combined_data.csv -- presumably they must be added to the file before this
# cell is run; confirm.
combined_data = pd.read_csv("X:/MATT_SCORING/combined_data.csv")

# Convert Animal_ID, Age, and Group to categorical data type
combined_data['Animal_ID'] = combined_data['Animal_ID'].astype('category')
combined_data['Age'] = combined_data['Age'].astype('category')
combined_data['Group'] = combined_data['Group'].astype('category')

# Formula: Error explained by Age, Group and their interaction (the `*` operator),
# with both predictors treated as categorical via C(...)
formula = 'Error ~ C(Age) * C(Group)'

# Fit the linear mixed model; groups= marks all trials from one animal as one group
model = smf.mixedlm(formula, combined_data, groups=combined_data['Animal_ID'])
result = model.fit()

# Print the summary of the model
print(result.summary())
