In [1]:
import pandas as pd
import os

In [8]:
# Source files: three spot tables exported from runs of the same experiment.
input_file1, input_file2, input_file3 = (
    r"C:\Users\modri\Desktop\Lab\runs from the same experiment\C2-position_spots1.csv",
    r"C:\Users\modri\Desktop\Lab\runs from the same experiment\C2-position_spots2.csv",
    r"C:\Users\modri\Desktop\Lab\runs from the same experiment\C2-position_spots3.csv",
)

# Each group below is one pairwise combination of the three files.
input_filesA = [input_file1, input_file2]
input_filesB = [input_file1, input_file3]
input_filesC = [input_file2, input_file3]

# Nested list: one sub-list of file paths per group.
parsed_input = [input_filesA, input_filesB, input_filesC]

In [9]:
def load_DataFrame(filepath: str) -> pd.DataFrame:
    """
    Loads a DataFrame from a file based on its extension.
    Supported formats: CSV, Excel, Feather, Parquet, HDF5, JSON.

    Parameters
    ----------
    filepath : str or os.PathLike
        Location of the file. The extension is matched case-insensitively,
        so ``data.CSV`` is treated like ``data.csv``.

    Returns
    -------
    pd.DataFrame
        The table produced by the pandas reader that matches the extension.

    Raises
    ------
    RuntimeError
        If the extension is not supported or the pandas reader fails.
        The underlying exception is attached as ``__cause__``.
    """
    # Normalise pathlib.Path / os.PathLike to a plain string so the
    # extension check below works for every path flavour.
    filepath = os.fspath(filepath)

    # Lower-case only for extension detection; the reader gets the real path.
    _, ext = os.path.splitext(filepath.lower())

    # Extension -> pandas reader dispatch table.
    readers = {
        '.csv': pd.read_csv,
        '.xls': pd.read_excel,
        '.xlsx': pd.read_excel,
        '.feather': pd.read_feather,
        '.parquet': pd.read_parquet,
        '.h5': pd.read_hdf,
        '.hdf5': pd.read_hdf,
        '.json': pd.read_json,
    }

    try:
        reader = readers.get(ext)
        if reader is None:
            raise ValueError(f"{ext} is not a supported file format.")
        return reader(filepath)
    except Exception as e:
        # Keep the single RuntimeError contract for callers, but chain the
        # original exception so the real traceback is not lost.
        raise RuntimeError(f"Failed to load file '{filepath}': {e}") from e

In [10]:
def get_columns(df: pd.DataFrame) -> list:
    """
    List the DataFrame's column labels, in their existing order.
    """
    column_names = df.columns.to_list()
    return column_names

In [33]:
# Names of the source-table columns holding the track id, time and x/y position.
track_id, position_t, position_x, position_y = (
    'TRACK_ID',
    'POSITION_T',
    'POSITION_X',
    'POSITION_Y',
)

In [36]:
def extract(df: pd.DataFrame, track_id: str, position_t: str, position_x: str, position_y: str, mirror_y: bool = True) -> pd.DataFrame:
    """
    Pull the tracking columns out of a spot table as clean numeric data.

    The four named columns are selected, every cell is converted with
    ``pd.to_numeric(errors='coerce')`` and any row that ends up with a
    missing value in one of them is dropped (for example non-numeric
    label/unit rows at the top of an export). The input frame is not modified.

    Parameters
    ----------
    df : pd.DataFrame
        Source table.
    track_id, position_t, position_x, position_y : str
        Names of the columns in ``df`` holding the track id, time point and
        x/y position.
    mirror_y : bool, default True
        If True, reflect the y values around the midpoint of their observed
        range, ``(min + max) / 2``, computed from the rows that survive cleaning.

    Returns
    -------
    pd.DataFrame
        Columns ``'Track ID'``, ``'Time point'``, ``'X coordinate'`` and
        ``'Y coordinate'``; the original row labels of the kept rows are preserved.

    Raises
    ------
    KeyError
        If any of the requested columns is absent from ``df``.
    """
    wanted = [track_id, position_t, position_x, position_y]

    # Fail early with a message that names the missing column(s).
    missing = [name for name in wanted if name not in df.columns]
    if missing:
        raise KeyError(f"Column(s) not found in DataFrame: {missing}")

    # Explicit copy: the y-column assignment below then writes to a frame this
    # function owns, never to a view/derived slice of the caller's data.
    spots = df[wanted].apply(pd.to_numeric, errors='coerce').dropna().copy()

    if mirror_y:
        # Note: the y coordinates extracted from TrackMate are mirrored. That
        # does not affect the statistical tests, only the data visualization.
        # Reflect the y coordinates around the midpoint so the directionality
        # matches the microscope videos.
        y_mid = (spots[position_y].min() + spots[position_y].max()) / 2
        spots[position_y] = 2 * y_mid - spots[position_y]

    return spots.rename(columns={
        track_id: 'Track ID',
        position_t: 'Time point',
        position_x: 'X coordinate',
        position_y: 'Y coordinate',
    })


# Read the first input file once and reuse it for both previews
# (extract() works on its own copy, so the raw frame stays unchanged).
preview_raw = load_DataFrame(input_file1)

# Raw table exactly as loaded.
display(preview_raw)

# Same table reduced to the numeric tracking columns, with the y axis mirrored.
display(extract(
    preview_raw,
    track_id=track_id,
    position_t=position_t,
    position_x=position_x,
    position_y=position_y,
    mirror_y=True,
))

Unnamed: 0,LABEL,ID,TRACK_ID,QUALITY,POSITION_X,POSITION_Y,POSITION_Z,POSITION_T,FRAME,RADIUS,...,MEAN_INTENSITY_CH1,MEDIAN_INTENSITY_CH1,MIN_INTENSITY_CH1,MAX_INTENSITY_CH1,TOTAL_INTENSITY_CH1,STD_INTENSITY_CH1,EXTRACK_P_STUCK,EXTRACK_P_DIFFUSIVE,CONTRAST_CH1,SNR_CH1
0,Label,Spot ID,Track ID,Quality,X,Y,Z,T,Frame,Radius,...,Mean intensity ch1,Median intensity ch1,Min intensity ch1,Max intensity ch1,Sum intensity ch1,Std intensity ch1,Probability stuck,Probability diffusive,Contrast ch1,Signal/Noise ratio ch1
1,Label,Spot ID,Track ID,Quality,X,Y,Z,T,Frame,R,...,Mean ch1,Median ch1,Min ch1,Max ch1,Sum ch1,Std ch1,P stuck,P diffusive,Ctrst ch1,SNR ch1
2,,,,(quality),(micron),(micron),(micron),(sec),,(micron),...,(counts),(counts),(counts),(counts),(counts),(counts),,,,
3,ID96513,96513,0,1.6490188837051392,993.0290217416159,699.6608159357883,0.0,85.0,85,2.5,...,5.648648648648648,8.0,0.0,8.0,209.0,3.0204508145512197,,,0.6651964829411263,1.4941260891231885
4,ID77824,77824,0,1.720259428024292,994.6400443331037,671.2184631580113,0.0,1.0,1,2.5,...,6.45945945945946,8.0,0.0,8.0,239.0,2.501050830202228,,,0.736873523839186,2.1914340823072602
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17028,ID108732,108732,546,1.0971896648406982,407.94267891866383,165.84884364902126,0.0,138.0,138,2.5,...,4.324324324324325,5.0,0.0,6.0,160.0,1.4917188827742405,,,0.7213555675094137,2.4296295811339115
17029,ID110124,110124,546,0.8179607391357422,408.02097977189607,170.375539587671,0.0,144.0,144,2.5,...,3.108108108108108,4.0,0.0,4.0,115.0,1.2863338037440575,,,0.712674857720334,2.010892918424936
17030,ID108959,108959,546,1.3304204940795898,408.01229211647376,168.65597293584094,0.0,139.0,139,2.5,...,4.918918918918919,6.0,0.0,6.0,182.0,2.0463840518552936,,,0.7597900407486704,2.075607627825034
17031,ID109406,109406,546,1.2695907354354858,405.00149258959044,168.76712790493374,0.0,140.0,140,2.5,...,4.972972972972973,6.0,0.0,6.0,184.0,2.0613707209643906,,,0.7080529125087028,2.000112318973893


Unnamed: 0,Track ID,Time point,X coordinate,Y coordinate
3,0.0,85.0,993.029022,185.427224
4,0.0,1.0,994.640044,213.869577
5,0.0,50.0,999.475491,199.005942
6,0.0,93.0,993.182373,183.022464
7,0.0,7.0,992.863023,209.271009
...,...,...,...,...
17028,546.0,138.0,407.942679,719.239196
17029,546.0,144.0,408.020980,714.712500
17030,546.0,139.0,408.012292,716.432067
17031,546.0,140.0,405.001493,716.320912


In [None]:
def parsed_file(input_files):  # File-reading
    """
    Load every file of a nested file list and stack the rows into one DataFrame.

    Parameters
    ----------
    input_files : list of lists of file paths, or None
        Each sub-list is numbered from 1 in the order given and that number is
        written to a ``Condition`` column. Files inside a sub-list are numbered
        from 1 and that number is written to a ``Replicate`` column.

    Returns
    -------
    pd.DataFrame
        Rows of all loaded files concatenated in load order, with the
        ``Condition`` and ``Replicate`` columns added. Each file keeps its own
        row index, so index labels can repeat across files. An empty DataFrame
        is returned when ``input_files`` is None or contains no files.

    Raises
    ------
    RuntimeError
        Propagated from ``load_DataFrame`` when a file cannot be read.
    """
    if input_files is None:
        return pd.DataFrame()

    frames = []
    for condition, sublist in enumerate(input_files, start=1):  # one condition per sub-list
        for replicate, path in enumerate(sublist, start=1):  # one replicate per file
            frame = load_DataFrame(path)
            frame['Condition'] = condition
            frame['Replicate'] = replicate
            frames.append(frame)

    # pd.concat raises ValueError on an empty list; treat "no files" the same
    # way as "no input" instead of crashing.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, axis=0)

# Show the combined table built from every group of input files.
display(parsed_file(input_files=parsed_input))



Unnamed: 0,LABEL,ID,TRACK_ID,QUALITY,POSITION_X,POSITION_Y,POSITION_Z,POSITION_T,FRAME,RADIUS,...,MIN_INTENSITY_CH1,MAX_INTENSITY_CH1,TOTAL_INTENSITY_CH1,STD_INTENSITY_CH1,EXTRACK_P_STUCK,EXTRACK_P_DIFFUSIVE,CONTRAST_CH1,SNR_CH1,Condition,Replicate
0,Label,Spot ID,Track ID,Quality,X,Y,Z,T,Frame,Radius,...,Min intensity ch1,Max intensity ch1,Sum intensity ch1,Std intensity ch1,Probability stuck,Probability diffusive,Contrast ch1,Signal/Noise ratio ch1,1,1
1,Label,Spot ID,Track ID,Quality,X,Y,Z,T,Frame,R,...,Min ch1,Max ch1,Sum ch1,Std ch1,P stuck,P diffusive,Ctrst ch1,SNR ch1,1,1
2,,,,(quality),(micron),(micron),(micron),(sec),,(micron),...,(counts),(counts),(counts),(counts),,,,,1,1
3,ID96513,96513,0,1.6490188837051392,993.0290217416159,699.6608159357883,0.0,85.0,85,2.5,...,0.0,8.0,209.0,3.0204508145512197,,,0.6651964829411263,1.4941260891231885,1,1
4,ID77824,77824,0,1.720259428024292,994.6400443331037,671.2184631580113,0.0,1.0,1,2.5,...,0.0,8.0,239.0,2.501050830202228,,,0.736873523839186,2.1914340823072602,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15008,ID240088,240088,669,1.7251973152160645,762.0212225267923,99.97982261468995,0.0,134.0,134,2.5,...,0.0,8.0,227.0,2.8883113305774955,,,0.733580624045385,1.7976863980379276,3,2
15009,ID240888,240888,669,2.3195767402648926,762.559255185958,103.64853083467892,0.0,138.0,138,2.5,...,0.0,11.0,300.0,4.033083902643953,,,0.7004048582995951,1.6561858966429641,3,2
15010,ID242200,242200,669,2.839688539505005,756.9741037638419,111.60403595960548,0.0,142.0,142,2.5,...,0.0,13.0,372.0,4.4281043470212085,,,0.7533582466417533,1.9511212163688787,3,2
15011,ID241150,241150,669,2.358494281768799,765.7675918707351,107.9472218358911,0.0,139.0,139,2.5,...,0.0,11.0,320.0,3.4897963663627554,,,0.7262638717632551,2.0852850175910893,3,2
