In [1]:
# Count tracks & videos in excel files and supp. material
import os
from collections import defaultdict
import time
import re
import sys
import pandas as pd
import logging

# Send every record both to a results file on disk and to the notebook/console
# output, so the run leaves a persistent copy of what was printed.
_results_file_handler = logging.FileHandler(r'C:\Users\pc\Desktop\mov_sist\results.txt')
_console_handler = logging.StreamHandler(sys.stdout)

logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",  # timestamp, severity, text
    level=logging.INFO,
    handlers=[_results_file_handler, _console_handler],
)

# Wall-clock start; the elapsed time is logged once the walk has finished.
t = time.time()

# Root folder whose sub-folders (one per scenario) hold the track spreadsheets.
walk_dir = r'C:\Users\pc\Desktop\mov_sist\valid_tracks'

logging.info('walk_dir = ' + walk_dir)
# If the current working directory may change during script execution, convert
# the path to an absolute one right away. The `root` values produced by
# os.walk() below will then be absolute paths as well.
walk_dir = os.path.abspath(walk_dir)
logging.info('walk_dir (absolute) = ' + walk_dir)

# Accumulators filled while walking the tree. Unless noted, keys are scenario
# (folder) names.
excl_list = defaultdict(list)        # scenario -> list of per-file DataFrames
excl_merged = defaultdict(list)      # scenario -> all of its DataFrames concatenated
tracks_quantity = {}                 # scenario -> number of tracks
numreps = {}                         # scenario -> number of files (videos)
numcells = defaultdict(dict)         # scenario -> {filename: number of tracks}
max_min_cells = defaultdict(dict)    # scenario -> {'max': ..., 'min': ...}
left_n = {}                          # filename or scenario -> tracks with negative X displacement
right_n = {}                         # filename or scenario -> tracks with positive X displacement

ROWS_PER_TRACK = 3600  # each track (cell) spans exactly this many consecutive rows
X_COLUMN = 4  # positional index of the column that stores the X coordinate


def _x_displacements(frame):
    """Return the net X displacement (final minus initial) of every track in *frame*.

    *frame* must hold whole tracks stacked one after another, i.e. its number
    of rows must be a multiple of ROWS_PER_TRACK. The result has one entry per
    track, in the order the tracks appear in *frame*.
    """
    initial_x = frame.iloc[0::ROWS_PER_TRACK, X_COLUMN]
    final_x = frame.iloc[ROWS_PER_TRACK - 1::ROWS_PER_TRACK, X_COLUMN]
    return [end - start for start, end in zip(initial_x.tolist(), final_x.tolist())]


# Walk the tree: every folder that contains files is treated as one scenario
# and every file in it as one replicate (video) of that scenario.
for root, subdirs, files in os.walk(walk_dir):
    logging.info(f'--\nNow in root = {root}')
    logging.info(f'--\nThere are {len(files)} files in {root}')

    for subdir in subdirs:
        logging.info('\t- subdirectory ' + subdir)

    if not files:
        logging.info(f'\nThere are no files in root: {root}')
        continue

    # The scenario is the name of the folder being visited. os.path.basename
    # works with whatever separator the platform uses, and the value is the
    # same for every file in the folder, so it is computed once here.
    scenario = os.path.basename(root)

    for filename in files:
        file_path = os.path.join(root, filename)
        serie = pd.read_excel(file_path, header=None)

        # An empty sheet would pass the multiple-of-3600 check below (0 % 3600
        # is 0) and only fail later with an unrelated error, so reject it here.
        if serie.empty:
            logging.critical(f'\t- {filename} contains no data!')
            raise SystemExit(f'\t- {filename} contains no data!')
        if len(serie) % ROWS_PER_TRACK != 0:  # detect misshaped xlsx files
            logging.critical(f'\t- length of {filename} is not a multiple of {ROWS_PER_TRACK}!')
            raise SystemExit(f'\t- length of {filename} is not a multiple of {ROWS_PER_TRACK}!')

        excl_list[scenario].append(serie)
        numcells[scenario][filename] = len(serie)/ROWS_PER_TRACK

        # Net X displacement of each cell in this file.
        x_displacement_serie = _x_displacements(serie)
        motionless = [idx for idx, val in enumerate(x_displacement_serie) if val == 0]
        if motionless:  # detect cells with no displacement
            logging.critical(f'\t- cell(s) nº {motionless} have no displacement')
            raise SystemExit(f'\t- cell(s) nº {motionless} have no displacement')

        left_n[filename] = sum(1 for value in x_displacement_serie if value < 0 )
        right_n[filename] = sum(1 for value in x_displacement_serie if value > 0 )
        logging.info(f'''\t- video {filename} contains {numcells[scenario][filename]} tracks, of which
        {left_n[filename]} end on the left side and {right_n[filename]} end on the right side, with ratios
        {(left_n[filename]/len(x_displacement_serie))*100}% left and {(right_n[filename]/len(x_displacement_serie))*100}% right''')

    # Per-scenario summary, computed over all files of the folder together.
    max_min_cells[scenario]['max'] = max(numcells[scenario].values())
    max_min_cells[scenario]['min'] = min(numcells[scenario].values())
    logging.info(f'Max and min number of cells per rep in {scenario} are {max_min_cells[scenario]["min"]}-{max_min_cells[scenario]["max"]}')
    excl_merged[scenario] = pd.concat(excl_list[scenario], ignore_index=True)
    numreps[scenario] = len(files)
    logging.info(f'{scenario} has {numreps[scenario]} replicates (videos)')
    tracks_quantity[scenario] = len(excl_merged[scenario])/ROWS_PER_TRACK
    logging.info(f'{scenario} contains {tracks_quantity[scenario]} tracks')

    # Net X displacement of each cell in the whole scenario.
    x_displacement_scenario = _x_displacements(excl_merged[scenario])
    logging.info(f'Tracks in {scenario} displacement values:{chr(10)}{chr(10).join(map(str,x_displacement_scenario))}')
    left_n[scenario] = sum(1 for value in x_displacement_scenario if value < 0 )
    right_n[scenario] = sum(1 for value in x_displacement_scenario if value > 0 )
    logging.info(f'{scenario} has {right_n[scenario]} cells to the right')
    logging.info(f'{scenario} has {left_n[scenario]} cells to the left')
    logging.info(f'{scenario} has {(left_n[scenario]/len(x_displacement_scenario))*100} left side ratio')
    logging.info(f'{scenario} has {(right_n[scenario]/len(x_displacement_scenario))*100} right side ratio')

# Grand totals across every scenario: number of files read and number of
# tracks (total rows divided by the 3600 rows each track occupies).
total_videos = sum(numreps.values())
total_rows = sum(map(len, excl_merged.values()))
logging.info(
    f'All the xlsx combined contain {total_videos}\n'
    f'recorded videos and {total_rows/3600} tracks'
)

# Report how long the whole run took, measured from the timestamp stored in t.
elapsed = time.time() - t
logging.info(f'{elapsed} seconds elapsed')

# Count the "leaf" folders (those with no sub-folders) under the supp. material
# directory; each leaf folder is reported as one video.
supp_matrial = r'C:\Users\pc\Desktop\mov_sist\Supp_mat'
folders = [
    os.path.basename(leaf_path)
    for leaf_path, child_dirs, _ in os.walk(supp_matrial)
    if not child_dirs
]
logging.info(f'There are {len(folders)} videos in Supp. Material')

2023-05-09 14:57:26,087 [INFO] walk_dir = C:\Users\pc\Desktop\mov_sist\valid_tracks
2023-05-09 14:57:26,098 [INFO] walk_dir (absolute) = C:\Users\pc\Desktop\mov_sist\valid_tracks
2023-05-09 14:57:26,101 [INFO] --
Now in root = C:\Users\pc\Desktop\mov_sist\valid_tracks
2023-05-09 14:57:26,103 [INFO] --
There are 0 files in C:\Users\pc\Desktop\mov_sist\valid_tracks
2023-05-09 14:57:26,105 [INFO] 	- subdirectory Amoeba borokensis
2023-05-09 14:57:26,106 [INFO] 	- subdirectory Amoeba proteus
2023-05-09 14:57:26,108 [INFO] 	- subdirectory Metamoeba leningradensis
2023-05-09 14:57:26,110 [INFO] 
There are no files in root: C:\Users\pc\Desktop\mov_sist\valid_tracks
2023-05-09 14:57:26,115 [INFO] --
Now in root = C:\Users\pc\Desktop\mov_sist\valid_tracks\Amoeba borokensis
2023-05-09 14:57:26,117 [INFO] --
There are 0 files in C:\Users\pc\Desktop\mov_sist\valid_tracks\Amoeba borokensis
2023-05-09 14:57:26,122 [INFO] 	- subdirectory Comprobacion borokensis 11.63
2023-05-09 14:57:26,123 [INFO] 	-