In [1]:
import os
import pandas as pd
import re

Open the list of all submitted under `/uod/idr/filesets/idr0099-jain-beetlelightsheet/20201001-ftp`

In [2]:
rootdir = '/uod/idr/filesets/idr0099-jain-beetlelightsheet'
with open('idr0099-jain-beetlelightsheet-20201001-ftp.filelist.txt') as f:
    filelist = f.read().splitlines()

Get the list of unique parent directories

In [3]:
set(os.path.dirname(f) for f in filelist)

{'20201001-ftp/Akanksha_Jain_16-4-15_LifeAct-eGFP',
 '20201001-ftp/Akanksha_Jain_22-06-16_Tc-Squash-eGFP',
 '20201001-ftp/Akanksha_Jain_4-3-15_nGFP',
 '20201001-ftp/Akanksha_Jain_8-6-19_ZenKD_GAP43-eYFP',
 '20201001-ftp/Akanksha_Jain_9-3-15_Histone-eGFP'}

With reference to the preprint https://www.biorxiv.org/content/10.1101/744193v3.full.pdf this is the mapping of directories to published figures:

- `Akanksha_Jain_4-3-15_nGFP`: Figure 1: Inhomogeneous tissue expansion during Triboliumserosa morphogenesis.
- `Akanksha_Jain_9-3-15_Histone-eGFP`: Figure 2: Cell behaviors at the ventral leading edge of the serosa window are distinct from 508the behaviors in dorsal serosa.
- `Akanksha_Jain_16-4-15_LifeAct-eGFP`: Figure 3: Tension landscape in the expanding serosa
- `Akanksha_Jain_22-06-16_Tc-Squash-eGFP`: Figure 4: Emergence of a heterogeneous actomyosin cable at the serosa-embryonic boundary promotes cell eviction during serosa window closure.
- `Akanksha_Jain_8-6-19_ZenKD_GAP43-eYFP`: Figure5: Cell and tissue dynamics in Tc-zen1 knockdown embryos

In [4]:
figuredirs = [
    '20201001-ftp/Akanksha_Jain_4-3-15_nGFP',
    '20201001-ftp/Akanksha_Jain_9-3-15_Histone-eGFP',
    '20201001-ftp/Akanksha_Jain_16-4-15_LifeAct-eGFP',
    '20201001-ftp/Akanksha_Jain_22-06-16_Tc-Squash-eGFP',
    '20201001-ftp/Akanksha_Jain_8-6-19_ZenKD_GAP43-eYFP',
]
filelists = []
for figdir in figuredirs:
    filelists.append([os.path.basename(f) for f in filelist if f.startswith(f'{figdir}')])

Split filename into tokens, check for variable components of the filename

In [5]:
def tokenise(s):
    return re.split('[_,\.]', s)

for i, filelist in enumerate(filelists):
    print(f'{figuredirs[i]} ({filelist[0]} ...)')
    df = pd.DataFrame([tokenise(f) for f in filelist])
    for name, col in df.iteritems():
        unique = col.unique()
        if len(unique) > 1:
            print(f'  {name} {len(unique)}/{len(col)}, {unique[:5]}...')
# tokenised_filelists = [pd.DataFrame([tokenise(f) for f in filelist]) for filelist in filelists]

20201001-ftp/Akanksha_Jain_4-3-15_nGFP (TP0_Chgreen_Ill0_Ang0,1,2.tif ...)
  0 527/527, ['TP0' 'TP11' 'TP100' 'TP120' 'TP101']...
20201001-ftp/Akanksha_Jain_9-3-15_Histone-eGFP (TP0_Chgreen_Ill0_Ang0,1,2.tif ...)
  0 539/539, ['TP0' 'TP122' 'TP100' 'TP123' 'TP101']...
20201001-ftp/Akanksha_Jain_16-4-15_LifeAct-eGFP (img_TL0.tif ...)
  1 798/798, ['TL0' 'TL125' 'TL1' 'TL10' 'TL127']...
20201001-ftp/Akanksha_Jain_22-06-16_Tc-Squash-eGFP (TP0_Ch0_Ill0_Ang1,2,3,4,5.tif ...)
  0 212/212, ['TP0' 'TP133' 'TP100' 'TP135' 'TP101']...
20201001-ftp/Akanksha_Jain_8-6-19_ZenKD_GAP43-eYFP (fused_tp_0_ch_0.tif ...)
  2 142/142, ['0' '100' '101' '124' '102']...


Based on this create pattern files for varying `T`: `0..N-1` and symlink the original files into the directory used for the pattern file. From https://docs.openmicroscopy.org/bio-formats/5.9.0/formats/pattern-file.html Figure 5 which contains `tp_` will need to be renamed to remove the `_`:

In [6]:
re_pat = re.compile(r'([Tt][LPp])_?(\d+)')
for i, filelist in enumerate(filelists):
    f0 = filelist[0]
    d = f'Figure_{i + 1}'
    m = re_pat.search(f0)
    assert m
    pattern = re_pat.sub(f'\\1<0,{len(filelist)}>', f0).lower()
    pattern_file = re_pat.sub(r'\1', f0) + '.pattern'
    print(f0, pattern, pattern_file)
    os.mkdir(d)
    with open(os.path.join(d, pattern_file), 'w') as f:
        f.write(pattern + '\n')
    for f in filelist:
        src = os.path.join(rootdir, figuredirs[i], f)
        dst = os.path.join(d, re_pat.sub(r'\1\2', f))
        os.symlink(src, dst)


TP0_Chgreen_Ill0_Ang0,1,2.tif tp<0,527>_chgreen_ill0_ang0,1,2.tif TP_Chgreen_Ill0_Ang0,1,2.tif.pattern
TP0_Chgreen_Ill0_Ang0,1,2.tif tp<0,539>_chgreen_ill0_ang0,1,2.tif TP_Chgreen_Ill0_Ang0,1,2.tif.pattern
img_TL0.tif img_tl<0,798>.tif img_TL.tif.pattern
TP0_Ch0_Ill0_Ang1,2,3,4,5.tif tp<0,212>_ch0_ill0_ang1,2,3,4,5.tif TP_Ch0_Ill0_Ang1,2,3,4,5.tif.pattern
fused_tp_0_ch_0.tif fused_tp<0,142>_ch_0.tif fused_tp_ch_0.tif.pattern
