In [None]:
import pandas as pd
import numpy as np
import json
import os
from tqdm import tqdm_notebook as tqdm

import utils as utils

from configparser import ConfigParser, ExtendedInterpolation

In [None]:
# Read the notebook settings from config.ini (path is relative to the current
# working directory). ExtendedInterpolation allows ${section:option} references
# inside option values.
config = ConfigParser(interpolation=ExtendedInterpolation())
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing config fails here
# with a clear message instead of later with a NoSectionError.
loaded_config_files = config.read('config.ini')
if not loaded_config_files:
    raise FileNotFoundError(
        "config.ini was not found or could not be read in " + os.getcwd()
    )
loaded_config_files

In [None]:
# Settings taken from the [main] section of the config file.
# Each option is read exactly once (DB_FILENAME used to be fetched twice).
FOLDER = config.get("main", "ROOTFOLDER")            # base folder for all paths built below
TYPES = json.loads(config.get("main", "TYPES"))      # JSON-encoded; matched against the c*_type columns

DB_FILENAME = config.get("main", "DB_FILENAME")          # database CSV that is read
DB_NEW_FILENAME = config.get("main", "DB_NEW_FILENAME")  # database CSV written at the end
SPAN_FILENAME = config.get("main", "SPAN_FILENAME")      # CSV that receives the per-image spans

In [None]:
# Show the path that the span table is written to further down (sanity check of the config values).
os.path.join(FOLDER, "smFISH-database", SPAN_FILENAME)

In [None]:
# Column names and filename prefixes shared by the cells below.
# Name of the database column that holds the image file name.
filename_column = 'cropped_image_file'
# One "type" column per channel that is inspected.
type_columns = ['c{}_type'.format(channel) for channel in range(3)]
# Filename prefixes, kept as a 1 x 5 array so they can be indexed by channel position.
stain_prefix = np.array([['C{}-'.format(channel) for channel in range(5)]])

In [None]:
# Load the image database; the rows to process are selected from it below.
df_path = os.path.join(FOLDER, 'smFISH-database', DB_FILENAME)
df = pd.read_csv(df_path)  # comma is read_csv's default separator

In [None]:
# TODO: maybe it is necessary to fill in other values here, too
# Fix missing entries in the columns that we are planning to use.
# The filled columns are assigned back to `df` on purpose: calling
# `df[col].fillna(..., inplace=True)` operates on a temporary Series and does
# not reliably update `df` (pandas warns about it, and with Copy-on-Write it
# is a no-op).
columns_to_fill = ['cropped_image_file', 'c0_type', 'c1_type', 'c2_type']
df[columns_to_fill] = df[columns_to_fill].fillna('')

In [None]:
# Work on a copy so the frame loaded from disk is not modified by the selection below.
dff = df.copy()

In [None]:
# Find every (row, channel) pair whose type is listed in TYPES and build the
# base name "<channel prefix><image name without extension>" for it.
#
# `isin` replaces `applymap(lambda x: x in TYPES)`: it is vectorised and
# `DataFrame.applymap` is deprecated in recent pandas releases. list(TYPES)
# keeps the membership semantics of the original `x in TYPES`.
type_mask = dff[type_columns].isin(list(TYPES)).to_numpy()
row, col = np.nonzero(type_mask)
n_samples = dff.shape[0]
# Position k in type_columns maps to prefix k, so the single prefix row can be
# indexed directly instead of being tiled n_samples times first.
new_prefix = stain_prefix[0, col]
new_filename = dff[filename_column].values[row]
# [:-4] drops the last four characters of the image file name
# (assumes a dot plus three-letter extension such as ".tif" -- TODO confirm).
full_filenames = [
    "{}{}".format(prefix, image_name[:-4])
    for prefix, image_name in zip(new_prefix, new_filename)
]

In [None]:
# Turn every base name into the path of its CSV inside the "csv-2" folder.
dataset = [
    os.path.join(FOLDER, "csv-2", base_name + ".csv")
    for base_name in tqdm(full_filenames)
]

In [None]:
print(f'Processing: {len(dataset)} files') 

# Compute the z-span (max(z) - min(z), rounded to 2 decimals) of every CSV that
# exists on disk. Records are collected in a list and turned into a DataFrame
# once at the end: DataFrame.append was removed in pandas 2.0, and growing a
# frame row by row is quadratic anyway.
span_records = []
for d in tqdm(dataset):
    # Expected CSVs that are not on disk are skipped.
    if not os.path.exists(d):
        continue

    try:
        # utils.load_i_z is a project helper; only its second return value (z)
        # is used here.
        _intensity, z = utils.load_i_z(d, skiprows_=0)

        span_records.append(
            {
                # File name without its directory and without the ".csv" suffix;
                # basename handles both "/" and the platform separator.
                "image": os.path.basename(d)[:-4],
                "span": round(max(z) - min(z), 2),
            }
        )

    except (RuntimeError, TypeError, ValueError) as err:
        # Keep going, but report which file failed and why.
        print(f"Skipping {d}: {err!r}")

# Passing `columns` keeps both columns present even when nothing was collected.
df = pd.DataFrame(span_records, columns=["image", "span"])

In [None]:
# Display the collected spans (one row per processed CSV).
df

In [None]:
# Split the "<2-char prefix>-<name>" identifier into a channel label
# (e.g. "c0_span") and the image file name with ".tif" appended.
image_ids = df["image"]
df["channel"] = image_ids.str[:2].str.lower() + '_span'
df["image"] = image_ids.str[3:] + '.tif'

In [None]:
# Reshape to one row per image with one span column per channel
# (pivot_table's default aggregation is used if an image/channel pair repeats).
df_out = pd.pivot_table(df, index='image', columns='channel', values='span')

In [None]:
# Display the pivoted table (index: image, columns: one span column per channel).
df_out

In [None]:
# Save the per-image span table next to the database.
span_csv_path = os.path.join(FOLDER, "smFISH-database", SPAN_FILENAME)
df_out.to_csv(span_csv_path)

In [None]:
# Load the database again: the name `df` was reused for the span table above.
df_path = os.path.join(FOLDER, "smFISH-database", DB_FILENAME)
df = pd.read_csv(filepath_or_buffer=df_path, sep=",")

In [None]:
# Compare the number of database rows with the number of images that have a span.
len(df), len(df_out)

In [None]:
# Merge into a copy so the freshly loaded database frame stays untouched.
dff = df.copy()

In [None]:
# Attach the span columns to the database rows. This is a left join, so rows
# without a matching image are kept; 'image' is the index of df_out.
dff = dff.merge(
    df_out,
    how='left',
    left_on='cropped_image_file',
    right_on='image',
)

In [None]:
# Disabled leftover export: SATURATION_NEW_FILENAME is not defined in the cells above.
# df_out.to_csv(os.path.join(FOLDER, "smFISH-database", SATURATION_NEW_FILENAME))

In [None]:
# Write the merged database. The "Unnamed: 0" column gets an empty header
# again (presumably it is the unnamed first column of the input CSV -- verify),
# and the DataFrame index is not written.
new_db_path = os.path.join(FOLDER, "smFISH-database", DB_NEW_FILENAME)
dff = dff.rename(columns={"Unnamed: 0": ""})
dff.to_csv(new_db_path, index=False)