In [None]:
# Inline matplotlib rendering is left disabled here; uncomment the next line
# to show figures directly below the cells.
# %matplotlib inline
# Load the nb_black IPython extension (presumably auto-formats cells with
# black - confirm it is installed in the kernel environment).
%load_ext nb_black

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy.stats import gamma
from scipy.optimize import curve_fit
from scipy import special

from configparser import ConfigParser, ExtendedInterpolation

import utils as utils

from tqdm import tqdm_notebook as tqdm

In [None]:
from pandas.errors import EmptyDataError

In [None]:
# Show up to 50 columns when a DataFrame is rendered in the notebook.
pd.options.display.max_columns = 50

In [None]:
# Read the notebook settings from config.ini in the current working directory.
# ExtendedInterpolation allows ${section:option} references inside values.
config = ConfigParser(interpolation=ExtendedInterpolation())
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so check that list to fail early with a clear
# message instead of a NoSectionError in a later cell.
config_files_read = config.read("config.ini")
if not config_files_read:
    raise FileNotFoundError(
        "Could not read config.ini from the current working directory"
    )
# Display the parsed file list, as the bare config.read(...) call did.
config_files_read

In [None]:
# Paths and file names come from the [all] section of the config file.
FOLDER = config.get(section="all", option="FOLDER")
DB_FILENAME = config.get(section="all", option="DB_FILENAME")
DB_NEW_FILENAME = config.get(section="all", option="DB_NEW_FILENAME")
# EXPERIMENTS = ["N2", "SEA-12", "MK4", "CB428", "RNAi"]
# File-name prefixes of the per-channel CSV files.
CHANNELS = [f"C{channel_index}-" for channel_index in range(3)]
# One count column per channel: "C0-" -> "#c0_smfish", and so on.
SMFISH_COLUMNS = ["#" + prefix.lower()[:2] + "_smfish" for prefix in CHANNELS]

In [None]:
# Load the database CSV from <FOLDER>/smFISH-database; empty fields are read
# as missing values. The preview shows the first five rows.
df_path = os.path.join(FOLDER, "smFISH-database", DB_FILENAME)
df = pd.read_csv(
    df_path,
    sep=",",
    na_values=[""],
)
df.head(n=5)

In [None]:
# TODO: maybe it is necessary to fill in other values here, too
# Replace missing entries with "" in the columns that are used below.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on a column selection: that form operates on an
# intermediate object, triggers a chained-assignment warning on recent pandas,
# and leaves df unchanged once Copy-on-Write is enabled.
for column in ("cropped_image_file", "c0_type", "c1_type", "c2_type"):
    df[column] = df[column].fillna("")

In [None]:
# Seed every per-channel count column with -1; the value stays -1 for any
# row/channel that the counting loop does not overwrite.
for column_name in SMFISH_COLUMNS:
    df[column_name] = -1

In [None]:
# For every database row that names a cropped image, look for one CSV per
# channel at <FOLDER>/csv-2/<channel prefix><image name minus 4 chars>.csv and
# record:
#   "#cN_smfish"      the number of lines in that CSV
#   "#cN_smfish_adj"  the sum of the column labelled 4 of that CSV
# Rows with no image name and channels without a CSV are left untouched.

# The CSV directory does not change between rows, so build it once.
folder = os.path.join(FOLDER, "csv-2")

# The context manager closes the progress bar even if the loop raises.
with tqdm(total=len(df)) as pbar:
    for i, r in df.iterrows():
        pbar.update(1)
        filename = r["cropped_image_file"]
        if filename == "":
            continue

        # e = filename.split('_')[0]

        for c in CHANNELS:
            # filename[:-4] drops the last four characters - presumably a
            # three-letter extension such as ".tif"; TODO confirm.
            filepath = os.path.join(folder, c + filename[:-4] + ".csv")
            if not os.path.exists(filepath):
                continue

            smfish_column = "#" + c.lower()[:2] + "_smfish"
            # -3 is only left behind if counting the lines below raises.
            df.at[i, smfish_column] = -3
            # this one is for counting; the with-block closes the file handle
            with open(filepath) as csv_file:
                num_lines = sum(1 for _ in csv_file)
            df.at[i, smfish_column] = num_lines
            # this one is for adjusted intensities
            try:
                # on_bad_lines="warn" replaces error_bad_lines=False plus
                # warn_bad_lines=True, which pandas 2.0 removed: malformed
                # lines are skipped with a warning.
                spots = pd.read_csv(
                    filepath,
                    sep="\t",
                    na_values=[""],
                    index_col=0,
                    header=None,
                    on_bad_lines="warn",
                )

                # header=None keeps positional integer labels, so 4 selects
                # the fifth tab-separated field.
                df.at[i, smfish_column + "_adj"] = spots[4].sum()
            except EmptyDataError:
                print(f"Is empty: {filepath}")

In [None]:
# Preview the first five rows after the count columns have been filled in.
df.head(n=5)

In [None]:
# Save the updated table under the new database file name, without writing
# the DataFrame index as a column.
df.to_csv(
    path_or_buf=os.path.join(FOLDER, "smFISH-database", DB_NEW_FILENAME),
    index=False,
)