# **Figure 1 Notebook**

This notebook provides the tools and data needed to recreate Figure 1 of Naik et al. 2026. 

The code has been adapted from the original repository so that the data can be retrieved from the data share and the figures can be plotted with standardized versions of the plotting code. 

Each cell gives a brief description of the panel being plotted and of where its outputs are stored.

**Author**: Suyash Naik

In [4]:
from src.BinnedPlotter import BinnedIntensityPlotter
from src.Figureplot import Figureplot
import os
from src.BinnedPlotter import BinnedIntensityPlotter
from src.Figureplot import Figureplot
import os
from glob import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import re 

Figure panel C: Binned intensity plot of keratin network maturation in EVL cells

Averaged keratin intensity within the EVL in Tg(krt18:Krt18-GFP) embryos as a 
function of time during epiboly (4 - 9.5 hpf).
N = 3 experiments, n = 6 embryos. 

The mean is plotted as a line; the SEM is shown as a shaded ribbon around it.


In [None]:

# Location of the Figure 1 source data. The author's absolute path is tried
# first; if it does not exist on this machine, fall back to the
# repository-relative "data/" folder (the same root the Panel D cell reads from).
Fig1folder= "/home/snaik/Projects/PaperPublishgit/data/"
if not os.path.isdir(Fig1folder):
    Fig1folder = "data/"
panelc=Fig1folder+"Fig1/1C/"

# glob() returns matches in arbitrary, filesystem-dependent order. Sort them so
# the column order of the binned table built in the next cell is reproducible.
intensity_files = sorted(glob(os.path.join(panelc, "*.csv")))

outputfolder="Figure/Figure1/PanelC/"
os.makedirs(outputfolder, exist_ok=True)

if not intensity_files:
    # Make a missing/misplaced data folder visible instead of silently plotting nothing.
    print(f"WARNING: no CSV files found in {panelc}")
print(f"Panel C has {len(intensity_files)} files binned and plotted. Output saved in {outputfolder} folder.")

In [None]:
# ---------------------------------------------------------------------------
# Panel C: bin each embryo's mean keratin intensity into 0.25 h time bins,
# save the binned table, then plot mean +/- SEM across embryos.
# Reads `intensity_files` and `outputfolder` from the previous cell.
# ---------------------------------------------------------------------------
bin_edges= np.arange(4,10,0.25)
bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])
binnedIntensityData=pd.DataFrame()
binnedIntensityData["Time (hpf)"]=bin_centers


def _bin_means(times, values, edges):
    """Return the mean of `values` inside each half-open bin [edges[i], edges[i+1]).

    Empty bins yield NaN. The lower edge is inclusive so that a sample lying
    exactly on an edge (e.g. the first frame at 4.0 hpf) is counted once
    instead of being dropped from both neighbouring bins.
    """
    return np.array([
        values[(times >= lo) & (times < hi)].mean()
        for lo, hi in zip(edges[:-1], edges[1:])
    ])


per_file_frames = []
for file in tqdm(intensity_files):
    # Work on the file name only, so that "0804" or "Pos" appearing in a
    # directory name cannot change the classification or the position tag.
    fname = os.path.basename(file)
    position = fname.split("Pos")[1][0:3]
    # The two acquisition dates use different frame intervals. The interval is
    # divided by 60 and added to the 4 hpf offset (so presumably minutes).
    if "0804" in fname:
        date_tag, frame_interval = "08042021", 5.2
    else:
        date_tag, frame_interval = "23022021", 15.5

    data = pd.read_csv(file)
    data["Label"] = date_tag + position
    # The " " column holds the 1-based frame number; frame 1 maps to 4.0 hpf.
    data["Time"] = (data[" "] - 1) * frame_interval / 60 + 4
    per_file_frames.append(data)

    # A fresh array per file: no buffer is shared between columns.
    bin_averages = _bin_means(data["Time"], data["Mean"], bin_edges)
    binnedIntensityData[f"{date_tag}_{position}"] = bin_averages

# Concatenate once instead of growing a DataFrame inside the loop.
intensityData = pd.concat(per_file_frames) if per_file_frames else pd.DataFrame()
binnedIntensityData.to_csv(outputfolder+"Figure1D_binnedIntensityData.csv")

# Styling must be configured BEFORE the figure is created; otherwise the first
# run on a fresh kernel is drawn with the default font settings.
plt.rcParams['figure.dpi'] = 100 # 200 e.g. is really fine, but slower
plt.rcParams['font.size'] = 24
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Arial'

fig, ax = plt.subplots(figsize=(7, 5.3))
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
# Axis labels are left off on purpose here (the outputs are the "_nolab" files).
ax.set_xlim(4.5,8.5)
ax.set_xticks(np.arange(4.5,9,0.25))
ax.set_xticklabels(["4.5","","","","5.5","","","","6.5","","","","7.5","","","","8.5",""])
ax.set_yticks(np.arange(0,470,25),minor=True)

# NOTE(review): only the first four embryo columns are averaged (iloc[:, 1:5]).
# If more than four files are present the remaining ones are ignored -- confirm
# this matches the n reported for the panel.
embryo_traces = binnedIntensityData.iloc[:,1:5]
mean_trace = embryo_traces.mean(axis=1)
sem_trace = embryo_traces.sem(axis=1)
ax.plot(binnedIntensityData["Time (hpf)"],mean_trace,color="#83bb03",linewidth=2)
ax.fill_between(binnedIntensityData["Time (hpf)"],mean_trace-sem_trace,mean_trace+sem_trace,color="#83bb03",alpha=0.3)

for ext in ("png", "pdf", "svg"):
    fig.savefig(outputfolder+"Figure1D_KeratinIntensity_nolab."+ext,dpi=300,bbox_inches='tight',transparent=True)
plt.show()


Figure panel D: Keratin network density during epiboly progression

Averaged density of the keratin network in Tg(actb2:Utrophin-mCherry, krt18:Krt18-GFP) embryos
as a function of time during epiboly (4 - 9.5 hpf). N = 3, n = 3 embryos. 

The mean is plotted as a line; the SEM is shown as a shaded ribbon around it.



In [None]:
# Panel D: where results are written, and the entries found under data/Fig1/1D/.
# NOTE(review): glob() order is filesystem-dependent and the next cell picks
# folders by position in this list -- confirm the order on a new machine.
outputfolder = "Figure/Figure1/PanelD/"
ndfolders = glob("data/Fig1/1D/*")
os.makedirs(name=outputfolder, exist_ok=True)
n_panel_d_entries = len(ndfolders)
print(f"Panel D has {n_panel_d_entries} files binned and plotted. Output saved in {outputfolder} folder.")

In [None]:
# Left edges of the 0.5 h time bins: 4.0, 4.5, ..., 9.5 hpf.
timebins = np.arange(4.0, 10.0, 0.5)
# Shared results table; starts with the bin edges as its only column.
ndBins = pd.DataFrame(timebins.copy(), columns=["Time (hpf)"])

def extract_time_idx(path):
    """Pull an integer index out of a file path, for use as a sort key.

    Tried in order, returning the last match of the first rule that hits:
      1. digits directly in front of a trailing ``.csv``;
      2. digits following the literal ``TimeSrs`` anywhere in the path;
      3. any run of digits in the file's base name.
    Returns 0 when no digits are found.
    """
    for pattern in (r"(\d+)(?=\.csv$)", r"TimeSrs(\d+)"):
        hits = re.findall(pattern, path)
        if hits:
            return int(hits[-1])
    basename_numbers = re.findall(r"(\d+)", os.path.basename(path))
    if not basename_numbers:
        return 0
    return int(basename_numbers[-1])


def build_nd_dataset(folder, time_minutes, time0, id_tag, timebins, skip_patterns=None, save_name=None):
    """Summarise one embryo's per-frame CSVs and bin the summaries in time.

    Every ``*.csv`` directly inside ``folder`` is treated as one time frame
    (ordered by ``extract_time_idx``). For each frame the mean and SEM of the
    ``%Area``, ``Area`` and ``Mean`` columns are computed.

    Side effects:
      * writes the per-frame table to ``<folder>/Processed/<save_name>``;
      * adds six ``{id_tag}_*`` columns to the module-level ``ndBins`` table.

    Parameters
    ----------
    folder : str
        Directory holding the per-frame CSV files.
    time_minutes : number
        Frame interval; it is divided by 60 before being added to ``time0``.
    time0 : number
        Time offset; the k-th file (1-based) is assigned
        ``time0 + k * time_minutes / 60``.
    id_tag : str
        Prefix of the columns written to ``ndBins``.
    timebins : sequence of numbers
        Bin edges. Bin ``i`` covers the half-open interval
        ``[timebins[i], timebins[i + 1])`` and is stored in row ``i``.
    skip_patterns : list of str, optional
        Files whose base name contains any of these substrings are ignored.
    save_name : str, optional
        File name of the per-frame table; defaults to ``{id_tag}_NetworkPd.csv``.

    Returns
    -------
    pandas.DataFrame or None
        The per-frame table, or ``None`` when no CSV file is left to process.
    """
    files = sorted(glob(os.path.join(folder, "*.csv")), key=extract_time_idx)
    if skip_patterns:
        files = [f for f in files if not any(p in os.path.basename(f) for p in skip_patterns)]
    if not files:
        print(f"No CSV files found in {folder}")
        return None

    # Output metric name -> source column in the per-frame CSV.
    metrics = {"NetworkDensity": "%Area", "Area": "Area", "Intensity": "Mean"}

    rows = []
    for idx, file in enumerate(files):
        data = pd.read_csv(file)
        row = {"Time (hpf)": time0 + (idx + 1) * time_minutes / 60}
        for name, column in metrics.items():
            row[name] = data[column].mean()
            row[f"{name}_SEM"] = data[column].sem()
        rows.append(row)
    ndpd = pd.DataFrame(rows)

    processed_dir = os.path.join(folder, "Processed")
    os.makedirs(processed_dir, exist_ok=True)
    if save_name is None:
        save_name = f"{id_tag}_NetworkPd.csv"
    ndpd.to_csv(os.path.join(processed_dir, save_name), index=False)

    # Start from NaN rather than 0: the last row has no upper edge and is never
    # filled, and a literal 0 there would read as a real measurement.
    for name in metrics:
        ndBins[f"{id_tag}_{name}"] = np.full(len(timebins), np.nan)
        ndBins[f"{id_tag}_{name}_SEM"] = np.full(len(timebins), np.nan)

    frame_times = ndpd["Time (hpf)"]
    for i in range(len(timebins) - 1):
        # Half-open bin: inclusive lower edge, exclusive upper edge. With strict
        # inequalities on both sides, frames landing exactly on an edge (e.g.
        # 4.5 or 5.0 hpf with 15 min frames) were dropped from every bin.
        mask = (frame_times >= timebins[i]) & (frame_times < timebins[i + 1])
        for name in metrics:
            in_bin = ndpd.loc[mask, name]
            ndBins.loc[i, f"{id_tag}_{name}"] = in_bin.mean()
            ndBins.loc[i, f"{id_tag}_{name}_SEM"] = in_bin.sem()
    return ndpd
# Acquisition settings of the three datasets, keyed by the id_tag used for the
# ndBins column names.
# NOTE(review): each folder is chosen by its position in `ndfolders`, so the
# pairing of folder and settings depends on the order glob() returned -- confirm.
settings_2503 = {
    "time_minutes": 20,
    "time0": 4.0,
    "id_tag": "2503",
    "skip_patterns": ["25032023_EVPLMONetworkPd.csv"],
    "save_name": "25032023_EVPLMONetworkPd.csv",
}
settings_1801 = {
    "time_minutes": 10,
    "time0": 4.5,
    "id_tag": "1801",
    "save_name": "18012022_Control.csv",
}
settings_2410 = {
    "time_minutes": 15,
    "time0": 4.0,
    "id_tag": "2410",
    "save_name": "24102023_caRhoA.csv",
}

# Processing order (2503, 1801, 2410) fixes the column order inside ndBins.
folder1control = ndfolders[0]
ndpd2503 = build_nd_dataset(folder1control, timebins=timebins, **settings_2503)

folder2control = ndfolders[2]
ndpd1801 = build_nd_dataset(folder2control, timebins=timebins, **settings_1801)

folder3control = ndfolders[1]
ndpd2410 = build_nd_dataset(folder3control, timebins=timebins, **settings_2410)

# Persist the combined binned table for all three datasets.
ndBins.to_csv(f"{outputfolder}NetworkDensityData_3_16042024.csv")


In [None]:
# Plot the average network density in ndBins as a line, with the SEM across
# datasets drawn as a ribbon around it. Reads `ndBins` and `outputfolder`.

# Styling must be configured BEFORE the figure is created; otherwise the first
# run on a fresh kernel is drawn with the default font settings.
plt.rcParams['figure.dpi'] = 100 # 200 e.g. is really fine, but slower
plt.rcParams['font.size'] = 24
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Arial'

fig, ax = plt.subplots(figsize=(7, 5.3))
ax.spines['right'].set_color('none')
ax.spines['top'].set_color('none')
# Axis labels are intentionally not set in this cell.
ax.set_xlim(4.5,8.5)
ax.set_ylim(0,100)
ax.set_xticks(np.arange(4.5,9,0.25))
ax.set_xticklabels(["4.5","","","","5.5","","","","6.5","","","","7.5","","","","8.5",""])
ax.set_yticks(np.arange(0,100,10),minor=True)

density_columns = ["1801_NetworkDensity", "2410_NetworkDensity", "2503_NetworkDensity"]
networkavg = ndBins[density_columns].mean(axis=1)
networksem = ndBins[density_columns].sem(axis=1)
# Where the across-dataset SEM is undefined (fewer than two datasets have data
# in the bin), fall back to the within-dataset SEM of the 2503 dataset.
# The previous `value == np.nan` test could never be true (NaN != NaN) and only
# rebound the loop variable, so this fallback was never applied.
networksem = networksem.fillna(ndBins["2503_NetworkDensity_SEM"])

ax.plot(ndBins["Time (hpf)"],networkavg,color="#83bb03",linewidth=2)
ax.fill_between(ndBins["Time (hpf)"],networkavg-networksem,networkavg+networksem,color="#83bb03",alpha=0.3)
for ext in ("png", "pdf", "svg"):
    fig.savefig(outputfolder+"1604_NetworkDensity."+ext,dpi=300,bbox_inches='tight',transparent=True)

# Export the plotted values with explicit column names (the unnamed Series
# previously ended up as "Unnamed 0"/"Unnamed 1" columns).
pd.DataFrame({
    "Time (hpf)": ndBins["Time (hpf)"],
    "NetworkDensity_Mean": networkavg,
    "NetworkDensity_SEM": networksem,
}).to_csv(outputfolder+"1604_NetworkDensity.csv")
plt.show()