In [19]:
import os
import json
import pandas as pd
import numpy as np
from datetime import datetime

### Making a dataframe from the results folders to handle the data

In [14]:
# --- parameters ---
# Root folder that the extraction cell walks through.
path = '/media/guillermo/Expansion/Thesis GY/3. Analyzed files'
df_in = pd.read_excel('Positions.xlsx')

# Keep only the rows whose 'Quality' is 'Very good' and renumber them from 0.
very_good_mask = df_in['Quality'] == 'Very good'
df_out = df_in[very_good_mask].copy().reset_index(drop=True)

# Placeholder for the maximum area, filled in once the area series are loaded.
df_out.loc[:, ["area_max"]] = np.nan

# Array-valued columns must have dtype 'object' so that a whole array/list
# can be stored in a single cell.
for array_col in ('radius', 'area'):  # colony radius / colony area in time
    df_out[array_col] = pd.Series(dtype='object')

# Remaining placeholders, all initialised to NaN (same order as before).
nan_placeholders = (
    "gomp_params",      # growth fitting: parameters fitting Gompertz
    "logistic_params",  # growth fitting: parameters fitting Logistic
    "t_m_gomp",         # time at which mu is maximum in Gompertz
    "t_m_logistic",     # time at which mu is maximum in Logistic
    "r0",               # velocity field fitting
)
for placeholder in nan_placeholders:
    df_out.loc[:, [placeholder]] = np.nan

# Display the head of the DataFrame to check the assignment
df_out.head()

Unnamed: 0,Date,Machine,Position,Strain,DNA,Chemical,Pad,Dilution,t_incub,t_im,...,area_max,radius,area,gomp_params,logistic_params,t_m_gomp,t_m_logistic,r0,mu0,ws_params
0,2023-11-15,Ti scope,0,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,,,,,,,,,,
1,2023-11-15,Ti scope,1,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,,,,,,,,,,
2,2023-11-15,Ti scope,3,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,,,,,,,,,,
3,2023-11-15,Ti scope,6,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,,,,,,,,,,
4,2023-11-15,Ti scope,7,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,,,,,,,,,,


#### Extract area and radius endpoints, as well as all dynamics

In [15]:
# Extract the area and radius arrays saved for every selected position and
# store them in df_out.
# Folder layout read below:
#   <path>/<machine>/<YYYY_MM_DD>/results/pos<N>/{area,radius}.npy

# Name of the Excel file written by the export cell. It is set before the
# loops (instead of once per position) so that it is defined even when no
# position matches.
outn = 'data_processed.xlsx'

scopes = os.listdir(path)
for s in scopes:
    scope_dir = os.path.join(path, s)
    if not os.path.isdir(scope_dir):
        # A stray file next to the machine folders cannot be listed; skip it.
        continue
    scope_dates = sorted(os.listdir(scope_dir))
    for d in scope_dates:
        # Folder names are parsed as dates (YYYY_MM_DD) and matched against
        # df_out.Date; a folder with another name raises ValueError here.
        date = datetime.strptime(d, '%Y_%m_%d')
        # Rows of df_out for this machine and this date.
        data = df_out[(df_out.Date == date) & (df_out.Machine == s)]
        pos = data.Position.unique()
        path_results = os.path.join(path, s, d, 'results')
        for p in pos:
            path_pos = os.path.join(path_results, f'pos{p}')
            area_path = os.path.join(path_pos, 'area.npy')
            radius_path = os.path.join(path_pos, 'radius.npy')

            # `data` keeps df_out's index labels, so the first label matching
            # this position is the row of df_out to fill.
            ridx = data[data.Position == p].index[0]

            # load area and radius (a missing file raises FileNotFoundError)
            radius = np.load(radius_path)
            area = np.load(area_path)

            # quantities are not filtered
            df_out.at[ridx, 'radius'] = radius
            df_out.at[ridx, 'area'] = area

In [17]:
# Maximum of each position's area series.
# Cells that never received an array (still a scalar NaN) or that hold an
# empty array yield NaN instead of raising.
df_out['area_max'] = df_out['area'].apply(
    lambda a: np.max(a) if np.ndim(a) > 0 and np.size(a) > 0 else np.nan
)

In [20]:
# Save the assembled table to the Excel file named by `outn`, without the index column.
# NOTE(review): the 'radius' and 'area' cells hold whole arrays — confirm how
# they end up serialized in the sheet and how downstream code parses them.
df_out.to_excel(excel_writer=outn, index=False)

#### PENDING TO ADD AND PROCESS

- Compute incubation time
- Pixel-to-µm equivalence
- Radius in µm
- Max radius in µm
- Area in µm²
- Max area in µm²
- Mean colony fluo, time series
- Mean colony fluo, single number
- Mean colony rho, time series
- Mean colony rho, single number
- Mean colony rho in center, time series
- Mean colony rho in center, single number

In [22]:
# Sanity check: type of the `radius` variable left over from the last
# iteration of the extraction loop.
type(radius)

numpy.ndarray

In [24]:
# Reload the processed table from the workbook on disk.
df = pd.read_excel(io='data_processed.xlsx')

In [25]:
# Preview the first rows of the reloaded table.
df.head()

Unnamed: 0,Date,Machine,Position,Strain,DNA,Chemical,Pad,Dilution,t_incub,t_im,...,radius,area,gomp_params,logistic_params,t_m_gomp,t_m_logistic,r0,wave_pos_coefs,wave_speed_av,rad_max
0,2023-11-15,Ti scope,0,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,[ 31.60972529 34.52176435 36.57236856 38.34...,[ 3139 3744 4202 4619 5359 6034 6865 81...,"{""A"": 11.29467129374218, ""mu_m"": 0.03429671707...","{""A"": 11.291854811467543, ""mu_m"": 0.0253848483...",196.234198,225.219567,22.567534,[ 4.28881615e-09 -1.14403907e-06 1.17932253e-...,9.117994,160.398612
1,2023-11-15,Ti scope,1,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,[ 40.6529818 43.49019825 45.48991724 49.00...,[ 5192 5942 6501 7546 8788 10159 11...,"{""A"": 11.651550757675393, ""mu_m"": 0.0365208238...","{""A"": 11.649139256480966, ""mu_m"": 0.0268369079...",189.07144,215.629207,32.953934,[ 1.61900671e-09 -4.65177336e-07 5.07279382e-...,8.818193,192.998198
2,2023-11-15,Ti scope,3,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,[ 45.01511674 47.75990624 49.65509418 53.29...,[ 6366 7166 7746 8922 10142 11787 13...,"{""A"": 11.58037729284504, ""mu_m"": 0.03699178625...","{""A"": 11.578378488201757, ""mu_m"": 0.0270179440...",180.609065,205.356638,43.862215,[ 1.12971199e-09 -4.03357938e-07 5.59275580e-...,7.840974,185.582357
3,2023-11-15,Ti scope,6,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,[ 48.18126192 51.43726056 53.67219457 56.63...,[ 7293 8312 9050 10078 10894 11961 13486 151...,"{""A"": 11.263179105925273, ""mu_m"": 0.0342345182...","{""A"": 11.261690794798985, ""mu_m"": 0.0247235365...",151.439181,174.692595,20.49891,[ 1.51592115e-09 -5.35988459e-07 7.39024159e-...,8.04819,158.601509
4,2023-11-15,Ti scope,7,MC4100,pLPT20&pLPT41,-,1,10^(-5),09:25:00,14:41:00,...,[ 43.24431271 46.3082466 48.00255479 48.82...,[ 5875 6737 7239 7488 8606 9935 12...,"{""A"": 11.528274906804539, ""mu_m"": 0.0376935142...","{""A"": 11.526236072514887, ""mu_m"": 0.0275867338...",186.954076,211.573857,30.891799,[ 5.29691541e-12 -8.18086232e-09 3.27859257e-...,9.299032,179.352843


In [30]:
# Rescale the maxima with each row's `um_px` factor (presumably µm per pixel,
# going by the column name): areas scale with its square, radii linearly.
# Computed column-wise instead of looping over the rows one by one.
df['area_max_um'] = df['area_max'] * df['um_px']**2
df['rad_max_um'] = df['rad_max'] * df['um_px']

In [32]:
# Write the table back to the same workbook it was read from, without the index column.
df.to_excel(excel_writer='data_processed.xlsx', index=False)