In [1]:
from measure import get_database_interface
import yaml

In [2]:
import pandas as pd
from ftplib import *
from io import StringIO
import io

In [3]:
from tqdm.auto import tqdm

In [47]:
def save_df_to_ftp(df, pretty_name, verbose=False):
    """Upload a measurement table to the project FTP folder as ``<pretty_name>.txt``.

    The frame is serialised as CSV (with header, without the index) using a
    fixed column order and stored in ``/upload/ct_scans/morphometric_analysis``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``fish_id``, ``line_id``, ``fish_sex``,
        ``fish_age``, ``fish_generation`` and ``measurement``.
    pretty_name : str
        Remote file name without the ``.txt`` extension.
    verbose : bool, default False
        When true, print the remote directory listing after the upload.

    Notes
    -----
    Reads the FTP password from the module-level ``password`` variable, which
    must be set before this function is called.

    Raises
    ------
    ftplib.all_errors
        Any login, ``cwd`` or transfer failure reported by ``ftplib``.
    """
    columns = ['fish_id', 'line_id', 'fish_sex', 'fish_age', 'fish_generation', 'measurement']

    # Serialise before connecting so a malformed frame fails without opening a session.
    csv_text = df.to_csv(header=True, index=False, columns=columns)
    payload = io.BytesIO(csv_text.encode())

    # The context manager sends QUIT and closes the socket even if the upload
    # fails; previously every call left a control connection open.
    with FTP('ftp-private.ebi.ac.uk') as ftp:
        ftp.login('indigene_ftp', password)
        ftp.cwd('/upload/ct_scans/morphometric_analysis')

        ftp.storbinary(f'STOR {pretty_name}.txt', payload)

        if verbose:
            # FTP.dir() writes the listing to stdout itself and returns None,
            # so wrapping it in print() would append a stray "None" line.
            ftp.dir()

In [5]:
def list_ftp():
    """Print the listing of the morphometric-analysis folder on the project FTP.

    Logs in with the module-level ``password`` variable, so a rejected login
    surfaces as ``ftplib.error_perm``; callers can use this as a credentials
    check.

    Raises
    ------
    ftplib.all_errors
        Any login or ``cwd`` failure reported by ``ftplib``.
    """
    # The context manager sends QUIT and closes the socket on exit, including
    # when login fails; previously the connection was left open.
    with FTP('ftp-private.ebi.ac.uk') as ftp:
        ftp.login('indigene_ftp', password)
        ftp.cwd('/upload/ct_scans/morphometric_analysis')

        # FTP.dir() writes the listing to stdout itself and returns None,
        # so wrapping it in print() would append a stray "None" line.
        ftp.dir()

In [6]:
def get_df(iface, par_name):
    """Return a long-format table of one measurement with sample metadata.

    Queries ``iface.as_table`` for ``par_name`` plus the metadata fields
    ``line``, ``id``, ``sex``, ``age`` and ``generation``, drops samples where
    the measurement is missing, flattens list-valued measurements into one row
    per value and renames the columns to the export schema
    (``measurement``, ``line_id``, ``fish_id``, ``fish_sex``, ``fish_age``,
    ``fish_generation``).
    """
    meta_fields = ['line', 'id', 'sex', 'age', 'generation']
    table = pd.DataFrame(iface.as_table([par_name] + meta_fields))

    # keep only the samples that actually carry this measurement
    table = table.loc[table[par_name].notna()]

    # each pass unpacks one level of list nesting; three passes flatten up to
    # three levels, and values that are already scalar pass through unchanged
    for _ in range(3):
        table = table.explode(par_name)

    export_names = {
        par_name: 'measurement',
        'line': 'line_id',
        'id': 'fish_id',
        'sex': 'fish_sex',
        'age': 'fish_age',
        'generation': 'fish_generation',
    }
    return table.rename(columns=export_names)

## Connect to the database and the FTP server

In [7]:
# Read the YAML processing config and build the database interface from its
# 'db' section. `iface` is the handle the later cells query (as_table) and
# update (update_sample_record).
with open('measurement_configs/processing/medaka_mongo.yaml') as f:
    # safe_load parses plain YAML only and does not construct arbitrary Python objects
    iface = get_database_interface(yaml.safe_load(f)['db'])

In [25]:
# Getting password for the FTP
# The password is requested interactively so it is never written into the
# notebook; it is stored in the module-level `password` that the FTP helper
# functions read.

import getpass
password = getpass.getpass(prompt='Password for the FTP, please: ')
try:
    # list_ftp() logs in with `password`, so calling it doubles as a credentials check
    list_ftp()
except error_perm:
    # error_perm is ftplib's exception for permanent (5xx) server replies, e.g. a rejected login
    # NOTE(review): a failing cwd inside list_ftp() would presumably end up here too — confirm
    print('Password is incorrect! Please, re-run this cell and try again.')

Password for the FTP, please: ········
drwxrws---    2 ftp      ftp          4096 Apr 01  2021 backup
-rw-rw----    1 ftp      ftp         18392 Aug 24 15:20 eyes.iris.color_average.txt
-rw-rw----    1 ftp      ftp         18339 Aug 24 15:20 eyes.iris.color_std.txt
-rw-rw----    1 ftp      ftp          8940 Aug 24 15:20 eyes.iris.distance_between_centers.txt
-rw-rw----    1 ftp      ftp         14371 Aug 24 15:20 eyes.iris.eccentricity_meridional.txt
-rw-rw----    1 ftp      ftp         12353 Aug 24 15:20 eyes.iris.surface_area.txt
-rw-rw----    1 ftp      ftp         12356 Aug 24 15:20 eyes.iris.volume.txt
-rw-rw----    1 ftp      ftp         18849 Aug 24 15:20 eyes.lens.color_average.txt
-rw-rw----    1 ftp      ftp         18635 Aug 24 15:20 eyes.lens.color_std.txt
-rw-rw----    1 ftp      ftp          9444 Aug 24 15:20 eyes.lens.distance_between_centers.txt
-rw-rw----    1 ftp      ftp        110576 Aug 24 15:20 eyes.lens.radius_axial.txt
-rw-rw----    1 ftp      ftp         12559 

## Adding meta-information to the database

In [27]:
# One sheet of the sample list per scanning campaign (sheet index -> campaign).
scan_201811 = pd.read_excel('samplelist_complete.xlsx', sheet_name=4)
scan_201905 = pd.read_excel('samplelist_complete.xlsx', sheet_name=3)
scan_201912 = pd.read_excel('samplelist_complete.xlsx', sheet_name=2)
scan_202012 = pd.read_excel('samplelist_complete.xlsx', sheet_name=1)

# Build the metadata table in one chain:
#   1. keep only the samples marked as scanned, with the columns of interest
#   2. derive the age in days at scanning (DOS - DOB; unparsable DOB -> NaN)
#   3. rename to the database field names
#   4. keep rows with a numeric sample number and store it as a string id
all_scans = (
    pd.concat([scan_201811, scan_201905, scan_201912, scan_202012])
    .loc[
        lambda scans: scans['Scanned stained'].isin(['yes', 'Yes', 'Scanned', 'scanned']),
        ['S.No', 'Line', 'Gen.', 'Sex', 'DOB', 'DOS'],
    ]
    .assign(age=lambda scans: (scans['DOS'] - pd.to_datetime(scans['DOB'], errors='coerce')).dt.days)
    .drop(columns=['DOS', 'DOB'])
    .rename(columns={'S.No': 'id', 'Line': 'line', 'Gen.': 'generation', 'Sex': 'sex'})
    .assign(id=lambda scans: pd.to_numeric(scans['id'], errors='coerce'))
    .dropna(subset=['id'])
    .assign(id=lambda scans: scans['id'].astype(int).astype(str))
)

In [30]:
# Sample ids present both in the database records and in the cleaned sample list.
set_of_reliable_ids = {record['id'] for record in iface.as_table([])} & set(all_scans['id'])

In [35]:
# Write the sample-list metadata (id, line, generation, sex, age) into the
# database, one record per reliable id. The loop variable is deliberately not
# called `id`: at notebook top level that would shadow the builtin id() for
# every later cell in the kernel session.
for sample_id in tqdm(set_of_reliable_ids):
    # if an id occurs on several sheets, the first matching row is used
    iface.update_sample_record(all_scans[all_scans['id'] == sample_id].iloc[0].to_dict())

HBox(children=(IntProgress(value=0, max=324), HTML(value='')))




## Generating the dataframes with all measurements available to be saved

In [42]:
# Every field name seen in any database record, minus the metadata fields,
# is treated as a measurement key.
possible_fields = {
    field
    for rec in iface.as_table([])
    for field in rec
} - {'line', 'id', 'sex', 'age', 'generation'}

# For each measurement, report how many distinct fish have at least one value.
for measurement_key in tqdm(possible_fields):
    df = get_df(iface, measurement_key)
    print(measurement_key, df.loc[df['measurement'].notna(), 'fish_id'].nunique())

HBox(children=(IntProgress(value=0, max=35), HTML(value='')))

eyes.muscles.surface_area 324
eyes.nerve.color_std 234
eyes.nerve.surface_area 324
liver.liver.color_std 293
liver.liver.surface_area 298
eyes.lens.surface_area 324
eyes.muscles.distance_between_centers 312
eyes.muscles.volume 324
eyes.iris.color_std 315
eyes.lens.radius_axial 319
eyes.lens.distance_between_centers 324
eyes.iris.volume 324
eyes.retina.volume 324
eyes.nerve.color_average 234
eyes.lens.color_average 324
eyes.nerve.volume 324
eyes.retina.color_average 324
eyes.retina.surface_area 324
eyes.iris.eccentricity_meridional 242
eyes.lens.volume 324
liver.liver.eccentricity_equatorial 293
liver.liver.eccentricity_meridional 293
eyes.lens.color_std 324
eyes.muscles.color_average 316
eyes.muscles.eccentricity_meridional 225
eyes.muscles.color_std 316
liver.liver.volume 298
eyes.lens.thickness_axial 293
liver.liver.color_average 293
eyes.iris.color_average 315
eyes.retina.eccentricity_meridional 320
eyes.retina.distance_between_centers 320
eyes.retina.color_std 324
eyes.iris.surface

In [48]:
## generating and uploading all datasets
# One remote file per measurement key, named "<measurement_key>.txt".
# save_df_to_ftp opens its own FTP connection on every call.

for measurement_key in tqdm(possible_fields):
    df = get_df(iface, measurement_key)
    save_df_to_ftp(df, measurement_key)

HBox(children=(IntProgress(value=0, max=35), HTML(value='')))




In [49]:
# List the remote folder again after the upload so sizes and timestamps can be
# compared with the earlier listing.
list_ftp()

drwxrws---    2 ftp      ftp          4096 Apr 01  2021 backup
-rw-rw----    1 ftp      ftp         23723 Oct 05 10:48 eyes.iris.color_average.txt
-rw-rw----    1 ftp      ftp         23680 Oct 05 10:47 eyes.iris.color_std.txt
-rw-rw----    1 ftp      ftp         11558 Oct 05 10:49 eyes.iris.distance_between_centers.txt
-rw-rw----    1 ftp      ftp         18471 Oct 05 10:48 eyes.iris.eccentricity_meridional.txt
-rw-rw----    1 ftp      ftp         16033 Oct 05 10:49 eyes.iris.surface_area.txt
-rw-rw----    1 ftp      ftp         16030 Oct 05 10:47 eyes.iris.volume.txt
-rw-rw----    1 ftp      ftp         24563 Oct 05 10:47 eyes.lens.color_average.txt
-rw-rw----    1 ftp      ftp         24298 Oct 05 10:48 eyes.lens.color_std.txt
-rw-rw----    1 ftp      ftp         12285 Oct 05 10:47 eyes.lens.distance_between_centers.txt
-rw-rw----    1 ftp      ftp        143819 Oct 05 10:47 eyes.lens.radius_axial.txt
-rw-rw----    1 ftp      ftp         16350 Oct 05 10:47 eyes.lens.surface_area.txt