# In [1]:
# Using Python 3.12.1 (local env: HeatNSalt)

# Retrieve information about outputs/image_data_complete.csv

import pandas as pd

# Load the image table, discard every column that contains a missing value,
# and derive each row's specimen ID: the sample name with its last
# '_'-separated token removed.
image_data = (
    pd.read_csv('outputs/image_data_complete.csv')
    .dropna(axis=1)
    .assign(**{
        'Specimen ID': lambda frame: (
            frame['Sample Name/Number'].str.split('_').str[:-1].str.join('_')
        ),
    })
)

# Number of image rows recorded for each specimen ID.
id_counts = image_data['Specimen ID'].value_counts()

# Summary statistics of the per-specimen counts; they are written to the
# text report further down in this script.
max_count = id_counts.max()
min_count = id_counts.min()
median_count = id_counts.median()
mean_count = id_counts.mean()
std_count = id_counts.std()

# Persist the per-specimen counts as a spreadsheet.
id_counts.to_frame().to_excel('outputs/info_image_data_count.xlsx')

# Retrieve information about outputs/setup_data_complete.csv

# Load the setup table and break each sample name into its '_'-separated
# fields. The assignment below only succeeds when the split yields exactly
# four columns, i.e. names shaped like Plate_Well_Increment_ImagingRun.
setup_data = pd.read_csv('outputs/setup_data_complete.csv')
setup_data[['Plate', 'Well', 'Increment', 'Imaging_run']] = (
    setup_data['Sample Name/Number'].str.split('_', expand=True)
)

# Neither the imaging-run field nor the 'Determiner' column is kept.
setup_data = setup_data.drop(columns=['Imaging_run', 'Determiner'])

# Strip the trailing '_<Imaging_run>' suffix from the sample name. The
# vectorised string accessor passes a missing name through as NaN, whereas a
# per-row Python rsplit raises AttributeError on it.
setup_data['Sample Name/Number'] = (
    setup_data['Sample Name/Number'].str.rsplit('_', n=1).str[0]
)

# Retrieve information about raw/specimen_weights.xlsx


# Load the weighing table; the first spreadsheet column becomes the index.
specimen_info = pd.read_excel('raw/specimen_weights.xlsx', index_col=0)

# Net specimen weight = tube with dried specimen minus the empty tube.
# Values below zero are collected here and listed in the text report.
specimen_info['Specimen Weight'] = (
    specimen_info['Tube Weight with dried Specimen']
    - specimen_info['Empty Tube Weight']
)

# Rows whose 'Channel' label contains 'NC' are the negative controls.
# na=False makes a missing channel count as "not a negative control";
# without it the mask contains NaN and both the boolean indexing and the
# '~' inversion below fail.
is_negative_control = specimen_info['Channel'].str.contains('NC', na=False)

NCfiltered_specimen_info = specimen_info[is_negative_control]
negative_weight_NCs = NCfiltered_specimen_info[
    NCfiltered_specimen_info['Specimen Weight'] < 0
]

filtered_specimen_info = specimen_info[~is_negative_control]
negative_weight_specimens = filtered_specimen_info[
    filtered_specimen_info['Specimen Weight'] < 0
]


# Assemble the plain-text report: image-count statistics first, then the
# samples whose computed weight is negative. The lines are built before the
# file is opened, so a failure while formatting does not leave a truncated
# report behind. '!s' forces str(), matching the original concatenation.
# NOTE(review): if `.values` is a NumPy array, str() abbreviates long arrays
# with '...' — confirm the negative-weight lists stay short.
report_lines = [
    'Printing information about all collected images',
    '',
    f'Minimum number of images per specimen: {min_count!s}',
    f'Mean number of images per specimen: {mean_count!s}',
    f'Median number of images per specimen: {median_count!s}',
    f'Standard deviation number of images per specimen: {std_count!s}',
    f'Maximum number of images per specimen: {max_count!s}',
    '',
    '',
    'Printing information about specimen with mathematically negative weights:',
    '',
    # The negative-weight subsets already exist; count those rather than
    # re-running the same filter.
    'Number of negative controls with mathematically negative weights: '
    f'{len(negative_weight_NCs)!s}',
    str(negative_weight_NCs['Sample Name/Number'].values),
    '',
    'Number of specimen with mathematically negative weights: '
    f'{len(negative_weight_specimens)!s}',
    str(negative_weight_specimens['Sample Name/Number'].values),
]

# An explicit encoding keeps the report identical across platforms even when
# sample names contain non-ASCII characters. As before, the last line is
# written without a trailing newline.
with open('outputs/info_images.txt', 'w', encoding='utf-8') as tf:
    tf.write('\n'.join(report_lines))



