In [None]:
import requests

# Fetch the spacr classification module from GitHub and save it next to the
# notebook so that it can be imported below.
# NOTE(review): this downloads and then executes code from an unpinned branch
# ('main'); consider pinning the URL to a commit hash so runs are reproducible.
url = 'https://raw.githubusercontent.com/EinarOlafsson/spacr/main/spacr_classification.py'

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=60)

# Fail loudly on HTTP errors. Without this, an error page (e.g. "404: Not Found")
# would be written into the .py file and the import below would fail with a
# confusing syntax error.
response.raise_for_status()

# Write explicitly as UTF-8 (Python's default source encoding) rather than the
# platform default, which can mangle or reject non-ASCII characters.
with open('spacr_classification.py', 'w', encoding='utf-8') as file:
    file.write(response.text)

import spacr_classification as spacr

In [None]:
# Generate a tar file containing all images in the database.
# Expect this step to be very slow for large datasets.

src = 'path'
spacr.generate_dataset(
    db_path=f'{src}/measurements/measurements.db',
    dst='path',
    file_type='cell_png',
    experiment='test',
    sample=20,
)

In [None]:
# Build train and test folders, each with one subfolder per class, for
# training DL models.

# NOTE(review): machine-specific absolute path; the other cells in this
# notebook use the 'path' placeholder -- confirm which is intended here.
src = '/mnt5/einar/cellvoyager/40x/test2/plate1'
spacr.generate_training_dataset(
    db_path=f'{src}/measurements/measurements.db',
    dst='path',
    mode='annotation',  # one of: annotation, recruitment, metadata
    annotation_column='test',  # annotation mode
    annotated_classes=[1, 2],  # annotation mode
    classes=['nc', 'pc'],  # annotation mode
    size=20,  # number of images per class
    test_split=0.1,
    metadata_type_by='col',
    class_metadata=[['c2', 'c4', 'c6'], ['c22', 'c23', 'c3']],  # metadata mode
    channel_of_interest=3,  # recruitment mode
)

In [None]:
# Train a torch model to classify images. If 'test' is True, a model will be
# tested on the test set (generated in previous steps).

src = 'path'
settings = dict(
    train=True,
    test=False,
    classes=['nc', 'pc'],
    model_type='maxvit_t',
    optimizer_type='adamw',
    schedule='reduce_lr_on_plateau',  # options: reduce_lr_on_plateau, step_lr
    loss_type='focal_loss',  # options: binary_cross_entropy_with_logits, focal_loss
    normalize=True,
    image_size=224,
    batch_size=12,
    epochs=2,
    val_split=0.1,
    train_mode='erm',
    learning_rate=1e-4,
    weight_decay=1e-5,
    dropout_rate=0.1,
    init_weights=True,
    amsgrad=True,
    use_checkpoint=True,
    gradient_accumulation=True,
    gradient_accumulation_steps=4,
    # NOTE(review): key spelling ('intermedeate') is kept exactly as written;
    # it presumably has to match what train_test_model reads -- TODO confirm.
    intermedeate_save=True,
    pin_memory=True,
    num_workers=30,
    verbose=True,
)

spacr.train_test_model(src, settings)

In [None]:
# Apply a trained model (model_path) to the images in a tar archive (tar_path)
# and keep the returned value in result_df.
# NOTE(review): description inferred from the call itself -- TODO confirm.

result_df = spacr.apply_model_to_tar(
    tar_path='path',
    # This model is trained on nc vs pc; cells filtered on cell size,
    # parasite size and recruitment >2 for nc and <2 for pc.
    model_path='path',
    file_type='cell_png',
    image_size=224,
    batch_size=12,
    normalize=True,
    preload='images',
    num_workers=30,
    verbose=True,
)

In [None]:
# Merge the model predictions (pred_loc) with the measurements under src, then
# generate the dependent variable from the merged table into
# <src>/measurements/.
# NOTE(review): description inferred from the function names -- TODO confirm.

src = 'path'
res_df = spacr.merge_pred_mes(
    src,
    pred_loc='path',
    target='protein',
    nucleus_dim=0,
    parasite_dim=2,
    cell_dim=3,
    channel_of_interest=1,
    parasite_size_min=0,
    nucleus_size_min=0,
    cell_size_min=0,
    parasite_min=0,
    nucleus_min=0,
    cell_min=0,
    target_min=0,
    mask_chans=[0, 1, 2],
    filter_data=False,
    include_noninfected=False,
    include_multiinfected=True,  # bool or int
    include_multinucleated=True,
    cells_per_well=0,
    save_filtered_filelist=False,
    verbose=True,
)

dv_loc = f'{src}/measurements/'
spacr.generate_dependent_variable(
    df=res_df,
    dv_loc=dv_loc,
    pc_min=0.99,
    nc_max=0.95,
    agg_type='mean',
)

In [None]:
# Run the regression analysis on the dependent variable (dv_loc) together with
# the sequencing data (sequencing_loc); the returned value is kept in coeffs.
# NOTE(review): description inferred from the argument names -- TODO confirm.

coeffs = spacr.analyze_data_reg(
    sequencing_loc='path',
    dv_loc='path',
    dv_col='recruitment',
    agg_type='mean',
    transform='log',
    min_cell_count=100,
    min_reads=75,
    min_wells=1,
    max_wells=50,
    min_frequency=0.05,
    remove_outlier_genes=False,
    refine_model=True,
    by_plate=False,
    regression_type='lasso',
    alpha_value=1e-6,
    fishers=True,
    fisher_threshold=1.3,
)