# Imports

In [3]:
import sys
import time

import numpy as np
import torch
from bnpm import file_helpers, optimization
import sklearn.utils.class_weight
from torch import nn, optim
from tqdm import tqdm
import sklearn.linear_model
import multiprocessing as mp

import roicat.classification.classifier_util as cu
import scipy.sparse
import roicat
import bnpm.h5_handling
from pathlib import Path
import sys
from pathlib import Path
import shutil
import warnings
import umap

import matplotlib.pyplot as plt
import matplotlib.offsetbox
import json

# Specify Initial Parameters

In [4]:
# Optional JSON file of run parameters; when None the in-notebook defaults are used.
path_params = None # Path(r"")

# Input / output locations for this stage of the classification pipeline.
_dir_classification = r'/Users/josh/analysis/outputs/ROICaT/classification'
directory_data = _dir_classification + r'/00_data_ingestion'
directory_save = _dir_classification + r'/01_labels'

# testing: generate placeholder labels instead of hand labels.
# save_ROIs / save_latents: toggle the optional .npy outputs written at the end.
testing, save_ROIs, save_latents = True, True, True

In [5]:
# Work with the output location as a Path from here on, creating it if needed.
directory_save = Path(directory_save)
directory_save.mkdir(parents=True, exist_ok=True)

# Intermediate results are collected here and written out with the other outputs.
labelingRun_interim = dict()

# Best effort: keep a copy of the parameter file next to the outputs.
# A failure here is reported but does not stop the run.
if path_params is not None:
    try:
        directory_save.resolve().mkdir(parents=True, exist_ok=True)
        destination_params = directory_save / Path(path_params).name
        shutil.copy2(path_params, str(destination_params))
    except Exception as e:
        print(f'JZ: Error copying params to {directory_save}')
        print(e)

# Simple wall-clock bookkeeping, measured relative to `tic`.
tic = time.time()
tictoc = {'start': time.time() - tic}

# Load parameters from file when one was given; otherwise leave them unset.
if path_params is None:
    params = None
else:
    params = file_helpers.json_load(str(Path(path_params).resolve()))

In [6]:
# Fall back to in-notebook defaults when no parameter file was supplied.
if params is None:
    params = {
        "method": "simclr",
        "device": "cuda:0",
        "datatype": "raw_images",
        "hyperparameters_split": {
            "n_train": 50000,
            "test_size": 0.3
        },
        "paths": {
            "directory_github": "/Users/josh/analysis/github_repos/",
            "directory_simclrModel": "/Users/josh/analysis//models",
            "filepath_umapModel": None,
        },
        "hyperparameters_training_classifier": {
            "num_transform_copies": 80,
            "solver": "lbfgs",
            "fit_intercept": True,
            "max_iter": 20000,
            "C": 0.01,
            "tol": 0.001,
            "simclrModel_download_url": "https://osf.io/xwzhp/download",
            "simclrModel_download_hash": "134b170242141c26b0adbd9e0fd80d0e"
        },
        # "hyperparameters_augmentations_val": {
        #     "Scale_image_sum": {
        #         "sum_val": 1,
        #         "epsilon": 1e-09,
        #         "min_sub": True
        #     },
        #     "ScaleDynamicRange": {
        #         "scaler_bounds": [0,1],
        #         "epsilon": 1e-09
        #     },
        #     "WarpPoints": {
        #         "r": [0.1,0.2],
        #         "cx": [-0.3,0.3],
        #         "cy": [-0.3,0.3],
        #         "dx": [-0.1,0.1],
        #         "dy": [-0.1,0.1],
        #         "n_warps": 1,
        #         "prob": 0.0,
        #         "img_size_in": [36,36],
        #         "img_size_out": [224,224]
        #     },
        #     "TileChannels": {"dim": -3, "n_channels": 3}
        # },
        "run_umap": True,
    }

    # The ROInet embedder is constructed from
    # params['hyperparameters_training_simclr'][...], which the defaults above
    # did not provide (KeyError on a default run). Expose the checkpoint URL and
    # hash under that key too; the original entries are kept where they were so
    # any reader of the old location keeps working.
    params["hyperparameters_training_simclr"] = {
        key: params["hyperparameters_training_classifier"][key]
        for key in ("simclrModel_download_url", "simclrModel_download_hash")
    }

# NOTE(review): the return value of set_device is discarded and params['device']
# is passed on unchanged to later cells — confirm this call is only meant as an
# availability check / log of the selected device.
roicat.util.helpers.set_device(params['device'])



# Import Datasets

In [8]:
# Restore the Data_roicat object from the pickle in `directory_data`.
_path_classification_data = Path(directory_data) / 'classification_data.pkl'
data = roicat.data_importing.Data_roicat(verbose=True)
data.load(str(_path_classification_data))



Loaded Data_roicat object from /Users/josh/analysis/outputs/ROICaT/classification/00_data_ingestion/classification_data.pkl.


# Pass Data Through Network

In [None]:
# Wrap the ROI images in the SimCLR dataset with a single, un-augmented copy per
# ROI. The labels are all-zero placeholders; only the network outputs are used.
dataset_val = roicat.ROInet.dataset_simCLR(
        X=torch.as_tensor(data.ROI_images, device='cpu', dtype=torch.float32),
        y=torch.as_tensor(np.zeros((data.ROI_images.shape[0])), device='cpu', dtype=torch.float32),
        n_transforms=1,
        class_weights=np.array([1]),
        # transforms_final_val = cu.get_transforms(params['hyperparameters_augmentations_val'], scripted=True)
        # transform=transforms_final_val, # *Use WarpPoints
        DEVICE='cpu',
        dtype_X=torch.float32,
    )

# shuffle=False / drop_last=False so every ROI is visited once, in dataset order.
num_workers = 0  # mp.cpu_count()
kwargs_dataloader = dict(
    batch_size=64,
    shuffle=False,
    drop_last=False,
    pin_memory=False,
    num_workers=num_workers,
)
if num_workers > 0:
    # These options only apply to worker processes. Recent PyTorch releases
    # raise ValueError when prefetch_factor is given together with num_workers=0.
    kwargs_dataloader.update(persistent_workers=False, prefetch_factor=2)
dataloader_val = torch.utils.data.DataLoader(dataset_val, **kwargs_dataloader)

# Locate the checkpoint download settings. They are read from
# 'hyperparameters_training_simclr'; the notebook's built-in defaults list them
# under 'hyperparameters_training_classifier', so accept that location as well.
_keys_download = ('simclrModel_download_url', 'simclrModel_download_hash')
params_download = next(
    (
        params[section]
        for section in ('hyperparameters_training_simclr', 'hyperparameters_training_classifier')
        if all(key in params.get(section, {}) for key in _keys_download)
    ),
    None,
)
if params_download is None:
    raise KeyError(
        "params must define 'simclrModel_download_url' and 'simclrModel_download_hash' "
        "under 'hyperparameters_training_simclr'."
    )

roinet = roicat.ROInet.ROInet_embedder(
    device=params['device'],
    dir_networkFiles=params['paths']['directory_simclrModel'],
    download_method='check_local_first',
    forward_pass_version='head',
    download_url=params_download['simclrModel_download_url'],
    download_hash=params_download['simclrModel_download_hash'],
    verbose=True,
)

print(f'Extracting transformed images from dataloaders, passing through roinet model, and saving to {directory_save}...')

# Only the features are used downstream; the remaining outputs are discarded.
features_val, _labels_val, _idx_val, _sample_val = cu.extract_with_dataloader(
    dataloader_val,
    model=roinet.net,
    num_copies=1,
    device=params['device'],
)

labelingRun_interim['features_val'] = features_val
print('Unaugmented run completed.')

# Run 2D UMAP or Project onto a Previously Fit UMAP for Hand Labeling

In [None]:
# Fit a 2D UMAP on the network features, then embed those same features.
# The fitted model lives in `umap_model`: binding it to `umap` would overwrite
# the imported module and make this cell fail when it is run a second time.
filepath_umapModel = params['paths']['filepath_umapModel']

if params['run_umap']:
    if filepath_umapModel:
        # Saving is not supported yet; fail before spending time on the fit.
        raise NotImplementedError('Saving UMAP to file not yet implemented.') # TODO: JZ, Implement saving UMAP to file
        # print(f'Saving UMAP to {params["paths"]["filepath_umapModel"]}...')
        # joblib.dump(umap_model, params['paths']['filepath_umapModel'])

    umap_params = dict(
        n_neighbors=30,
        n_components=2,
        metric='euclidean',
        metric_kwds=None,
        output_metric='euclidean',
        output_metric_kwds=None,
        n_epochs=None,
        learning_rate=1.0,
        init='spectral',
        min_dist=0.1,
        spread=1.0,
        low_memory=True,
        n_jobs=-1,
        set_op_mix_ratio=1.0,
        local_connectivity=1.0,
        repulsion_strength=1.0,
        negative_sample_rate=5,
        transform_queue_size=4.0,
        a=None,
        b=None,
        random_state=None,
        angular_rp_forest=False,
        target_n_neighbors=-1,
        target_metric='categorical',
        target_metric_kwds=None,
        target_weight=0.5,
        transform_seed=42,
        transform_mode='embedding',
        force_approximation_algorithm=False,
        verbose=False,
        tqdm_kwds=None,
        unique=False,
        densmap=False,
        dens_lambda=2.0,
        dens_frac=0.3,
        dens_var_shift=0.1,
        output_dens=False,
        disconnection_distance=None,
        precomputed_knn=(None, None, None),
    )

    umap_model = umap.UMAP(**umap_params)
    print('Fitting UMAP...')
    umap_model.fit(features_val)
    print('Unspecified filepath_umapModel in params... not saving UMAP to file.')
elif filepath_umapModel:
    raise NotImplementedError('Loading UMAP from file not yet implemented.') # TODO: JZ, Implement loading UMAP from file
    # print(f'Loading UMAP from {params["paths"]["filepath_umapModel"]}...')
    # umap_model = joblib.load(params['paths']['filepath_umapModel'])
else:
    raise ValueError('run_umap must be True or filepath_umapModel must be specified in params.')

print('Generating Embeddings...')
embeddings = umap_model.transform(features_val)
labelingRun_interim['embeddings'] = embeddings
print('Embeddings Generated...')
%matplotlib inline
fig, ax = plt.subplots(figsize=(5,5))

ax.scatter(embeddings[:,0], embeddings[:,1], s=5, cmap='gist_rainbow')

# inx_lst = list(range(100))
inx_lst = np.arange(ROI_images_filt.shape[0])
img_lst = ROI_images_filt[inx_lst]
x = embeddings[inx_lst, 0]
y = embeddings[inx_lst, 1]

for x0, y0, ROI_single in zip(x, y, img_lst):
    offsetImage = matplotlib.offsetbox.OffsetImage(ROI_single, cmap='gray', zoom=0.5) # initialize offset image to contain ROI visualization
    ab = matplotlib.offsetbox.AnnotationBbox(offsetImage, (x0, y0), frameon=False)
    ax.add_artist(ab)

# TODO: JZ, Add Circling Code for Labeling
if testing:
    # Placeholder labels for pipeline testing: first half 0, second half 1.
    # The count comes from `data.ROI_images` because `ROI_images_filt` is not
    # defined anywhere in this notebook.
    num_ROIs = data.ROI_images.shape[0]
    num_zeroLabels = num_ROIs // 2
    num_onesLabels = num_ROIs - num_zeroLabels
    arr_labels = np.concatenate([np.zeros(num_zeroLabels), np.ones(num_onesLabels)])
else:
    # The previous message was copied from the UMAP-saving branch and described
    # the wrong missing feature.
    raise NotImplementedError('Hand labeling of ROIs not yet implemented; set testing=True to generate placeholder labels.') # TODO: JZ, Implement hand labeling

# Save Outputs

In [None]:
def _save_npy(filename, arr, **kwargs_save):
    """Write `arr` with np.save to `filename` inside `directory_save`.

    The file is opened explicitly and the handle passed to np.save, so the
    name is used exactly as given. Extra keyword arguments go to np.save.
    """
    with open(str((Path(directory_save) / filename).resolve()), 'wb') as f:
        np.save(file=f, arr=arr, **kwargs_save)


_save_npy('arr_labels.npy', arr_labels)

if save_ROIs:
    # Previously this wrote `features_val`, making arr_ROIs.npy a duplicate of
    # arr_latents.npy. Save the ROI images the features were computed from.
    _save_npy('arr_ROIs.npy', data.ROI_images)

if save_latents:
    _save_npy('arr_latents.npy', features_val)

# Bundle the intermediate results with the parameters that produced them.
# The dict is stored via pickle inside the .npy container.
labelingRun_interim['params_prespecified'] = params
_save_npy('pkl_labelingRunInterim.npy', labelingRun_interim, allow_pickle=True)

print('Saving files completed.')
