In [1]:
# Widen the notebook container to 95% of the browser window.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
import sys
import os
import copy
import pathlib

import numpy as np
import matplotlib.pyplot as plt
import scipy

import torch
import torchvision
import torchvision.transforms as transforms

# for creating validation set
from sklearn.model_selection import train_test_split

%matplotlib notebook
%matplotlib notebook
# %matplotlib inline
# %matplotlib qt

In [4]:
import sys

# sys.path.append('/Users/Josh/Documents/github_repos/')
sys.path.append('/media/rich/Home_Linux_partition/github_repos/')

%load_ext autoreload
%autoreload 2
from basic_neural_processing_modules import torch_helpers, decomposition
from GCaMP_ROI_classifier.old_stuff import util, models, training_simCLR, augmentation

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Import training data

In [5]:
import pickle

# Read the pickled dictionary of scraped stat files from local disk.
# Alternate location: '/Users/Josh/Documents/Harvard/label_data/all_stat_files_20211022.pkl'
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
with open(
    '/media/rich/bigSSD/for_Josh/GCAMP_ROI_Classifier_data/label_data/all_stat_files_20211022.pkl',
    mode='rb',
) as file:
    statFiles_scraped = pickle.load(file)
    

In [6]:
# Stat files that are excluded from the scraped collection before conversion.
keys_toDelete = [
    '\\\\research.files.med.harvard.edu\\Neurobio\\MICROSCOPE\\Rich\\data\\res2p\\scanimage data\\for Loic\\16x_analysis20200514\\Suite2p and MC for denoised movie\\suite2p\\plane0\\stat.npy',
    '\\\\research.files.med.harvard.edu\\Neurobio\\MICROSCOPE\\Rich\\data\\res2p\\scanimage data\\round 4 experiments\\mouse 6.28\\20201102\\suite2p_accidentally_run_on_everything\\plane0\\stat.npy',
    '\\\\research.files.med.harvard.edu\\Neurobio\\MICROSCOPE\\Rich\\data\\res2p\\scanimage data\\for Loic\\20x\\mouse 2.5\\20200306\\suite2p attemp 2 _tau 1_5\\plane0\\stat.npy',
    '\\\\research.files.med.harvard.edu\\Neurobio\\MICROSCOPE\\Rich\\data\\res2p\\scanimage data\\for Loic\\20x\\mouse 2.5\\20200306\\suite2p attempt 1 _tau 2\\plane0\\stat.npy',
    '\\\\research.files.med.harvard.edu\\Neurobio\\MICROSCOPE\\Rich\\data\\res2p\\scanimage data\\for Loic\\20x\\mouse 2.5\\20200306\\suite2p attempt 3 _tau 1\\plane0\\stat.npy',
    '\\\\research.files.med.harvard.edu\\Neurobio\\MICROSCOPE\\Rich\\data\\res2p\\scanimage data\\for Loic\\AAV\\oldBMIrampExp_20191112_mouse1013A\\ROI extraction\\suite2p\\plane0\\stat.npy',
]
for key in keys_toDelete:
    # pop() with a default keeps this cell idempotent: re-running it after the
    # keys are already gone no longer raises KeyError (plain `del` did).
    statFiles_scraped.pop(key, None)

In [7]:
# Run every scraped stat file through the project converter (36x36 output,
# plotting disabled), then stack the per-file results along the first axis.
sf_list = util.convert_multiple_stat_files(
    statFiles_dict=statFiles_scraped,
    out_height_width=[36, 36],
    max_footprint_width=901,
    plot_pref=False,
)

images_all = np.concatenate(sf_list, axis=0)

# Last expression: shown as the cell output.
images_all.shape

(217802, 36, 36)

In [8]:
# Rescale every image so that its pixels sum to 255; the per-image sum is taken
# over the two trailing axes and kept broadcastable via keepdims.
images_all_norm = np.divide(images_all, images_all.sum(axis=(1, 2), keepdims=True)) * 255

In [9]:
# Hold out 15% of the normalized images as a validation split. The "labels" passed
# here are simply each image's row index in `images_all`.
# random_state is pinned so that Restart & Run All reproduces the same split.
# NOTE: X_train / X_val / y_train / y_val are rebound further down by the split of
# the labeled data (`images_dup` / `labels_dup`).
X_train, X_val, y_train, y_val = train_test_split(
    images_all_norm,
    np.arange(images_all.shape[0]),
    test_size=0.15,
    random_state=0,
)
(X_train.shape, y_train.shape), (X_val.shape, y_val.shape)

(((185131, 36, 36), (185131,)), ((32671, 36, 36), (32671,)))

In [10]:
# Locate the labeled sessions: each folder below is expected to contain a
# `stat.npy` file and a per-session label file.
dir_folders = r'/media/rich/bigSSD/for_Josh/GCAMP_ROI_Classifier_data/label_data'
# dir_folders = r'/users/Josh/Documents/Harvard/label_data'
folders = [r'mouse 6_28 _ day 20200903/',
           r'mouse6_28 _ day20200815/']
fileNames_statFiles = [r'stat.npy']*len(folders)
paths_statFiles = [pathlib.Path(dir_folders) / folder / fileName
                   for folder, fileName in zip(folders, fileNames_statFiles)]

# Single source of truth for the stat-file import options; previously this dict
# was defined but the same values were repeated by hand in the call below.
statFile_import_kwargs = {'out_height_width': [36,36],
                          'max_footprint_width': 241,
                          'plot_pref':True}

sf_all = util.import_multiple_stat_files(paths_statFiles=paths_statFiles,
                                         fileNames_statFiles=fileNames_statFiles,
                                         **statFile_import_kwargs)
# Stack the per-session image arrays and scale by 255.
images_labeled = np.concatenate(sf_all, axis=0) *255
print(f'concatenated images shape: {images_labeled.shape}')

fileNames_labelFiles = ['labels_posthoc_filledIn_allCells.npy',
                        'labels_posthoc_all.npy']
paths_labelFiles = [pathlib.Path(dir_folders) / folder / fileName
                    for folder, fileName in zip(folders, fileNames_labelFiles)]

labels_all = util.import_multiple_label_files(paths_labelFiles=paths_labelFiles,
                                              plot_pref=True)
labels = np.concatenate(labels_all)

# Every session must contribute exactly one label per image. The builtin all()
# replaces np.alltrue, which is deprecated and removed in NumPy 2.0; the explicit
# length check also catches a missing or extra label file.
assert len(sf_all) == len(labels_all) and all(
    sf.shape[0] == lab.shape[0] for sf, lab in zip(sf_all, labels_all)
), 'num images in stat files does not correspond to num labels'

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

concatenated images shape: (9715, 36, 36)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Balance classes of inputs

In [11]:
# Number of times the labeled set is repeated; with 1 the arrays are unchanged copies.
duplicates = 1

# Repeat images along the first axis only, and labels along their single axis.
images_dup = np.tile(images_labeled, (duplicates, 1, 1))
labels_dup = np.tile(labels, duplicates)

print(images_dup.shape)
print(labels_dup.shape)

# Count of samples carrying label 0.
numToGetTo = np.sum(labels_dup == 0)
print(numToGetTo)

# Per-label sample counts for the label values inspected here.
for label_value in (0, 1, 4, 5):
    print(np.sum(labels_dup == label_value))
print(images_dup.shape)
print(labels_dup.shape)

(9715, 36, 36)
(9715,)
532
532
620
681
7881
(9715, 36, 36)
(9715,)


In [12]:
# Hold out 15% of the labeled images as a validation split.
# random_state is pinned so that Restart & Run All reproduces the same split.
# The split is not stratified by label. NOTE(review): the recorded label counts
# suggest one label value may occur only once, which `stratify=` would reject —
# confirm before enabling stratification.
X_train, X_val, y_train, y_val = train_test_split(
    images_dup[:],
    labels_dup[:],
    test_size=0.15,
    random_state=0,
)
(X_train.shape, y_train.shape), (X_val.shape, y_val.shape)

(((8257, 36, 36), (8257,)), ((1458, 36, 36), (1458,)))

In [13]:
# Histogram of the label values (20 bins), labeled so the figure stands on its
# own when the notebook is skimmed.
plt.figure()
plt.hist(labels_dup, 20)
plt.xlabel('label value')
plt.ylabel('number of images')
plt.title('Label distribution of the labeled images');

<IPython.core.display.Javascript object>

## Define augmentation pipeline

In [14]:
import torchvision.transforms    

# Augmentation pipeline handed to the SimCLR dataset below. Every stage is
# currently commented out, so this is an empty `torch.nn.Sequential`.
# NOTE(review): this assignment rebinds the name `transforms`, which the imports
# cell bound to the `torchvision.transforms` module (`import torchvision.transforms
# as transforms`). After this cell, `transforms` is the Sequential, not the module;
# the commented stages use the fully qualified `torchvision.transforms.*` names.
transforms = torch.nn.Sequential(
#     torchvision.transforms.RandomHorizontalFlip(p=0.5),
    
#     torchvision.transforms.GaussianBlur(kernel_size=5,
#                                         sigma=(0.0001, 0.5)),
    
#     torchvision.transforms.RandomPerspective(distortion_scale=0.4, 
#                                              p=0.5, 
#                                              interpolation=torchvision.transforms.InterpolationMode.BILINEAR, 
#                                              fill=0),
#     torchvision.transforms.RandomAffine(
#                                         degrees=(-180,180),
#                                         translate=(0.3, 0.3),
#                                         scale=(0.7, 1.7), 
#                                         shear=(-30, 30, -30, 30), 
#                                         interpolation=torchvision.transforms.InterpolationMode.BILINEAR, 
#                                         fill=0, 
#                                         fillcolor=None, 
#                                         resample=None),
    
#     augmentation.AddPoissonNoise(   level_bounds=(0,0.7),
#                                     prob=1),

#     augmentation.AddGaussianNoise(  mean=0, 
#                                     std=0.4,
#                                     prob=1),
)
# TorchScript compilation is disabled; the eager module is used directly.
# scripted_transforms = torch.jit.script(transforms)
scripted_transforms = transforms



# Wrap the training split in the project's SimCLR dataset: two transformed
# copies are requested per sample (n_transforms=2), tensors stay on the CPU.
dataset_train = util.dataset_simCLR(
    torch.tensor(X_train),
    y_train,
    n_transforms=2,
    transform=scripted_transforms,
    DEVICE='cpu',
    dtype_X=torch.float32,
    dtype_y=torch.int64,
)

# Shuffled batches of 1024, dropping the final incomplete batch, loaded in the
# main process (num_workers=0).
# Disabled alternatives previously noted here: sampler=sampler,
# num_workers=16 or 32, prefetch_factor=3, persistent_workers=True.
dataloader_train = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=1024,
    shuffle=True,
    drop_last=True,
    pin_memory=False,
    num_workers=0,
)

# dataset_val = util.dataset_simCLR(torch.tensor(X_val), 
#                                     y_val, 
#                                     n_transforms=2, 
#                                     transform=scripted_transforms,
#                                     DEVICE='cpu',
#                                     dtype_X=torch.float32,
#                                     dtype_y=torch.int64 )

# dataloader_val = torch.utils.data.DataLoader(   dataset_val,
#                                                 batch_size=128,
#             #                                     sampler=sampler,
#                                                 shuffle=True,
#                                                 drop_last=True,
#                                                 pin_memory=True,
#                                                 num_workers=0,
#                                                 )         

In [15]:
import time

%matplotlib notebook
# Preview the first four dataset items: print the time taken to fetch each item
# (for items after the first this also includes drawing the previous figure),
# then show every transformed view of the item in its own subplot row.
tic = time.time()
for ii, im in enumerate(dataset_train):
    print(time.time() - tic)
    tic = time.time()
    # squeeze=False always returns a 2-D array of Axes, so this also works when
    # an item holds a single view (plt.subplots(1) would return a bare Axes,
    # which cannot be iterated).
    fig, axs = plt.subplots(len(im[0]), squeeze=False)
    for jj, ax in enumerate(axs.ravel()):
        ax.imshow(im[0][jj].cpu().squeeze())
    if ii > 2:
        break

0.0003616809844970703


<IPython.core.display.Javascript object>

0.02991318702697754


<IPython.core.display.Javascript object>

0.027866363525390625


<IPython.core.display.Javascript object>

0.027029991149902344


<IPython.core.display.Javascript object>

## Train

In [17]:
# Pick the compute device through the project helper, requesting the GPU
# (the recorded run reports device: 'cuda').
DEVICE = torch_helpers.set_device(use_GPU=True)

device: 'cuda'


In [35]:
from torch.nn import CrossEntropyLoss
from torch.optim import Adam

# Build the network, loss, optimizer, LR schedule and validation helper used by
# the training loop.
model = models.LeNet1(dropout_prob=0.3, momentum_val=0, n_output_features=128)
model.train()
# criterion = CrossEntropyLoss()
# reduction='none' keeps one loss value per element instead of averaging.
# presumably so per-sample weights can be applied downstream — TODO confirm
criterion = CrossEntropyLoss(reduction='none')

optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=0.05)
# Exponential decay: the learning rate is multiplied by gamma (0.9995) on every
# scheduler step.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,
                                                   gamma=1-0.0005,
#                                                    gamma=1,
                                                  )
# Validation helper built on the held-out split; it receives the model and
# criterion objects before they are moved to DEVICE below.
val_obj = training_simCLR.validation_Obj(   X_val,
                                            y_val,
                                            model,
                                            criterion,
                                            DEVICE=DEVICE,
                                            dtype_X=torch.float32,
                                            dtype_y=torch.int64) # Needs to take in weights

model.to(DEVICE)
criterion = criterion.to(DEVICE)

# Rolling histories passed to the training step; the validation history starts
# with a NaN placeholder.
losses_train, losses_val, val_accs = [], [np.nan], []

In [50]:
import numpy as np
%load_ext autoreload
%autoreload 2
from GCaMP_ROI_classifier.old_stuff.training_classHead import HeadModel

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

# Classification head wrapping the current model with a scikit-learn estimator
# class; the same head object is attached to the training dataset.
# classHead = HeadModel(model, RandomForestClassifier)
classHead = HeadModel(model, LogisticRegression)
dataset_train.headmodel = classHead

n_epochs=300000
for epoch in range(n_epochs):
    print(f'epoch: {epoch}')
    # One call to the project's training step per epoch (validation disabled).
    training_simCLR.epoch_step(
        dataloader_train,
        model,
        optimizer,
        criterion,
        scheduler=scheduler,
        temperature=0.5,
        loss_rolling_train=losses_train,
        device=DEVICE,
        do_validation=False,
        validation_Object=val_obj,
        loss_rolling_val=losses_val,
        verbose=2,
        verbose_update_period=10,
    )
    # Original plan notes for this step:
    #   model predict -> update model in the dataset -> __getitem__ calls the
    #   model for each sample -> output: X, sample weights, predictions

    # Refit the head on the training images, with a singleton axis inserted at
    # position 1.
    classHead.fit(X_train[:, None, :, :], y_train)

    # Predicted probabilities are currently used only for their column count.
    proba = classHead.predict_proba(X_train[:, None, :, :])
#     class_weights = proba.sum(axis=0)
#     total_num = class_weights.sum()

#     eps = 1e-4

#     class_weights[class_weights <= 3] = total_num
#     weightings = class_weights.sum()/class_weights
#     final_weights = weightings / weightings.sum()
    # Uniform weights: one entry per probability column, each equal to 1/n.
    n_classes = proba.shape[1]
    final_weights = np.full(n_classes, 1/n_classes)

    dataset_train.set_classweights(final_weights)

    print('dataset_train.final_weights', dataset_train.class_weights)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
epoch: 0
double_sample_weights tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0')
Iter: 0/8, loss_train: 5.7645, loss_val: nan, lr: 0.00085041
double_sample_weights tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0')
double_sample_weights tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0')
double_sample_weights tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0')
double_sample_weights tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0')
double_sample_weights tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0')
double_sample_weights tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0')
double_sample_weights tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0')


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 1
double_sample_weights tensor([0.2000, 0.3208, 0.3559,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7633, loss_val: nan, lr: 0.00084701
double_sample_weights tensor([0.2000, 0.2141, 0.2000,  ..., 0.2000, 0.3308, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2004, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.3521,  ..., 0.2000, 0.2000, 0.3454], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.3392, 0.2000,  ..., 0.2000, 0.2000, 0.3149], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2001, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.3130, 0.2000], device='cuda:0'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 2
double_sample_weights tensor([0.3137, 0.3696, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7539, loss_val: nan, lr: 0.00084363
double_sample_weights tensor([0.2000, 0.2000, 0.3198,  ..., 0.2000, 0.2000, 0.3027], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2546, 0.2176, 0.2000,  ..., 0.2008, 0.2033, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2001, 0.2001,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3504, 0.2300, 0.2000,  ..., 0.3010, 0.3273, 0.2006], device='cuda:0'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 3
double_sample_weights tensor([0.2013, 0.2000, 0.2045,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7509, loss_val: nan, lr: 0.00084026
double_sample_weights tensor([0.2000, 0.2000, 0.2103,  ..., 0.2011, 0.2000, 0.2715], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2276,  ..., 0.2074, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2668, 0.2885, 0.2000,  ..., 0.3527, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.3080,  ..., 0.2603, 0.2000, 0.2001], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2007, 0.2019], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2042, 0.3233], device='cuda:0'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 4
double_sample_weights tensor([0.2000, 0.2000, 0.3510,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7596, loss_val: nan, lr: 0.00083691
double_sample_weights tensor([0.3009, 0.2000, 0.2000,  ..., 0.2002, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2911, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2820, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.3059,  ..., 0.2000, 0.3113, 0.2083], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2528, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2002, 0.2000], device='cuda:0'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 5
double_sample_weights tensor([0.2000, 0.2978, 0.2000,  ..., 0.3036, 0.2011, 0.2017], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7611, loss_val: nan, lr: 0.00083356
double_sample_weights tensor([0.3022, 0.2001, 0.2289,  ..., 0.2000, 0.2000, 0.3534], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2466, 0.2791, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.3055,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3304, 0.3065, 0.2000,  ..., 0.2000, 0.3446, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.3357, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3076, 0.2000, 0.2000,  ..., 0.2000, 0.3340, 0.2000], device='cuda:0'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 6
double_sample_weights tensor([0.2000, 0.2511, 0.2717,  ..., 0.2000, 0.3439, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.766, loss_val: nan, lr: 0.00083023
double_sample_weights tensor([0.2160, 0.2002, 0.2001,  ..., 0.2951, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2005, 0.2000,  ..., 0.2010, 0.2000, 0.3105], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2954, 0.2000,  ..., 0.2000, 0.2000, 0.2972], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2279, 0.2207,  ..., 0.2000, 0.2000, 0.2809], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2005, 0.3145, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3131, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 7
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.3623], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7552, loss_val: nan, lr: 0.00082692
double_sample_weights tensor([0.2000, 0.2934, 0.2000,  ..., 0.2085, 0.2001, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2643, 0.2000, 0.2000,  ..., 0.2516, 0.2000, 0.2430], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2772, 0.2925,  ..., 0.2289, 0.3604, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2006, 0.2004, 0.2000,  ..., 0.2000, 0.2000, 0.2001], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2001, 0.2698, 0.2380,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.3491, 0.2000,  ..., 0.2000, 0.2022, 0.2000], device='cuda:0'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 8
double_sample_weights tensor([0.2000, 0.2000, 0.2387,  ..., 0.2000, 0.2000, 0.3567], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7469, loss_val: nan, lr: 0.00082362
double_sample_weights tensor([0.3371, 0.2000, 0.2001,  ..., 0.3336, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2004,  ..., 0.3474, 0.3677, 0.2005], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3133, 0.2000, 0.2000,  ..., 0.2027, 0.2000, 0.3552], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3670, 0.2751, 0.2896,  ..., 0.3329, 0.3092, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2006, 0.3081, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2595, 0.2000,  ..., 0.2728, 0.2973, 0.2000], device='cuda:0'

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 9
double_sample_weights tensor([0.2000, 0.2000, 0.3611,  ..., 0.2000, 0.2000, 0.2008], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.754, loss_val: nan, lr: 0.00082033
double_sample_weights tensor([0.2000, 0.2662, 0.2000,  ..., 0.2000, 0.2804, 0.2001], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3494, 0.2894, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.3472, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2533, 0.3318, 0.2000,  ..., 0.2000, 0.2000, 0.2001], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2801, 0.2549,  ..., 0.2001, 0.3599, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3117, 0.2000, 0.2019,  ..., 0.2013, 0.2000, 0.2979], device='cuda:0',

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 10
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7501, loss_val: nan, lr: 0.00081705
double_sample_weights tensor([0.2000, 0.2930, 0.2000,  ..., 0.3307, 0.3019, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2169, 0.2142, 0.3193,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2670, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2015, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2005,  ..., 0.3105, 0.3630, 0.3322], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 11
double_sample_weights tensor([0.3588, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2910], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7464, loss_val: nan, lr: 0.00081379
double_sample_weights tensor([0.2000, 0.3182, 0.2000,  ..., 0.2000, 0.2000, 0.2462], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2005, 0.3364,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2010, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2426, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.3254], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3351, 0.2000, 0.2001,  ..., 0.3667, 0.2000, 0.2000], device='cuda:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 12
double_sample_weights tensor([0.2000, 0.3254, 0.2000,  ..., 0.2000, 0.2000, 0.2009], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7506, loss_val: nan, lr: 0.00081054
double_sample_weights tensor([0.3386, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3468, 0.2000, 0.3619,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2001, 0.2000, 0.2001,  ..., 0.2001, 0.3039, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2001,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.3084, 0.2000,  ..., 0.2000, 0.3172, 0.3388], device='cuda:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 13
double_sample_weights tensor([0.2000, 0.2767, 0.2001,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7451, loss_val: nan, lr: 0.00080731
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2407], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3392, 0.2000, 0.2000,  ..., 0.2064, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2950, 0.2000, 0.2000,  ..., 0.3125, 0.2000, 0.2242], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.3381], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2004,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2001, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 14
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.3785, 0.3496, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7495, loss_val: nan, lr: 0.00080408
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3478, 0.3190, 0.2000,  ..., 0.2001, 0.3637, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2716, 0.2000,  ..., 0.2000, 0.2000, 0.2003], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2307,  ..., 0.3555, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2123, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.3442, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 15
double_sample_weights tensor([0.2000, 0.3351, 0.3465,  ..., 0.3367, 0.3556, 0.2022], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7487, loss_val: nan, lr: 0.00080087
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.3522, 0.2000, 0.2072], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2001, 0.2000, 0.2000,  ..., 0.2000, 0.3089, 0.2146], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2162, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2644, 0.3143, 0.3241,  ..., 0.2000, 0.2000, 0.2002], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2002, 0.3643, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.3001, 0.2067,  ..., 0.3083, 0.2000, 0.2000], device='cuda:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 16
double_sample_weights tensor([0.3385, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7708, loss_val: nan, lr: 0.00079767
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2349, 0.2000,  ..., 0.2000, 0.2112, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2892, 0.3429, 0.2439,  ..., 0.2074, 0.2014, 0.3437], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3385, 0.2000, 0.2000,  ..., 0.2002, 0.2000, 0.2055], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.3395, 0.2000,  ..., 0.2027, 0.2000, 0.2069], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.3180, 0.2000, 0.2000,  ..., 0.2000, 0.2632, 0.2000], device='cuda:0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


dataset_train.final_weights [0.2 0.2 0.2 0.2 0.2]
epoch: 17
double_sample_weights tensor([0.2000, 0.2004, 0.2000,  ..., 0.2000, 0.2064, 0.3706], device='cuda:0',
       dtype=torch.float64)
Iter: 0/8, loss_train: 5.7721, loss_val: nan, lr: 0.00079449
double_sample_weights tensor([0.2936, 0.3252, 0.2000,  ..., 0.2000, 0.2000, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.3165, 0.2018,  ..., 0.2000, 0.2032, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2013, 0.2000, 0.2001,  ..., 0.2000, 0.2000, 0.2991], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2023, 0.2000, 0.2000,  ..., 0.2000, 0.3507, 0.2002], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2000,  ..., 0.2000, 0.2454, 0.2000], device='cuda:0',
       dtype=torch.float64)
double_sample_weights tensor([0.2000, 0.2000, 0.2958,  ..., 0.2000, 0.2000, 0.2625], device='cuda:0

KeyboardInterrupt: 

## Check embeddings

In [118]:
# Ask PyTorch's caching allocator to release GPU memory it holds but is not
# currently using, before the embedding checks below.
torch.cuda.empty_cache()

In [47]:
# Run the labeled images through the network to get one feature vector per image.
# `[:,None,...]` inserts a singleton axis at position 1 before the forward pass.
# The forward pass is inference-only, so do it under no_grad: autograd then never
# builds a graph or keeps intermediate activations for the whole batch on the GPU
# (previously the graph was built and only discarded afterwards via .detach()).
# NOTE(review): the model is not switched to eval() here -- confirm that is intended
# if it contains dropout or other train/eval-dependent layers.
with torch.no_grad():
    features = model(torch.tensor(images_labeled, device=DEVICE, dtype=torch.float32)[:,None,...])
    # features = model(torch.tensor(X_val, device=DEVICE, dtype=torch.float32)[:,None,...])
    # features = model(torch.tensor(X_train, device=DEVICE, dtype=torch.float32)[:,None,...])

In [120]:
# Run the project's torch_pca helper on `features` and keep only the second of its
# four return values as the embedding; the other three are discarded.
# NOTE(review): presumably the second output holds the PCA scores (samples x components)
# and return_cpu=True moves the results to the CPU for plotting -- confirm against
# basic_neural_processing_modules.decomposition.torch_pca.
_, features_embedded, _, _ = decomposition.torch_pca(features, device=DEVICE, return_cpu=True)

In [48]:
from sklearn import manifold

# Fixed seed so the 2-D embedding (and therefore the scatter plot built from it)
# is reproducible across kernel restarts; change it to probe run-to-run stability.
TSNE_SEED = 0

# All hyperparameters are spelled out explicitly so the result does not depend on
# which scikit-learn version's defaults happen to be installed.
tsne = manifold.TSNE(n_components=2, 
                     perplexity=90.0, 
                     early_exaggeration=12.0, 
                     learning_rate=200, 
                     n_iter=1000, 
                     n_iter_without_progress=300, 
                     min_grad_norm=1e-07, 
                     metric='euclidean', 
                     init='pca', 
                     verbose=0, 
                     random_state=TSNE_SEED, 
                     method='barnes_hut', 
                     angle=0.5, 
                     n_jobs=-1, 
#                      square_distances='legacy'
                    )
# Hand scikit-learn a plain NumPy array rather than relying on implicit
# conversion of a torch tensor.
features_embedded = tsne.fit_transform(features.detach().cpu().numpy())

In [123]:
import matplotlib as mpl

mpl.rcParams['image.cmap'] = 'Set1'
%matplotlib notebook
plt.figure()
plt.scatter(features_embedded[:,0], features_embedded[:,1], c=labels)
# plt.scatter(features_embedded[:,0], features_embedded[:,1], c=y_val)
# plt.scatter(features_embedded[:,4], features_embedded[:,5], c=y_train)
# plt.scatter(features_embedded[:,11], features[:,43].cpu(), c=y_train)
mpl.rcParams['image.cmap'] = 'viridis'


<IPython.core.display.Javascript object>

In [49]:
import matplotlib as mpl

mpl.rcParams['image.cmap'] = 'Set1'
%matplotlib notebook
plt.figure()
plt.scatter(features_embedded[:,0], features_embedded[:,1], c=labels)
# plt.scatter(features_embedded[:,0], features_embedded[:,1], c=y_val)
# plt.scatter(features_embedded[:,4], features_embedded[:,5], c=y_train)
# plt.scatter(features_embedded[:,11], features[:,43].cpu(), c=y_train)
mpl.rcParams['image.cmap'] = 'viridis'


<IPython.core.display.Javascript object>

## Check filters

In [124]:
# Display the parameter names stored in the model's state dict, in order
# (iterating the mapping yields its keys).
list(model.state_dict())

['cnn_layers.0.weight',
 'cnn_layers.0.bias',
 'cnn_layers.2.weight',
 'cnn_layers.2.bias',
 'cnn_layers.5.weight',
 'cnn_layers.5.bias',
 'cnn_layers.8.weight',
 'cnn_layers.8.bias',
 'linear_layers.0.weight',
 'linear_layers.0.bias',
 'linear_layers.2.weight',
 'linear_layers.2.bias']

In [125]:
def plot_conv_filters(weights, clim=None, max_channels=None):
    """Draw every 2-D kernel of a 4-D weight tensor as a grid of images in a new figure.

    The grid has one row per index along the tensor's second axis and one column
    per index along its first axis; the cell at (row, col) shows
    ``weights[col, row, :, :]``. (For Conv2d weights these axes are presumably
    input and output channels respectively -- confirm against the model.)

    Args:
        weights: 4-D CPU tensor/array indexed as [first, second, height, width].
        clim: optional (vmin, vmax) colour limits applied to every kernel; when
            None each kernel is autoscaled independently.
        max_channels: optional cap on the number of rows and columns drawn. Using
            a cap instead of a fixed range avoids an IndexError when the layer has
            fewer channels than the cap.
    """
    n_cols, n_rows = weights.shape[0], weights.shape[1]
    if max_channels is not None:
        n_cols = min(n_cols, max_channels)
        n_rows = min(n_rows, max_channels)
    imshow_kwargs = {} if clim is None else {'clim': clim}

    plt.figure()
    for row in range(n_rows):
        for col in range(n_cols):
            plt.subplot2grid((n_rows, n_cols), (row, col))
            image = plt.imshow(weights[col, row, :, :], **imshow_kwargs)
            # Hide tick marks/labels; the kernels are too small for them to be useful.
            image.axes.get_xaxis().set_visible(False)
            image.axes.get_yaxis().set_visible(False)


# Build the state dict once and pull the four convolutional weight tensors to the CPU.
model_state = model.state_dict()
layer_1 = model_state['cnn_layers.0.weight'].cpu()
layer_2 = model_state['cnn_layers.2.weight'].cpu()
layer_3 = model_state['cnn_layers.5.weight'].cpu()
layer_4 = model_state['cnn_layers.8.weight'].cpu()

# First two layers: all kernels, with fixed symmetric colour limits.
plot_conv_filters(layer_1, clim=(-0.2,0.2))
plot_conv_filters(layer_2, clim=(-.05,.05))

# Deeper layers: show at most a 16 x 16 block of kernels, autoscaled per kernel.
plot_conv_filters(layer_3, max_channels=16)
plot_conv_filters(layer_4, max_channels=16)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [126]:
# torch.save(model.state_dict(), '/media/rich/bigSSD/Net_trainedOnAug_20211025_trainingSet_mouse628_20200903and20200815_simCLR.pth')

In [127]:
# model = Net()
# model.load_state_dict(torch.load('test_save.pth'))
# model.eval()