## Detector training process

In [None]:
import os, sys
import pickle as pkl
import pandas as pd
import numpy as np
from glob import glob
sys.path.append(os.path.abspath('./../../'))
from cell_extractor.CellDetectorBase import CellDetectorBase
from cell_extractor.CellDetectorTrainer import CellDetectorTrainerDK55, CellDetectorTrainer
from cell_extractor.CellDetector import CellDetector

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
def detection_plot(detections, labeled_data, title):
    """Plot the score density of detections together with labeled samples.

    The left (red, log-scaled) axis shows a kernel density estimate of
    ``detections['mean_score']``. A twin right axis shows one point per row
    of ``labeled_data`` (``mean_score`` vs. ``std_score``), colored by label,
    with dashed lines at -1.5 and 1.5 separating the regions annotated as
    "Negative", "Unsure" and "Sure".

    Args:
        detections: Table with a ``mean_score`` column.
        labeled_data: Table with ``mean_score``, ``std_score`` and ``label``
            columns. Labels 0 and 1 are displayed as "Negative" and
            "Positive"; any other value is displayed unchanged. The table
            itself is not modified.
        title: Name appended to the figure title.
    """
    negative_threshold, sure_threshold = -1.5, 1.5
    label_names = {0: 'Negative', 1: 'Positive'}

    plt.figure(figsize=(8,5))
    sns.kdeplot(x=detections['mean_score'], color='r')
    plt.yscale('log')
    plt.ylim([1e-6,1])
    plt.xlabel("Average value of Scores")
    # Color the density axis red so it can be told apart from the twin axis.
    left_yaxis = plt.gca().get_yaxis()
    left_yaxis.label.set_color('red')
    left_yaxis.set_tick_params(labelcolor='red', color='red')

    plt.twinx()
    # Translate the labels on a derived Series instead of writing strings back
    # into the caller's DataFrame, so the numeric label column stays usable
    # after plotting. The Series keeps the name 'label' for the legend title.
    display_labels = labeled_data['label'].map(
        lambda value: label_names.get(value, value))
    sns.scatterplot(x=labeled_data['mean_score'], y=labeled_data['std_score'],
                    hue=display_labels, s=5, alpha=0.7)
    plt.title('Distributions of Detections of %s'%title)
    plt.ylabel("Standard Deviation of Scores", rotation=270, labelpad=20)
    plt.ylim([0,5])
    plt.xlim([-25,15])
    plt.vlines(negative_threshold,ymin=0,ymax=5,ls='--',colors='k',label='Negative-unsure threshold')
    plt.vlines(sure_threshold,ymin=0,ymax=5,ls='--',colors='k',label='Sure-unsure threshold')
    # Region captions, placed near the bottom of the right-hand axis.
    plt.text(0, 0.3, 'Unsure', ha='center', va='center')
    plt.text(8.25, 0.3, 'Sure', ha='center', va='center')
    plt.text(-8.25, 0.3, 'Negative', ha='center', va='center')
    legend = plt.legend(loc='upper right')
    legend.get_frame().set_alpha(0.5)
    plt.grid()
    plt.gca().spines['left'].set_color('red')
    plt.show()
        

## Detector 1

Use unaided annotations from DK55 as the training set.

In [None]:
# Create the DK55 trainer helper (round 2, segmentation threshold 2000) and
# load the feature table that serves as the Detector 1 training set.
# NOTE(review): what load_refined_original_feature() returns is defined in
# cell_extractor, not here — confirm there.
generator = CellDetectorTrainerDK55('DK55',round=2,segmentation_threshold=2000)
train_features = generator.load_refined_original_feature()

In [None]:
# Show the size of the loaded training set.
len(train_features)

In [None]:
# Train the round-1 models on the DK55 training features and save them.
trainer = CellDetectorTrainer('DK55',round=1)
new_models = trainer.train_classifier(train_features,676,3) # training iteration = 676, depth of XGBoost trees = 3
trainer.save_models(new_models)

In [None]:
# Run the round-1 detector for 'DK41' and save its detection results.
detector = CellDetector('DK41',round=1)
# Optional override: load a different set of models before detecting.
# detector.detector.model = pkl.load(open('/scratch/k1qian/Cell_Detectors/detectors_new.pkl', 'rb'))
detector.calculate_and_save_detection_results()


In [None]:
# Reload the saved detections and split them by their `predictions` value:
# rows with 2 are kept as `sures`, rows with 0 as `unsures`.
# NOTE(review): the meaning of the codes 2 / 0 is inferred from the variable
# names — confirm against CellDetector.
detections = detector.load_detections()
sures = detections[detections.predictions==2]
unsures = detections[detections.predictions==0]
# Display how many detections fall in each group.
len(sures),len(unsures)

In [None]:
# Recreate the DK55 generator, this time passing the birdstore path as `disk`,
# and reload the training features through it.
generator = CellDetectorTrainerDK55('DK55',round=2,disk = '/net/birdstore/Active_Atlas_Data/')
train_features = generator.load_refined_original_feature()

In [None]:
# Score the training features with the round-1 detector stored on birdstore.
detector = CellDetector('DK55',round=1,disk = '/net/birdstore/Active_Atlas_Data/')
# Load the pickled detector inside a `with` block so the file handle is closed
# as soon as the object has been read.
with open('/net/birdstore/Active_Atlas_Data/cell_segmentation/detectors/detector_round_1.pkl', 'rb') as detector_file:
    detector.detector = pkl.load(detector_file)
scores,labels,_mean,_std = detector.detector.calculate_scores(train_features)
# Keep the third and fourth return values as per-sample score columns.
# NOTE(review): presumably the mean and standard deviation of the scores —
# confirm against calculate_scores.
train_features['mean_score'],train_features['std_score'] = _mean,_std

## Prepare training samples for Detector 2

In [None]:
# Score the training features with the round-1 DK55 detector and store the
# third and fourth return values as the `mean_score` / `std_score` columns.
# NOTE(review): presumably the mean and standard deviation of the scores —
# confirm against calculate_scores.
detector = CellDetector('DK55',round=1)
scores,labels,_mean,_std = detector.detector.calculate_scores(train_features)
train_features['mean_score'],train_features['std_score'] = _mean,_std

In [None]:
# Preview the first rows of the training features, including the score columns.
train_features.head()

In [None]:
# Show the rows labeled 1 whose mean score is above 1.5.
train_features[(train_features['mean_score']>1.5) & (train_features['label']==1)]

In [None]:
# Load the detections saved for the current detector and split them by their
# `predictions` value: rows with 2 are kept as `sures`, rows with 0 as `unsures`.
# NOTE(review): the meaning of the codes 2 / 0 is inferred from the variable
# names — confirm against CellDetector.
detections = detector.load_detections()
sures = detections[detections.predictions==2]
unsures = detections[detections.predictions==0]
# Display how many detections fall in each group.
len(sures),len(unsures)

In [None]:
# Get the combined detection features from the detector and score them.
# This rebinds `scores`, `labels`, `_mean` and `_std` from earlier cells.
features = detector.get_combined_features_for_detection()
scores,labels,_mean,_std = detector.detector.calculate_scores(features)

In [None]:
# Attach the scores computed for `features` to the combined feature table.
# NOTE(review): this assumes get_combined_features() returns rows in the same
# order as get_combined_features_for_detection() — confirm.
detection_df = detector.get_combined_features()
detection_df['mean_score'],detection_df['std_score'] = _mean,_std
# One row per segment, with columns (col, row, section).
all_segment = np.array([detection_df.col,detection_df.row,detection_df.section]).T

In [None]:
# Load the round-1 category file inside a `with` block so the file handle is
# closed as soon as the pickle has been read.
with open('/net/birdstore/Active_Atlas_Data/cell_segmentation/DK55/features/categories_round1.pkl','rb') as categories_file:
    test_counts,train_sections = pkl.load(categories_file)
original = train_sections['original training set after mind change']
# Reduce every entry to its (x, y, section) coordinates.
original = np.array([[ci[1]['x'],ci[1]['y'],ci[1]['section']] for ci in original])
# Match each original training point to a segment in `all_segment`.
# NOTE(review): presumably returns indices into `all_segment` — confirm
# against find_cloest_neighbor_among_points.
original_index = generator.find_cloest_neighbor_among_points(all_segment,original)

In [None]:
# Read the manual round-1 annotations (header-less CSV files in the working
# directory) as plain arrays.
neg = pd.read_csv(
    'DK55_premotor_manual_negative_round1_2021-12-09.csv',
    header=None,
).to_numpy()
pos = pd.read_csv(
    'DK55_premotor_manual_positive_round1_2021-12-09.csv',
    header=None,
).to_numpy()
# Match every annotated point to a segment in `all_segment`.
positive = generator.find_cloest_neighbor_among_points(all_segment, pos)
negative = generator.find_cloest_neighbor_among_points(all_segment, neg)
# Display the number of matched positive and negative annotations.
len(positive), len(negative)

In [None]:
# Index labels of the segments whose mean score is above 1.5.
sure = list(detection_df[detection_df['mean_score']>1.5].index)

In [None]:
# Number of high-score segments that are in none of the three annotation
# sources (original training set, manual positives, manual negatives).
len(list(set(sure).difference(set(original_index+positive+negative))))

In [None]:
# Number of high-score segments that already appear in one of the three
# annotation sources.
len(list(set(sure) & set(original_index+positive+negative)))

In [None]:
# Define `sure_id` before it is used so this cell runs in a fresh
# top-to-bottom execution of the notebook: high-score segments that are in
# none of the annotation sources.
already_labeled = set(original_index+positive+negative)
sure_id = [index for index in sure if index not in already_labeled]
# Compare the number of distinct positive indices with the total count; the
# two differ when the sources overlap.
len(set(sure_id+original_index+positive)), len(sure_id+original_index+positive)

In [None]:
# Build the round-2 labels and decide which segments enter the training set.
# NOTE(review): assumes original_index, positive and negative are lists of
# integer positions into `features` / `all_segment` — confirm.

# Build the exclusion set once instead of concatenating the three lists for
# every element of `sure`.
already_labeled = set(original_index+positive+negative)
# High-score segments that are in none of the annotation sources.
sure_id = [index for index in sure if index not in already_labeled]
dirs=glob('/net/birdstore/Active_Atlas_Data/cell_segmentation/DK55/CH3/*/DK55*.csv')
# The name of each CSV's parent directory is parsed as a section number.
manual_sections = [int(i.split('/')[-2]) for i in dirs]
labels = np.zeros(len(features))
positive_index = sure_id+original_index+positive
for i in positive_index:
    labels[i] = 1
# Set copies give constant-time membership tests in the comprehension below;
# `negative` and `manual_sections` themselves stay lists.
negative_set = set(negative)
manual_section_set = set(manual_sections)
# Keep a segment when it is labeled positive, is a manual negative, or lies in
# a section that has a matching CSV file.
include = [labels[i]==1 or i in negative_set or all_segment[i,2] in manual_section_set for i in range(len(features))]


In [None]:
# Attach the round-2 labels to the feature table.
features['label'] = labels

In [None]:
# Attach the same labels to the combined feature table.
detection_df['label'] = labels

In [None]:
# Keep only the rows flagged in `include` as the round-2 training set.
train_features_r2 = features[include]

In [None]:
# Display the round-2 training set.
train_features_r2

In [None]:
# Save the round-2 training set. The `with` block closes the file, and thereby
# flushes it to disk, before any later cell reads it back.
with open('/scratch/k1qian/TrainingData_v2/round2_training_features.pkl', 'wb') as output_file:
    pkl.dump(train_features_r2, output_file)

## Detector 2

In [None]:
# Train the round-2 models on the round-2 training set and save them.
# 676 and 3 are the same training settings used for round 1.
trainer = CellDetectorTrainer('DK55',round=2)
new_models = trainer.train_classifier(train_features_r2,676,3)
trainer.save_models(new_models)

In [None]:
# Run the round-2 detector for 'DK41' and save its detection results.
detector = CellDetector('DK41',round=2)
# Optional override: load a different set of models before detecting.
# detector.detector.model = pkl.load(open('/scratch/k1qian/Cell_Detectors/detectors_new.pkl', 'rb'))
detector.calculate_and_save_detection_results()

In [None]:
# Create the round-2 DK41 detector without re-running detection, so the
# cells below can load previously saved results.
detector = CellDetector('DK41',round=2)

In [None]:
# Load the saved round-2 detections and split them by their `predictions`
# value: rows with 2 are kept as `sures`, rows with 0 as `unsures`.
# NOTE(review): the meaning of the codes 2 / 0 is inferred from the variable
# names — confirm against CellDetector.
detections = detector.load_detections()
sures = detections[detections.predictions==2]
unsures = detections[detections.predictions==0]
# Display how many detections fall in each group.
len(sures),len(unsures)

In [None]:
# Reload the saved round-2 training set. pd.read_pickle opens and closes the
# file itself, so no file handle is left open.
train_features_r2 = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round2_training_features.pkl')

In [None]:
# Score the round-2 training set with the round-2 DK55 detector and store the
# third and fourth return values as the `mean_score` / `std_score` columns.
# NOTE(review): presumably the mean and standard deviation of the scores —
# confirm against calculate_scores.
detector = CellDetector('DK55',round=2)
scores,labels,_mean,_std = detector.detector.calculate_scores(train_features_r2)
train_features_r2['mean_score'],train_features_r2['std_score'] = _mean,_std

In [None]:
# Plot the score density of the loaded detections against the scored
# round-2 training samples.
detection_plot(detections, train_features_r2, 'Detector2')

## Detector 3

In [None]:
# Detector 3 training set: the round-3 features followed by the round-2 set.
# pd.read_pickle opens and closes each file itself, so no file handle is left
# open.
train_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round2_training_features.pkl')
features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round3_training_features.pkl')
train_features_r3 = pd.concat([features,train_features])

In [None]:
# Train the round-3 models on the combined round-2 + round-3 features and
# save them; the trainer is created for 'DK41'.
trainer = CellDetectorTrainer('DK41',round=3)
new_models = trainer.train_classifier(train_features_r3,676,3)
trainer.save_models(new_models)

In [None]:
# Run the round-3 detector for 'DK62' and save its detection results.
detector = CellDetector('DK62',round=3)
# Optional override: load a different set of models before detecting.
# detector.detector.model = pkl.load(open('/scratch/k1qian/Cell_Detectors/detectors_new.pkl', 'rb'))
detector.calculate_and_save_detection_results()

In [None]:
# Load the saved round-3 detections and split them by their `predictions`
# value: rows with 2 are kept as `sures`, rows with 0 as `unsures`.
# NOTE(review): the meaning of the codes 2 / 0 is inferred from the variable
# names — confirm against CellDetector.
detections = detector.load_detections()
sures = detections[detections.predictions==2]
unsures = detections[detections.predictions==0]
# Display how many detections fall in each group.
len(sures),len(unsures)

In [None]:
# Score `features` with the round-3 detector created for 'DK41' and store the
# third and fourth return values as the `mean_score` / `std_score` columns.
# NOTE(review): `detector` is rebound here, while `detections` still holds the
# results loaded from the previous detector — confirm this mix is intended.
detector = CellDetector('DK41',round=3)
scores,labels,_mean,_std = detector.detector.calculate_scores(features)
features['mean_score'],features['std_score'] = _mean,_std

In [None]:
# Plot the score density of the loaded detections against the scored
# `features` table.
detection_plot(detections, features, 'Detector3')

## Detector 4

In [None]:
# Detector 4 training set: round-4 features, then round-3, then round-2.
# pd.read_pickle opens and closes each file itself, so no file handle is left
# open.
train_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round2_training_features.pkl')
features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round3_training_features.pkl')
train_features = pd.concat([features,train_features])
features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round4_training_features.pkl')
train_features_r4 = pd.concat([features,train_features])

In [None]:
# Train the round-4 models on the combined round-2/3/4 features and save
# them; the trainer is created for 'DK41'.
trainer = CellDetectorTrainer('DK41',round=4)
new_models = trainer.train_classifier(train_features_r4,676,3)
trainer.save_models(new_models)

In [None]:
# Run the round-4 detector for 'DK62' and save its detection results.
detector = CellDetector('DK62',round=4)
# Optional override: load a different set of models before detecting.
# detector.detector.model = pkl.load(open('/scratch/k1qian/Cell_Detectors/detectors_new.pkl', 'rb'))
detector.calculate_and_save_detection_results()

## Detector 5

In [None]:
# Detector 5 training set, stacked in this order: round 2, round 3, round 4,
# the DK62 QC features (four copies), and the DK62 "beth" features.
# pd.read_pickle opens and closes each file itself, so no file handle is left
# open.
old_train_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round2_training_features.pkl')
train_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round3_training_features.pkl')
train_features = pd.concat([old_train_features,train_features])
nega_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round4_training_features.pkl')
train_features = pd.concat([train_features,nega_features])
qc_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/DK62_QC_features.pkl')
# The QC rows are repeated four times.
# NOTE(review): presumably to give them more weight in training — confirm.
qc_features = pd.concat([qc_features]*4)
train_features = pd.concat([train_features,qc_features])
beth_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/DK62_beth_features.pkl')
train_features = pd.concat([train_features,beth_features])


In [None]:
# Train the round-5 models on the combined training set and save them; the
# trainer is created for 'DK41'.
trainer = CellDetectorTrainer('DK41',round=5)
new_models = trainer.train_classifier(train_features,676,3)
trainer.save_models(new_models)

In [None]:
# Run the round-5 detector for 'DK62' and save its detection results.
detector = CellDetector('DK62',round=5)
# Optional override: load a different set of models before detecting.
# detector.detector.model = pkl.load(open('/scratch/k1qian/Cell_Detectors/detectors_new.pkl', 'rb'))
detector.calculate_and_save_detection_results()

In [None]:
# Load the saved round-5 detections and split them by their `predictions`
# value: rows with 2 are kept as `sures`, rows with 0 as `unsures`.
# NOTE(review): the meaning of the codes 2 / 0 is inferred from the variable
# names — confirm against CellDetector.
detections = detector.load_detections()
sures = detections[detections.predictions==2]
unsures = detections[detections.predictions==0]
# Display how many detections fall in each group.
len(sures),len(unsures)

## Detector 6

In [None]:
# Detector 6 training set, stacked in this order: round 2, round 3, round 4,
# DK62 QC features (four copies), DK62 "beth" features, DK50 QC features
# (four copies), and DK50 "beth" features.
# pd.read_pickle opens and closes each file itself, so no file handle is left
# open.
old_train_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round2_training_features.pkl')
train_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round3_training_features.pkl')
train_features = pd.concat([old_train_features,train_features])
nega_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/round4_training_features.pkl')
train_features = pd.concat([train_features,nega_features])
qc_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/DK62_QC_features.pkl')
# Each QC table is repeated four times.
# NOTE(review): presumably to give the QC rows more weight in training — confirm.
qc_features = pd.concat([qc_features]*4)
train_features = pd.concat([train_features,qc_features])
beth_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/DK62_beth_features.pkl')
train_features = pd.concat([train_features,beth_features])
qc_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/DK50_QC_features.pkl')
qc_features = pd.concat([qc_features]*4)
train_features = pd.concat([train_features,qc_features])
beth_features = pd.read_pickle('/scratch/k1qian/TrainingData_v2/DK50_beth_features.pkl')
train_features = pd.concat([train_features,beth_features])


In [None]:
# Train the round-6 models on the combined training set and save them; the
# trainer is created for 'DK41'.
trainer = CellDetectorTrainer('DK41',round=6)
new_models = trainer.train_classifier(train_features,676,3)
trainer.save_models(new_models)

In [None]:
# Run the round-6 detector for 'DK62' and save its detection results.
detector = CellDetector('DK62',round=6)
# Optional override: load a different set of models before detecting.
# detector.detector.model = pkl.load(open('/scratch/k1qian/Cell_Detectors/detectors_new.pkl', 'rb'))
detector.calculate_and_save_detection_results()

In [None]:
# Load the saved round-6 detections and split them by their `predictions`
# value: rows with 2 are kept as `sures`, rows with 0 as `unsures`.
# NOTE(review): the meaning of the codes 2 / 0 is inferred from the variable
# names — confirm against CellDetector.
detections = detector.load_detections()
sures = detections[detections.predictions==2]
unsures = detections[detections.predictions==0]
# Display how many detections fall in each group.
len(sures),len(unsures)