In [12]:

import pandas as pd
import tifffile
import random
import numpy as np
 
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

### Read the ground-truth CSVs into pandas dataframes. Here we extract the centroid locations for all timepoints. These will be used to train the LDA and test its performance

In [13]:
## Load the ground-truth label CSVs (one per timepoint) used to generate the LDA training data
ground1 = pd.read_csv('Substacks_with_SpineROIs/a1_tp1.csv')
ground2 = pd.read_csv('Substacks_with_SpineROIs/a2_tp2.csv')
ground3 = pd.read_csv('Substacks_with_SpineROIs/a3_tp3.csv')
ground4 = pd.read_csv('Substacks_with_SpineROIs/a4_tp4.csv')

## Report the number of labelled rows in each timepoint
for labels in (ground1, ground2, ground3, ground4):
    print(len(labels))

222
195
218
207


In [14]:
### Convert each pandas dataframe to a numpy array whose columns are ordered Z, Y, X
zyx_columns = ['Z', 'Y', 'X']
ground_truth1, ground_truth2, ground_truth3, ground_truth4 = (
    np.asarray(frame[zyx_columns])
    for frame in (ground1, ground2, ground3, ground4)
)
ground_truth1



array([[  3,  50,   6],
       [  2,   9,  39],
       [  3,   6,  30],
       [  2,  27,  28],
       [  2,  28,  37],
       [  2,  43,  22],
       [  2,  65,  24],
       [  2,   0,  70],
       [  2,  99,   5],
       [  2,  99,  35],
       [  2,  63,  35],
       [  2,  37,  48],
       [  3, 155,  15],
       [  2, 178,  -1],
       [  2, 167,  12],
       [  2, 200,  17],
       [  3, 205,  38],
       [  2, 220,  56],
       [  2, 232,  40],
       [  2, 232,  50],
       [  2, 254,  10],
       [  3, 279,   9],
       [  2, 305,   1],
       [  2, 292,  44],
       [  3, 340,  26],
       [  2, 280,   1],
       [  3, 326,  29],
       [  2, 317,   0],
       [  3, 330,   4],
       [  3, 328,  70],
       [  2, 310,  65],
       [  3, 348,  12],
       [  3, 246,  58],
       [  3, 265,  72],
       [  2, 247,  68],
       [  2, 237,  59],
       [  2, 252,  76],
       [  3, 226,  86],
       [  3, 229,  99],
       [  3, 152,  45],
       [  2, 150,  55],
       [  2,  69

### In the given CSVs the z-slices range from 1-16, but they need to range from 0-15.

In [15]:
## The CSVs number the z-slices from 1; subtract 1 so that Z matches the zero-based
## index of the dummy array. Each array is rebuilt from its dataframe instead of being
## decremented in place, so re-running this cell cannot shift Z a second time.
z_shift = np.array([1, 0, 0])  ## only the Z column moves (column order is Z, Y, X)
ground_truth1 = np.asarray(ground1[['Z', 'Y', 'X']]) - z_shift
ground_truth2 = np.asarray(ground2[['Z', 'Y', 'X']]) - z_shift
ground_truth3 = np.asarray(ground3[['Z', 'Y', 'X']]) - z_shift
ground_truth4 = np.asarray(ground4[['Z', 'Y', 'X']]) - z_shift

print(ground_truth1)
print(ground_truth2)
print(ground_truth3)
print(ground_truth4)
len(ground_truth1)

[[  2  50   6]
 [  1   9  39]
 [  2   6  30]
 [  1  27  28]
 [  1  28  37]
 [  1  43  22]
 [  1  65  24]
 [  1   0  70]
 [  1  99   5]
 [  1  99  35]
 [  1  63  35]
 [  1  37  48]
 [  2 155  15]
 [  1 178  -1]
 [  1 167  12]
 [  1 200  17]
 [  2 205  38]
 [  1 220  56]
 [  1 232  40]
 [  1 232  50]
 [  1 254  10]
 [  2 279   9]
 [  1 305   1]
 [  1 292  44]
 [  2 340  26]
 [  1 280   1]
 [  2 326  29]
 [  1 317   0]
 [  2 330   4]
 [  2 328  70]
 [  1 310  65]
 [  2 348  12]
 [  2 246  58]
 [  2 265  72]
 [  1 247  68]
 [  1 237  59]
 [  1 252  76]
 [  2 226  86]
 [  2 229  99]
 [  2 152  45]
 [  1 150  55]
 [  1  69  84]
 [  1   0 103]
 [  1  29 121]
 [  1  37 115]
 [  1  67 122]
 [  1 163  94]
 [  1 173 105]
 [  2 175 115]
 [  1 185 100]
 [  2 196 109]
 [  2 196 136]
 [  2 206 134]
 [  1 190  51]
 [  2 203  56]
 [  1 351  26]
 [  1 310 108]
 [  1 259 114]
 [  1 270  52]
 [  2 133  88]
 [  1   4   3]
 [  1  42   0]
 [  1 145 138]
 [  1 164 155]
 [  2 197  84]
 [  2 222 121]
 [  2 226 

222

### We make sure that synapse cubes do not overlap with non-synapses in the training and testing sets
Extract the centroid locations of known synapses and mark an area around each synapse

In [16]:
## Occupancy array used to keep track of the centroids and the area around them, so that
## the training set doesn't contain any overlapping areas. Provide the dimensions of the
## tiff file you are working with in z,y,x format; here that is 16 slices of 359x359.
stack_shape = (16, 359, 359)
dummy_array = np.zeros(stack_shape)
dummy_array.shape
dummy_array[0, 0, 0]

0.0

### Finally, we need to extract non-synapses that don't overlap with synapse locations or their cubes

In [22]:
def finding_non_syn(size_cube, centroid_locations, array, max_attempts=None):
    """Sample random non-synapse locations that avoid every synapse footprint.

    Every centroid first gets a square footprint marked with 1 on its z-plane of
    ``array``: rows ``y - size_cube`` up to (not including) ``y + size_cube`` and
    the same span in x, i.e. ``2 * size_cube`` pixels per side. Random (z, y, x)
    locations are then drawn until there is one per centroid; a draw is rejected
    if its own footprint touches any marked pixel, and accepted draws are marked
    as well so that later draws cannot overlap them.

    Parameters
    ----------
    size_cube : int
        Half-width of the square footprint, in pixels.
    centroid_locations : array-like of shape (n, 3)
        Integer (z, y, x) synapse centroids.
    array : numpy.ndarray of shape (depth, height, width)
        Occupancy mask. It is MODIFIED IN PLACE: synapse footprints and the
        footprints of the accepted non-synapses are set to 1.
    max_attempts : int, optional
        Upper bound on the total number of random draws. Defaults to
        ``10000 * n``. Guards against looping forever on a saturated mask.

    Returns
    -------
    numpy.ndarray of shape (n, 3)
        The accepted non-synapse locations as (z, y, x) rows.

    Raises
    ------
    RuntimeError
        If ``max_attempts`` draws were not enough to place every sample.
    """
    depth, height, width = array.shape
    centroids = np.asarray(centroid_locations)
    n_wanted = len(centroids)

    # Mark the synapse footprints. Footprints are clipped to the image instead of
    # relying on indexing errors: a negative start would otherwise wrap around to
    # the opposite edge, and an out-of-range index used to abort the marking of
    # every remaining centroid without any warning.
    if centroids.size:
        for z, y, x in zip(centroids[:, 0], centroids[:, 1], centroids[:, 2]):
            z, y, x = int(z), int(y), int(x)
            if not 0 <= z < depth:
                continue  # centroid lies outside the stack: nothing to mark
            array[z,
                  max(y - size_cube, 0):max(y + size_cube, 0),
                  max(x - size_cube, 0):max(x + size_cube, 0)] = 1

    if max_attempts is None:
        max_attempts = 10000 * max(n_wanted, 1)

    z_loc = []
    y_loc = []
    x_loc = []
    attempts = 0
    while len(z_loc) < n_wanted:
        if attempts >= max_attempts:
            raise RuntimeError(
                "could not place %d non-overlapping non-synapse samples in %d attempts"
                % (n_wanted, max_attempts)
            )
        attempts += 1

        # Keep the whole footprint inside the image: centres closer than
        # size_cube pixels to the y/x border are never drawn.
        x_ns = np.random.randint(size_cube, width - size_cube)
        y_ns = np.random.randint(size_cube, height - size_cube)
        z_ns = np.random.randint(0, depth)

        # View onto the candidate footprint; any marked pixel means it would
        # overlap a synapse or an already accepted non-synapse.
        footprint = array[z_ns,
                          y_ns - size_cube:y_ns + size_cube,
                          x_ns - size_cube:x_ns + size_cube]
        if footprint.any():
            continue

        footprint[...] = 1  # reserve the area for this sample
        z_loc.append(z_ns)
        y_loc.append(y_ns)
        x_loc.append(x_ns)

    return np.asarray((z_loc, y_loc, x_loc)).T
    

## Draw one non-synapse location per synapse of timepoint 1; this is the new non_syn training set
updated_dummy = finding_non_syn(
    size_cube=5,
    centroid_locations=ground_truth1,
    array=dummy_array,
)

updated_dummy

array([[  0, 146, 202],
       [  9, 194, 225],
       [ 12, 169, 258],
       [  9, 189, 204],
       [ 11,  69, 122],
       [  7, 273,  52],
       [  5, 151, 129],
       [  8,  10, 269],
       [  7, 334, 189],
       [ 11,  28, 300],
       [  1, 240, 244],
       [  8,  16, 343],
       [  7, 194, 302],
       [  7, 292, 195],
       [  4,  49, 167],
       [  3,  73,  44],
       [ 15, 189,  49],
       [  8, 205, 140],
       [  2, 296, 249],
       [ 13, 329, 292],
       [  1,  22, 285],
       [  5, 206, 197],
       [  6, 310, 312],
       [  7, 274, 190],
       [ 13, 219, 265],
       [  1, 330, 285],
       [  6, 258,  54],
       [ 11, 200, 288],
       [  3, 335, 156],
       [ 10, 184, 284],
       [  6, 310, 108],
       [  3,  54, 152],
       [  3,  41, 244],
       [  9, 311,  40],
       [  1, 172, 271],
       [ 14, 278, 190],
       [ 14, 105, 225],
       [  0, 153, 175],
       [  6, 152, 330],
       [ 10,  13,  96],
       [  6, 321, 185],
       [ 13, 283

### Finally finding the mean intensity of the cubes around the centroids generated

In [18]:
## Read the tiff file that the intensity values are extracted from
tp1_stack_path = 'Substacks_with_SpineROIs/R04_tp1_substack (17-32).tif'
im1 = tifffile.imread(tp1_stack_path)

In [19]:
def generating_intensity(samples, cube_size, image=None):
    """Return the mean image intensity in a square patch around each sample.

    For a sample (z, y, x) the patch is plane ``z`` of the image, rows
    ``y - cube_size`` up to (not including) ``y + cube_size`` and the same span
    in x, i.e. ``2 * cube_size`` pixels per side. Patches that stick out of the
    image are clipped to it, so samples at the border are averaged over the
    in-bounds pixels only (negative indices no longer wrap around to the
    opposite edge, and an out-of-range index no longer truncates the result).

    Parameters
    ----------
    samples : array-like of shape (n, 3)
        Integer (z, y, x) locations.
    cube_size : int
        Half-width of the square patch, in pixels.
    image : numpy.ndarray, optional
        Stack indexed as ``image[z, y, x]``. Defaults to the notebook-level
        ``im1``.

    Returns
    -------
    list of float
        One mean intensity per sample, in input order. A sample whose patch
        lies completely outside the image yields ``nan`` so that the output
        always stays aligned with ``samples``.
    """
    if image is None:
        image = im1  # stack loaded earlier in the notebook

    locations = np.asarray(samples)
    depth = image.shape[0]

    all_intensity = []
    if not locations.size:
        return all_intensity

    for z, y, x in zip(locations[:, 0], locations[:, 1], locations[:, 2]):
        z, y, x = int(z), int(y), int(x)
        patch = None
        if 0 <= z < depth:
            # Clamp the lower AND upper bound at 0; numpy itself clips slice
            # bounds that exceed the far edge.
            patch = image[z,
                          max(y - cube_size, 0):max(y + cube_size, 0),
                          max(x - cube_size, 0):max(x + cube_size, 0)]
        if patch is None or patch.size == 0:
            all_intensity.append(np.nan)
        else:
            all_intensity.append(np.mean(patch))
    return all_intensity


### Generating intensities for synapses and non-synapses

In [20]:
syn_in = generating_intensity(ground_truth1, 5)
## There must be exactly one mean intensity per labelled synapse; check that explicitly
## instead of hard-coding the row count of timepoint 1 in the reshape below.
assert len(syn_in) == len(ground_truth1), "expected one mean intensity per synapse"
syn_intensity = np.array(syn_in).reshape(-1, 1)  ## reshape the list to be a 2D array with one row per sample
syn_intensity.shape

(222, 1)

In [21]:
non_syn_in = generating_intensity(updated_dummy, 5)
## There must be exactly one mean intensity per sampled non-synapse; check that explicitly
## instead of hard-coding the row count of timepoint 1 in the reshape below.
assert len(non_syn_in) == len(updated_dummy), "expected one mean intensity per non-synapse"
non_syn_intensity = np.array(non_syn_in).reshape(-1, 1)  ## 2D array with one row per sample
non_syn_intensity.shape

(222, 1)

### Splitting the intensity arrays into training and testing:
Decide what fraction of the intensity set goes into the training set and what fraction into the testing set

In [31]:
## Training set: the first 74 rows of each class (about 33% of the 222 samples per class)
n_train = 74
train_syn = syn_intensity[:n_train]
train_non_syn = non_syn_intensity[:n_train]

## Testing set: the remaining rows of the same per-class arrays. These were previously
## both sliced from `master_data`, a name that is never defined in this notebook, which
## also made the two test sets identical.
test_syn = syn_intensity[n_train:]
test_non_syn = non_syn_intensity[n_train:]

### The final training and testing sets:
train_data = np.vstack((train_syn, train_non_syn))
test_data = np.vstack((test_syn, test_non_syn))
### The final label generation, synapses belong to class 1, non-synapses belong to class 0:
train_labels = np.append(np.ones(len(train_syn)), np.zeros(len(train_non_syn)))
    
    
 

In [32]:
## Sanity check: the synapse training set should be (n_train, 1)
print(np.shape(train_syn))

(74, 1)
