# Prepare data for PCA-rearing-detection method

I planned to use the results of the baseline detection method as input for the PCA-based one, but this turned out to be unnecessary. The data preparation is done in PCA_detection_methods.ipynb, so there is no need to read this notebook.

1. Start from the existing baseline method that detects the rearing periods
2. Get the rearing time from the baseline method as the training set
3. Train the PCA-rearing detection model


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from base import MultiDaysBeaconPosition, BeaconPosition
from scipy.stats import sem
from utils.basic_utils import get_tags
from utils.baseline_method import determine_rearing_period
from sklearn.decomposition import PCA

In [2]:
# Location of the raw data and the animal to analyse.
root_path = '../Data/Raw/'
rat_id = 'FS10'
# glob() returns paths in an arbitrary, filesystem-dependent order. Sort them
# so that positional indexing into this list (and into `tags`) selects the
# same sub-directory on every run.
rat_subdirectories = sorted(glob(root_path+rat_id+'/*/' ))

# Tags derived from the sub-directories by the project helper get_tags
# (presumably index-aligned with rat_subdirectories — confirm in utils.basic_utils).
tags = get_tags(rat_subdirectories)

# 1. Load data with different tags

In [3]:
# Position in rat_subdirectories / tags of the entry to load below.
tag_id = 0

In [4]:
# Load the selected sub-directory with its tag.
# NOTE(review): the meaning of the two positional True flags is defined by
# BeaconPosition in `base` and is not visible here — confirm before changing.
this_tag = BeaconPosition(rat_subdirectories[tag_id], tags[tag_id], True, True)

In [188]:
# Average difference between consecutive entries of column 0 of position_data
# (presumably the sample timestamps, i.e. the mean sampling period — confirm
# against BeaconPosition).
unit_time = np.diff(this_tag.position_data[:,0]).mean()
print(unit_time)

0.016694419627324594


# 2. Prepare data given T, tau

Set T = 200 bins and tau = 10 bins.

In [6]:
# ---- parameters ----
save_fig = False  # whether to save the figures of the results

T = 200           # number of time bins per analysis window
tau = 10          # number of time bins per PCA sample
group_step = 20   # bucket width used when grouping windows by number of rearing bins

height_thresh = 0.6  # threshold on height (z) used to determine rearing

# ---- signals derived from the loaded position data ----
# Height trace: column 3 of position_data.
z = this_tag.position_data[:,3]

# Vertical speed: change of z divided by the change of column 0 between
# consecutive samples; a 0 is prepended so the array has the same size as z.
z_speed = np.diff(z) / np.diff(this_tag.position_data[:,0])
z_speed = np.insert(z_speed,0,0)

# Speed in the xy plane; a 0 is prepended to make the size the same as well.
xy_speed = np.insert(this_tag.speed,0,0)

In [7]:
# Display the number of samples in the height trace.
z.shape

(108578,)

In [8]:
## initial groups: one empty list per boundary 0, group_step, 2*group_step, ... below T
group_lists = []

for boundary in np.arange(0, T, group_step):
    group_lists += [[]]
    print(boundary)

0
20
40
60
80
100
120
140
160
180


In [162]:
def generate_tau_sample(features, tau = 10, step = 2):
    '''
    Cut a time series into windows of tau rows and lay them side by side.

    A window of `tau` consecutive rows is taken ending at row indices
    tau, tau+step, tau+2*step, ... (never past the end of `features`), so
    consecutive windows overlap whenever step < tau.

    features: array of shape [n, k] (n time bins, k features)
    tau: number of rows in each window
    step: shift between the ends of consecutive windows

    return: array of shape [k, m*tau], where m is the number of windows; the
            windows are concatenated along the time axis and the result is
            transposed. When `features` has fewer than `tau` rows there is no
            complete window and an empty array of shape [k, 0] is returned.
    '''
    n_bins = features.shape[0]
    n_features = features.shape[1] # number of features of interest

    # every slice has exactly tau rows because tau <= end_idx <= n_bins
    windows = [features[end_idx-tau: end_idx, :]
               for end_idx in np.arange(tau, n_bins+1, step)]

    if not windows:
        # np.concatenate refuses an empty list; return a well-formed empty result
        return np.empty((n_features, 0), dtype=features.dtype)

    return np.concatenate(windows, axis=0).T

In [11]:
n = T
N = n*20  # bins covered by one figure: 20 windows of n bins, one per subplot of the 4x5 grid

for Begin in np.arange(0, z.shape[0],N):
    fig, axis = plt.subplots(4,5, figsize = (19,16), sharey = True, sharex = True)
    axis = axis.flatten()

    fig.subplots_adjust(left=0.08, right=0.98, bottom=0.05, top=0.90,
                        hspace=0.2, wspace=0.3)


    begin_idxs = np.arange(Begin,Begin+N, n)

    for i,begin_idx in enumerate(begin_idxs):
        # The recording length is generally not a multiple of N, so the last
        # figure contains windows that run past the end of the data. Assigning
        # such a short slice into the fixed (n,3) array raises a broadcast
        # error, so stop at the first incomplete window (only full windows are
        # analysed).
        if begin_idx + n > z.shape[0]:
            break

        # inputs of the baseline detector: height, vertical speed, planar speed
        this_features = np.zeros((n,3))
        this_features[:,0] = z[begin_idx: begin_idx+n]
        this_features[:,1] = z_speed[begin_idx: begin_idx+n]
        this_features[:,2] = xy_speed[begin_idx: begin_idx+n]
        this_rearing = determine_rearing_period(this_features,height_thresh= height_thresh, zspeed_thresh=0.5, xspeed_drop_thresh=0.2, gap_tolerance= 5, total_tolerance=10, )
        
        ## put this rearing results to the right group
        # (the index is computed but not used in this cell)
        group_idx = int(len(this_rearing)/group_step)

        # plot results (currently disabled)
    #     axis[i].plot(z[begin_idx: begin_idx+n],label ='height')
    #     if len(this_rearing)>0:
    #         x_bool = np.zeros(n) ==1
    #         x_bool[this_rearing] = True
    #         axis[i].fill_between(np.arange(0,n), 0.45, 0.80, alpha = 0.4, label = 'rearing period', where = x_bool, interpolate = True, step = 'mid')
    #     axis[i].set_xlabel('time bins', fontsize = 14)
    #     axis[i].set_ylabel('height', fontsize = 14)
    #     # break
        
    #     axis[i].legend(loc =1)
    #     #print(i)
        
    # title = 'Rearing period detection of rat %s, height thresh = %.2f, bins = [%d: %d]'%(rat_id, height_thresh, Begin, Begin+N)
    # fig.suptitle(title, fontsize = 16)
    # if save_fig:
    #     fig.savefig('results/easy_method/%s/%s.png'%(rat_id, title), format ='png',)
        

In [12]:
def organize_group_list(group_lists, rng=None):
    '''
    Organize the group list into a balanced feature matrix to be used in PCA.

    The same number of samples (the size of the smallest group) is drawn at
    random, without replacement, from every group. The drawn samples of one
    group are joined side by side and then flattened row by row, so each
    output row reads: feature1 of all drawn samples, feature2 of all drawn
    samples, ...

    group_lists: list of groups; each group is a list of equally shaped
                 2-D arrays [k, m]
    rng: optional random source exposing choice(a, size=, replace=), e.g.
         np.random.default_rng(seed) or np.random.RandomState(seed); pass one
         to make the sampling reproducible. By default the global np.random
         state is used.

    return: array with one row per group

    raises: ValueError if group_lists is empty or any group has no samples
    '''
    group_len = [len(this_list) for this_list in group_lists]
    if not group_len:
        raise ValueError('group_lists is empty: there is nothing to organize')

    min_len = min(group_len)
    if min_len == 0:
        # without this check the failure surfaces as an obscure
        # "need at least one array to concatenate" from np.hstack
        raise ValueError('every group needs at least one sample, got group sizes %s' % group_len)

    choose = np.random.choice if rng is None else rng.choice

    # randomly choose min(group_len) samples from each group
    features_matrix = []
    for this_list, this_len in zip(group_lists, group_len):
        random_idx = choose(np.arange(0, this_len), size = min_len, replace = False) # no replace
        random_sample = np.asarray(this_list)[random_idx]
        # first hstack joins the samples along the columns, the second one
        # concatenates the rows: feature1 feature2 feature3
        array_sample = np.hstack(np.hstack(random_sample))
        features_matrix.append(array_sample)

    return np.asarray(features_matrix)
        

In [166]:
n = T

# Start from one empty bucket per multiple of group_step in [0, T]. Windows are
# bucketed by how many bins the baseline method returns for them; the last
# bucket is only reached when that count equals T and is merged into its
# neighbour at the end of the cell.
group_lists = []
for boundary in np.arange(0, T+1, group_step):
    group_lists += [[]]

# Walk over the recording in consecutive, non-overlapping windows of n bins;
# a trailing incomplete window is ignored.
begin_idx = 0
while begin_idx+n <= z.shape[0]:
    window = slice(begin_idx, begin_idx+n)

    # inputs of the baseline detector: height, vertical speed, planar speed
    this_features = np.zeros((n,3))
    for col, series in enumerate((z, z_speed, xy_speed)):
        this_features[:,col] = series[window]

    this_rearing = determine_rearing_period(
        this_features,
        height_thresh=height_thresh,
        zspeed_thresh=0.5,
        xspeed_drop_thresh=0.2,
        gap_tolerance=5,
        total_tolerance=10,
    )

    # bucket index of this window, from the number of detected rearing bins
    group_idx = int(len(this_rearing)/group_step)

    # features handed to the PCA: height, column 6 of position_data, vertical speed
    pca_features = np.zeros((n,3))
    for col, series in enumerate((z, this_tag.position_data[:,6], z_speed)):
        pca_features[:,col] = series[window]

    tau_samples = generate_tau_sample(pca_features, tau=tau, step=10)
    group_lists[group_idx].append(tau_samples)

    begin_idx += n

# combine the last two buckets
last_group = group_lists.pop()
group_lists[-1].extend(last_group)