# Libraries importation

In [1]:
# Import all libraries needed 

import matplotlib.pyplot as plt
import pandas as pd
import sys 
import scipy
import numpy as np

from scipy.signal import *
from mpl_toolkits.mplot3d import Axes3D
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering
from sklearn import metrics

In [2]:
sys.path.insert(0, '../packaging_PIR')
from neural_data_treatment_pkg.PrintFunctions import *
from neural_data_treatment_pkg.AdaBandFlt import *

In [3]:
# Enable inline plotting
%matplotlib inline
# Enable outline plotting
%matplotlib tk

# Data Loading (execute only the one corresponding to your device)

In [8]:
# file path of csv file

Location = r'/Users/sylva/Documents/SUPAERO/2A/PIR/Data/Wetransfer_data/E18KABaseline_BcutV2groundAll.txt'

In [5]:
Location = r'/Users/louiseplacidet/Desktop/PIR/Data/new_spike_data/newdata/E18KABaseline_BcutV2groundAll.txt'

In [6]:
# complete this one with your own path

#Location = 

In [9]:
# create dataframe
df = pd.read_csv(Location, sep='\t',skiprows=[0,1,3] , index_col='%t           ')

In [10]:
df.columns

Index(['El 21       ', 'El 31       ', 'El 41       ', 'El 22       ',
       'El 32       ', 'El 42       ', 'El 23       ', 'El 33       ',
       'El 43       ', 'El 15       '],
      dtype='object')

# Defining the sampling frequency and the alignement method for the rest of the notebook

In [11]:
fs = 25000
align_method = 'indice_1er_depass'
y_lim_min = -25
y_lim_max = 25

# Cuting and filtering the signal

In [12]:
#####################################################################################################################
####  BANK OF PARTS OF DATA
size = 1000000
all_raw_data = df #Entire recording from all electrodes

#full_signal = df.iloc[:,1] #Entire recording from electrode 58
full_signal = df.loc[:size,'El 31       '] #Entire recording from electrode 58

electrode_ref = df.loc[:size,'El 15       ']

# Desired cutoff frequencies (in Hz).
lowcut = 100.0
highcut = 5000.0

#y = butter_bandpass_filter(df.iloc[:,1], lowcut, highcut, fs, order=6)
y = butter_bandpass_filter(df.iloc[:size,6], lowcut, highcut, fs, order=5)
y_ref = butter_bandpass_filter(df.iloc[:size,8],lowcut,highcut,fs,order=5)

filtereddf = pd.DataFrame(y)
filtereddf.index = df.index[:size]

filtereddf_ref = pd.DataFrame(y_ref)
filtereddf_ref.index = df.index[:size]


signal_filtered = filtereddf.iloc[:,0] #Entire recording filtered by bandpass, for one electrode
signal_filtered_ref = filtereddf_ref.iloc[:,0]


###########################
## Signal de 20s

xminspike = int(np.round(12548*(fs/1000)))
xmaxspike = int(np.round(13000*(fs/1000)))

burst_data = filtereddf.iloc[xminspike:xmaxspike,0]

  b = a[a_slice]


## selecting the signal or the part of the signal

In [13]:
signal = signal_filtered

# Run the noise initialisation

In [14]:
noise_levels = init_noise_levels(signal_filtered, fs, 
                                  noise_window_size = 0.01,
                                  required_valid_windows = 20,
                                  old_noise_level_propagation = 0.8, 
                                  test_level = 5,
                                  estimator_type = "RMS",
                                  percentile_value = 25,
                                  plot_estimator_graph = True)

# Find the spikes

In [15]:
spike_info = find_spikes(signal, noise_levels, fs,
                           window_size = 0.002,
                           noise_window_size = 0.01,
                           threshold_factor = 3.5,
                           positive_threshold_factor = 0.33,
                           maxseparation = 0.001,
                           time_checkmaxlocal = 0.0002,
                           burst_threshold = 7)

spike_fine_tuning(spike_info)

# Record the spikes

In [16]:
spike_data_burst = record_spikes(signal, fs, spike_info.loc[spike_info['burst?'] == True],
                              align_method,
                              t_before = 0.001,
                              t_after = 0.002)

spike_data_no_burst = record_spikes(signal, fs, spike_info.loc[spike_info['burst?'] == False],
                              align_method,
                              t_before = 0.001,
                              t_after = 0.002)

In [17]:
print_spikes(spike_data_burst,
             t_before_alignement = 0,
             first_spike = 0,
             last_spike = -1,
             fs = fs,
             randomize = True,
             nb_spike = 20,
             y_lim_min = y_lim_min,
             y_lim_max = y_lim_max)

In [18]:
print_spikes(spike_data_no_burst,
             t_before_alignement = 0,
             first_spike = 0,
             last_spike = -1,
             fs = fs,
             randomize = True,
             nb_spike = 20,
             y_lim_min = y_lim_min,
             y_lim_max = y_lim_max)

In [19]:
spike_data_oneline_burst = record_spikes_oneline(signal, fs, spike_info.loc[spike_info['burst?'] == True],
                                                  align_method,
                                                  t_before = 0.001,
                                                  t_after = 0.002)

spike_data_oneline_no_burst = record_spikes_oneline(signal, fs, spike_info.loc[spike_info['burst?'] == False],
                                                  align_method,
                                                  t_before = 0.001,
                                                  t_after = 0.002)

In [20]:
print_spikes_oneline(signal, spike_data_oneline_burst)

In [21]:
print_spikes_oneline(signal, spike_data_oneline_no_burst)

# Bilan PCA + AGGLOMERATIVE CLUSTERING

## PCA and AGGLOMERATIVE CLUSERING on spikes

In [22]:
def PCA_and_AGGLOCLUST_spikes(spike_data, spike_info, nb_PCA_components=3,
                              n_clusters=5, metric='euclidean', linkage='ward'):
    
    ## on rééquilibre les valeurs dans les différentes dimensions
    #pca_data = np.array(spike_data.iloc[:,1:].values).transpose()
    #pca_data = StandardScaler().fit_transform(pca_data) # normalizing the features
    
    ## PCA
    pca_data = np.array(spike_data.values).transpose()
    pca = PCA(n_components=nb_PCA_components)
    pca.fit(pca_data)
    PCA_X = pca.transform(pca_data)
    
    ## AGGLOMERATIVE CLUSTERING
    ## Different linkages: 'ward', 'average', 'complete', 'single'
    
    aggloclustering = AgglomerativeClustering(n_clusters=n_clusters, affinity = metric,
                                    linkage=linkage)
    aggloclustering.fit(PCA_X)
    
    labels = aggloclustering.labels_

    # Number of clusters in labels, ignoring noise if present.
    n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
    n_noise_ = list(labels).count(-1)
    
    ## Ajout du label des clusters dans spike info
    spike_info['cluster_label'] = aggloclustering.labels_
    
    return PCA_X, aggloclustering, spike_info

In [23]:
PCA_X, aggloclustering, updated_spike_info = PCA_and_AGGLOCLUST_spikes(spike_data_burst,
                                                                        spike_info.loc[spike_info['burst?'] == True], 
                                                                        nb_PCA_components=3,
                                                                        n_clusters=5, 
                                                                        metric="euclidean", 
                                                                        linkage="ward")
labels = aggloclustering.labels_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [24]:
PCA_X_, aggloclustering_, updated_spike_info_ = PCA_and_AGGLOCLUST_spikes(spike_data_no_burst,
                                                                        spike_info.loc[spike_info['burst?'] == False], 
                                                                        nb_PCA_components=3,
                                                                        n_clusters=5, 
                                                                        metric="euclidean", 
                                                                        linkage="ward")
labels_ = aggloclustering_.labels_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


## Plotting the PCA

In [26]:
PCA_plot(PCA_X)
print_clusters_3d(labels, PCA_X)

In [27]:
PCA_plot(PCA_X_)
print_clusters_3d(labels_, PCA_X_)

# Print the spikes clusterized

In [28]:
print_spikes_clusterized(spike_data_burst,
                             labels,
                             t_before_alignement = 0.001,
                             nb_spike = 20,
                             y_lim_min = y_lim_min,
                             y_lim_max = y_lim_max,
                             fs = fs)

In [29]:
print_spikes_clusterized(spike_data_no_burst,
                             labels_,
                             t_before_alignement = 0.001,
                             nb_spike = 20,
                             y_lim_min = y_lim_min,
                             y_lim_max = y_lim_max,
                             fs = fs)

In [30]:
spike_data_clusterized_oneline_burst = record_spikes_clusterized_oneline(signal, 
                                                                      fs, 
                                                                      updated_spike_info,
                                                                      align_method,
                                                                       labels,
                                                                      t_before = 0.001,
                                                                      t_after = 0.002)

The labels located in labels will be used


In [31]:
spike_data_clusterized_oneline_no_burst = record_spikes_clusterized_oneline(signal, 
                                                                      fs, 
                                                                      updated_spike_info_,
                                                                      align_method,
                                                                       labels_,
                                                                      t_before = 0.001,
                                                                      t_after = 0.002)

The labels located in labels will be used


In [32]:
print_spikes_clusterized_oneline(signal, spike_data_clusterized_oneline_burst,
                             y_lim_min = y_lim_min,
                             y_lim_max = y_lim_max,)

In [33]:
print_spikes_clusterized_oneline(signal, spike_data_clusterized_oneline_no_burst,
                             y_lim_min = y_lim_min,
                             y_lim_max = y_lim_max,)