# Import packages

In [None]:
%load_ext autoreload
%autoreload 2

from pathlib import Path
import os, sys
# Locate the project root by checking the current working directory and then
# each of its ancestors. When a folder named 'Multifirefly-Project' is found,
# make it the working directory and put the project's `methods` folder at the
# front of the import path. Nothing happens if no such folder exists.
project_root = next(
    (folder for folder in (Path.cwd(), *Path.cwd().parents)
     if folder.name == 'Multifirefly-Project'),
    None,
)
if project_root is not None:
    os.chdir(project_root)
    sys.path.insert(0, str(project_root / 'multiff_analysis/multiff_code/methods'))
    
import sys
from data_wrangling import specific_utils, process_monkey_information
from pattern_discovery import pattern_by_trials, pattern_by_trials, cluster_analysis, organize_patterns_and_features
from visualization.matplotlib_tools import plot_behaviors_utils
from neural_data_analysis.neural_analysis_tools.get_neural_data import neural_data_processing
from neural_data_analysis.neural_analysis_tools.visualize_neural_data import plot_neural_data, plot_modeling_result
from neural_data_analysis.neural_analysis_tools.model_neural_data import transform_vars, neural_data_modeling, drop_high_corr_vars, drop_high_vif_vars
from neural_data_analysis.topic_based_neural_analysis.neural_vs_behavioral import prep_monkey_data, prep_target_data, neural_vs_behavioral_class
from neural_data_analysis.topic_based_neural_analysis.planning_and_neural import planning_and_neural_class, pn_utils


import os, sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import math
import seaborn as sns
from matplotlib import rc
from os.path import exists
from numpy import random
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.cross_decomposition import CCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import train_test_split
from importlib import reload
import neo
import gc

import sys
import os, sys
import numpy as np
import torch
from numpy import pi
from matplotlib import pyplot as plt

import numpy as np
import scipy.linalg as linalg
import scipy.interpolate as interpolate
from scipy.signal import fftconvolve


from scipy.io import loadmat
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
import sys

import scipy.sparse as sparse
import matplotlib.pylab as plt
import subprocess

# Restore matplotlib's defaults FIRST: updating rcParams from rcParamsDefault
# overwrites every rc setting, so any customisation has to come after it or it
# is silently undone.
matplotlib.rcParams.update(matplotlib.rcParamsDefault)

# Animation display settings. 'jshtml' is the value that was set last in this
# cell, so it is the one kept (an earlier "html5" assignment was immediately
# overridden by it).
rc('animation', html='jshtml')
matplotlib.rcParams['animation.embed_limit'] = 2**128

os.environ['KMP_DUPLICATE_LIB_OK']='True'

# Print floats with 5 decimals in pandas and without scientific notation in numpy.
pd.set_option('display.float_format', lambda x: '%.5f' % x)
np.set_printoptions(suppress=True)
print("done")

%load_ext autoreload
%autoreload 2

# Retrieve data

In [None]:
raw_data_folder_path = "all_monkey_data/raw_monkey_data/monkey_Bruno/data_0330"

In [None]:
# raw_data_folder_path = "all_monkey_data/raw_monkey_data/monkey_Schro/data_0416"

In [None]:
data_item = neural_vs_behavioral_class.NeuralVsBehavioralClass(raw_data_folder_path=raw_data_folder_path)
data_item.streamline_preparing_neural_and_behavioral_data()

# Explore neural data

## prepare x_var

In [None]:
x_var = data_item.binned_spikes_matrix.copy()

## Convolve

In [None]:
x_var_convolved = neural_data_processing.convolve_neural_data(x_var, kernel_len=7)

## Lag

In [None]:
data_item._get_y_var_lags()
data_item.y_var_lags

In [None]:
# Try the lagging helper on a tiny, easy-to-read example: a (20, 2) array whose
# columns hold 0..19 and 1..20.
# NOTE(review): `lag_numbers` is not defined anywhere in the visible notebook —
# it must already exist in the kernel for this cell to run; confirm its source.
synthetic_data = np.column_stack((np.arange(20), np.arange(1, 21)))
synthetic_data_lags = neural_data_processing.add_lags_to_each_feature(synthetic_data, lag_numbers)
synthetic_data_lags

## firing rate

In [None]:
# Duration covered by each smoothed bin: a constant vector of bin widths passed
# through the same kernel (and the same 'same' mode) as the spike counts.
all_windows = np.full(len(x_var_convolved), data_item.bin_width)
convolved_windows = np.convolve(all_windows, data_item.convolve_pattern, mode='same')


def _smoothed_rate(spike_counts):
    # Smooth one column of binned counts and divide by the matching durations.
    smoothed_counts = np.convolve(spike_counts, data_item.convolve_pattern, mode='same')
    return smoothed_counts / convolved_windows


# Column-wise: every column of binned_spikes_df is converted independently.
firing_rate_df = data_item.binned_spikes_df.apply(_smoothed_rate, axis=0)
firing_rate_df

## Plot interspike intervals

In [None]:
num_spikes = 1000
# Differences between consecutive entries of the first num_spikes + 1 spike
# times, i.e. at most num_spikes intervals.
interspike_intervals = np.diff(data_item.spikes_df['time'].values[:num_spikes + 1])
# Let matplotlib number the x axis from the data itself, so the cell still
# works when the session holds fewer than num_spikes + 1 spikes (a hard-coded
# range(num_spikes) would raise a shape-mismatch error in that case).
plt.plot(interspike_intervals)
plt.title("Interspike intervals")
plt.show()


## Differentiate spikes based on clusters

In [None]:
spikes_to_plot = range(10000, 10500)
# Pull the selected spikes once and reuse them for the scatter and the ticks.
selected_times = data_item.spikes_df.time[spikes_to_plot]
selected_clusters = data_item.spikes_df.cluster[spikes_to_plot]
clusters_present = selected_clusters.unique()

plt.figure(figsize=(10, 10))
plt.scatter(selected_times, selected_clusters, s=2)
# One y tick per cluster that appears in the selection, labelled with its id.
plt.yticks(clusters_present, clusters_present)
plt.title("Spikes")
plt.show()


## Correlation between spike clusters

In [None]:
corr_threshold_to_mark = 0.5
# Pairwise correlation between the columns of all_binned_spikes.
corr_coeff = pd.DataFrame(data_item.all_binned_spikes).corr()
corr_coeff_matrix = corr_coeff.values

# A correlation matrix is symmetric with a trivial diagonal, so only the strict
# upper triangle (k=1) is inspected. That removes self-pairs and mirrored
# duplicates in one step, always reports a pair as (smaller index, larger
# index), and also works when no pair exceeds the threshold.
pair_rows, pair_cols = np.triu_indices_from(corr_coeff_matrix, k=1)
pair_values = corr_coeff_matrix[pair_rows, pair_cols]
above_threshold = np.abs(pair_values) > corr_threshold_to_mark

# cluster1 / cluster2 are positional indices into the correlation matrix.
high_corr_df = pd.DataFrame({
    'cluster1': pair_rows[above_threshold],
    'cluster2': pair_cols[above_threshold],
    'corr_value': pair_values[above_threshold],
}).sort_values(by='corr_value', ascending=False)
high_corr_df


In [None]:
# Plot a heatmap if it will not be too large
# Annotated heatmap of the correlation matrix; skipped once there are 30 or
# more rows because the annotated cells become unreadable.
n_rows_in_corr = corr_coeff.shape[0]
if n_rows_in_corr < 30:
    plt.figure(figsize=(15, 15))
    sns.heatmap(
        corr_coeff,
        vmin=-1,
        cmap='coolwarm',
        annot=True,
        linewidths=1,
    )
    plt.show()

# Relating neural data to other variables

### Catching a target

#### individual instances

In [None]:
time_to_sample_from = data_item.ff_caught_T_new
plot_neural_data.make_individual_spike_plots(time_to_sample_from, data_item.spikes_df, data_item.unique_clusters, 
                                                  max_plots=2)

#### overlaid

In [None]:
time_to_sample_from = data_item.ff_caught_T_new
plot_neural_data.make_overlaid_spike_plot(time_to_sample_from, data_item.spikes_df, data_item.unique_clusters, 
                                               max_rows_to_plot=2)

### Stop (whether or not resulting in a capture?)

In [None]:
time_to_sample_from = data_item.monkey_information[data_item.monkey_information['monkey_speeddummy'] == 0].time.values
plot_neural_data.make_overlaid_spike_plot(time_to_sample_from, data_item.spikes_df, data_item.unique_clusters, 
                                               max_rows_to_plot=3)

### High speed

In [None]:
time_to_sample_from = data_item.monkey_information[data_item.monkey_information['speed'] > 100].time.values
plot_neural_data.make_overlaid_spike_plot(time_to_sample_from, data_item.spikes_df, data_item.unique_clusters, 
                                               max_rows_to_plot=2)

### Visible_before_last_one instances

#### individual

In [None]:
current_i = 1

In [None]:
data_item.make_or_retrieve_target_clust_last_vis_df()
max_plots = 2

data_item.target_cluster_VBLO = pattern_by_trials.find_target_cluster_visible_before_last_one(data_item.target_clust_last_vis_df, data_item.ff_caught_T_new)
plot_neural_data.make_individual_spike_plot_from_target_cluster_VBLO(data_item.target_cluster_VBLO, data_item.spikes_df, data_item.unique_clusters, starting_row=current_i, max_plots=max_plots)
current_i += max_plots


#### overlaid

In [None]:
time_to_sample_from = data_item.target_cluster_VBLO['caught_time'].values
plot_neural_data.make_overlaid_spike_plot(time_to_sample_from, data_item.spikes_df, data_item.unique_clusters, 
                                               max_rows_to_plot=4)

### Just try it randomly

In [None]:
time_to_sample_from = random.uniform(100, 1000, 100)
plot_neural_data.make_overlaid_spike_plot(time_to_sample_from, data_item.spikes_df, data_item.unique_clusters, 
                                               max_rows_to_plot=4)

# Appendix

## See other neural data

### plx 
(it's better to use Matlab to get time offset)

In [None]:
exists('/Volumes/Elements/multiff/Bruno/U-probe/7a/Mar 30 2018/neural data')

In [None]:
import os, sys
import pandas as pd
import neo

# Define file paths and file name
original_file_path = '/Volumes/Elements/multiff/Bruno/U-probe/7a/Mar 30 2018/neural data'
new_file_path = '/Users/dusiyi/Downloads/neural_data_temp_folder/'
file_name = 'm51s022_ead'
fname = os.path.join(original_file_path, file_name + '.plx')

# Function to read event timestamps from a .plx file
def plx_event_ts_modified(filename, ch):
    """Read one event channel from a Plexon ``.plx`` file through neo.

    Parameters
    ----------
    filename : str
        Path of the ``.plx`` file.
    ch : int
        1-based event channel number; entry ``ch - 1`` of the first segment's
        event list is read.

    Returns
    -------
    tuple
        ``(n, ts, sv, freq)``: the number of events, their times rescaled to
        seconds, their labels, and the value read from the reader header as
        the frequency.
    """
    reader = neo.io.PlexonIO(filename)
    block = reader.read_block()
    segment = block.segments[0]
    # Legacy neo releases keep event channels in `eventarrays`; current
    # releases expose them as `events` (the attribute the Blackrock cells in
    # this notebook use). Prefer the legacy list when it exists, otherwise use
    # `events`.
    event_list = getattr(segment, 'eventarrays', None)
    if event_list is None:
        event_list = segment.events
    # NOTE(review): ch - 1 is used as a positional index into the event list;
    # confirm that neo orders the list by channel number for this file.
    event_channel = event_list[ch - 1]  # ch is 1-based, neo is 0-based
    ts = event_channel.times.rescale('s').magnitude  # Convert to seconds
    sv = event_channel.labels
    # NOTE(review): assumes the third field of the first signal-channel header
    # entry is the relevant frequency — confirm against the neo header layout.
    freq = reader.header['signal_channels'][0][2]
    return len(ts), ts, sv, freq

# Read the event timestamps
n, ts, sv, freq = plx_event_ts_modified(fname, 257)
# NOTE(review): plx_event_ts_modified already rescales `ts` to seconds, so
# dividing by `freq` again looks like a double conversion — confirm which of
# the 'timestamp' / 'time' columns downstream code expects before relying on it.
ts_s = ts / freq

# Create a DataFrame for aligning data
neural_event_time = pd.DataFrame({'label': sv, 'timestamp': ts, 'time': ts_s})

# Write the DataFrame to a tab-separated text file. The output folder is
# created first so that to_csv does not fail on a missing directory.
os.makedirs(new_file_path, exist_ok=True)
output_file = os.path.join(new_file_path, file_name + '.txt')
neural_event_time.to_csv(output_file, index=False, sep='\t')

# Display the contents of the text file
with open(output_file, 'r') as file:
    print(file.read())

### nev

In [None]:
import neo

In [None]:
ns6_path = '/Users/dusiyi/Documents/Multifirefly-Project/all_monkey_data/neural_data/monkey_Schro/data_0416/Sorted/m53s453.nev'

In [None]:
reader = neo.io.BlackrockIO(filename=ns6_path)
reader.parse_header()

In [None]:
block = reader.read_block()
event_data = block.segments[0].events

In [None]:
# Take the first event channel named 'digital_input_port' and tabulate its
# times and labels.
for event in event_data:
    if event.name =='digital_input_port':
        event_df = pd.DataFrame({'time': event.times, 'label': event.labels})
        break
else:
    # Without this, a missing channel would leave `event_df` undefined, or
    # silently keep one left over from an earlier run of the notebook.
    raise ValueError("no 'digital_input_port' event channel found in event_data")

In [None]:
# if label == 1 occurs within the first 10 rows, then save the first 10 rows of event df
if event_df.loc[event_df['label']=='1'].index.min() < 10:
    event_df.iloc[:10].to_csv('event_df.csv')

In [None]:
# to get the precise time offset
event_df.loc[event_df['label']=='1', 'time'].iloc[0]

### ccf

In [None]:
ccf_path = '/Volumes/Elements/multiff/Schro/Utah Array/MultiFirefly/Apr 02 2018/neural data/m53s412.ccf'

In [None]:
ccf_path

In [None]:
file_path = 'path/to/your/file.ccf'

In [None]:
# Echo the .ccf file as text, one stripped line at a time. A missing file and
# any other failure are reported with a message instead of a traceback.
try:
    with open(ccf_path, 'r') as ccf_file:
        for raw_line in ccf_file:
            print(raw_line.strip())  # Process each line as needed
except FileNotFoundError:
    print(f"The file at {ccf_path} was not found.")
except Exception as err:
    print(f"An error occurred: {err}")

### other files in sorted data

In [None]:
# sorted_neural_data_path = os.path.join(raw_data_folder_path, 'neural_data/Sorted')

In [None]:
sorted_neural_data_path = '/Users/dusiyi/Documents/Multifirefly-Project/all_monkey_data/neural_data/monkey_Schro/data_0416/Sorted'

In [None]:
sorted_neural_data_path = '/Users/dusiyi/Documents/Multifirefly-Project/all_monkey_data/neural_data/monkey_Bruno/data_0330/Sorted'

In [None]:
# Load spike_times.npy. The result is stored under a name that matches the
# file (it was previously bound to `template_feature_ind`, which is the name a
# later cell uses for template_feature_ind.npy).
filepath = os.path.join(sorted_neural_data_path, "spike_times.npy")
spike_times = np.load(filepath)
print(spike_times.shape)
spike_times

In [None]:
# See other data
filepath = os.path.join(sorted_neural_data_path, 'cluster_KSLabel.tsv')
cluster_KSLabel=pd.read_csv(filepath,sep='\t')
cluster_KSLabel

In [None]:
filepath = os.path.join(sorted_neural_data_path, 'cluster_ContamPct.tsv')
cluster_ContamPct=pd.read_csv(filepath,sep='\t')
cluster_ContamPct

In [None]:
filepath = os.path.join(sorted_neural_data_path, 'cluster_Amplitude.tsv')
cluster_Amplitude=pd.read_csv(filepath,sep='\t')
cluster_Amplitude

In [None]:
# Load pc_features.npy. The result is stored under a name that matches the
# file (it was previously bound to `pc_feature_ind`, which the next cell uses
# for pc_feature_ind.npy).
filepath = os.path.join(sorted_neural_data_path, "pc_features.npy")
pc_features = np.load(filepath)
print("shape:", pc_features.shape)
pc_features

In [None]:
filepath = os.path.join(sorted_neural_data_path, "pc_feature_ind.npy")
pc_feature_ind = np.load(filepath)
print(pc_feature_ind.shape)
pc_feature_ind

In [None]:
pc_feature_ind.sum(axis=1)

In [None]:
filepath = os.path.join(sorted_neural_data_path, "channel_map.npy")
channel_map = np.load(filepath)
print(channel_map.shape)
channel_map

In [None]:
channel_map+1

In [None]:
filepath = os.path.join(sorted_neural_data_path, "channel_positions.npy")
channel_positions = np.load(filepath)
print(channel_positions.shape)
channel_positions

In [None]:

# Plot
plt.scatter(channel_positions[:, 0], channel_positions[:, 1], s=10)
plt.xlabel('X Position (µm)')
plt.ylabel('Y Position (µm)')
plt.title('Electrode Layout')
plt.show()

In [None]:
filepath = os.path.join(sorted_neural_data_path, "amplitudes.npy")
amplitudes = np.load(filepath)
print(amplitudes.shape)
amplitudes

In [None]:
filepath = os.path.join(sorted_neural_data_path, "whitening_mat.npy")
whitening_mat = np.load(filepath)
print(whitening_mat.shape)
whitening_mat

In [None]:
filepath = os.path.join(sorted_neural_data_path, "whitening_mat_inv.npy")
whitening_mat_inv = np.load(filepath)
print(whitening_mat_inv.shape)
whitening_mat_inv

In [None]:
filepath = os.path.join(sorted_neural_data_path, "templates.npy")
templates = np.load(filepath)
print(templates.shape)
templates

In [None]:
for k in range(templates.shape[1]):
    print(k, templates[:,k,:].sum())

In [None]:
filepath = os.path.join(sorted_neural_data_path, "templates_ind.npy")
templates_ind = np.load(filepath)
print(templates_ind.shape)
templates_ind

In [None]:
filepath = os.path.join(sorted_neural_data_path, "template_features.npy")
template_features = np.load(filepath)
print(template_features.shape)
template_features

In [None]:
filepath = os.path.join(sorted_neural_data_path, "template_feature_ind.npy")
template_feature_ind = np.load(filepath)
print(template_feature_ind.shape)
template_feature_ind

In [None]:
template_feature_ind.sum(axis=1)

In [None]:
filepath = os.path.join(sorted_neural_data_path, "similar_templates.npy")
similar_templates = np.load(filepath)
print(similar_templates.shape)

In [None]:
similar_templates.T

In [None]:
filepath = os.path.join(sorted_neural_data_path, "spike_times.npy")
spike_times = np.load(filepath)
print(spike_times.shape)
spike_times

In [None]:
filepath = os.path.join(sorted_neural_data_path, "spike_templates.npy")
spike_templates = np.load(filepath)
print(spike_templates.shape)
spike_templates

In [None]:
filepath = os.path.join(sorted_neural_data_path, "spike_clusters.npy")
spike_clusters = np.load(filepath)
print(spike_clusters.shape)
spike_clusters

In [None]:
# Fraction of spikes whose template id equals their cluster id.
# A value of 1.0 means the two arrays are exactly the same.
np.count_nonzero(spike_templates == spike_clusters) / spike_templates.size

In [None]:
print(spike_clusters.reshape(-1).min(), spike_clusters.reshape(-1).max())

In [None]:
import h5py

# Read every top-level entry of QualityMetr.mat into a dict of NumPy arrays.
# The file is opened read-only inside a context manager so the handle is
# closed as soon as the data has been copied out.
filepath = os.path.join(sorted_neural_data_path, "QualityMetr.mat")
with h5py.File(filepath, 'r') as f:
    QualityMetr = {k: np.array(v) for k, v in f.items()}
QualityMetr



In [None]:
# Read every top-level entry of rez.mat into a dict of NumPy arrays. The file
# is opened read-only inside a context manager so the handle is closed as soon
# as the data has been copied out.
filepath = os.path.join(sorted_neural_data_path, "rez.mat")
with h5py.File(filepath, 'r') as f:
    arrays = {k: np.array(v) for k, v in f.items()}
arrays

In [None]:
# Deliberate stopping point: halts "Run All" before the cells below, which
# read from the external hard drive and rewrite files on disk. An explicit
# exception is used so the notebook still parses as valid Python.
raise RuntimeError("Intentional stop: run the cells below manually.")

## Get mapping tables
(The mapping tables relate the paths on the hard drive to the local paths.)

### Bruno

In [None]:
monkey_name = 'monkey_Bruno'
hdrive_dir = '/Volumes/Elements/multiff/Bruno/U-probe/7a'
neural_data_folder_name = 'neural data'


def filter_neural_file_func(file_names):
    # Keep names that contain 'plx' but not 'ead'.
    return [name for name in file_names if 'plx' in name and 'ead' not in name]


bruno_mapping_table = neural_data_processing.get_mapping_table_between_hard_drive_and_local_folders(
    monkey_name,
    hdrive_dir,
    neural_data_folder_name,
    filter_neural_file_func,
)


In [None]:
# This code will work:
os.listdir('/Volumes/Elements/multiff/Bruno/U-probe/7a')

# # The code below will return "Invalid argument," and I'm still not exactly sure why
# os.listdir('/Volumes/Elements/multiff/Bruno/U-probe/7a/Mar 30 2018/neural data')


### Schro

In [None]:
monkey_name = 'monkey_Schro'
hdrive_dir = '/Volumes/Elements/multiff/Schro/Utah Array/MultiFirefly'
neural_data_folder_name = 'neural data/Sorted'


def filter_neural_file_func(file_names):
    # Keep names that contain 'nev'.
    return [name for name in file_names if 'nev' in name]


schro_mapping_table = neural_data_processing.get_mapping_table_between_hard_drive_and_local_folders(
    monkey_name,
    hdrive_dir,
    neural_data_folder_name,
    filter_neural_file_func,
)

### add time_offset_path

In [None]:
# Add a 'neural_event_time_path' column to each monkey's mapping table and save
# the table back to the file it was read from.
mapping_table_dir = '/Users/dusiyi/Documents/Multifirefly-Project/multiff_analysis/multiff_code/methods/eye_position_analysis/neural_data_analysis/get_neural_data'
for monkey in ['schro', 'bruno']:
    # Build the path once and use it for both the read and the write, so the
    # table cannot end up in a literal '{monkey}_mapping_table.csv' file
    # (which is what a non-f-string write path produces).
    mapping_table_path = os.path.join(mapping_table_dir, f'{monkey}_mapping_table.csv')
    mapping_table = pd.read_csv(mapping_table_path)
    # Same folder layout as local_path, with 'neural_data' swapped for
    # 'time_calibration', pointing at neural_event_time.txt inside it.
    mapping_table['neural_event_time_path'] = mapping_table['local_path'].apply(lambda x: os.path.join(x.replace('neural_data', 'time_calibration'), 'neural_event_time.txt'))
    mapping_table.to_csv(mapping_table_path, index=False)

### change column names in time_offset.txt

In [None]:
# Rewrite each existing neural_event_time file with the column names
# 'label', 'timestamp' and 'time' in place of 'sv', 'ts' and 'ts_s'.
for index, row in mapping_table.iterrows():
    # Keep the path and the loaded table in separate variables: reusing one
    # name for both means to_csv receives the DataFrame as its target path.
    neural_event_time_path = row['neural_event_time_path']
    # NOTE(review): an earlier str.replace('neural_data', 'processed_neural_data')
    # on this path discarded its result, so the path was never changed; the
    # path is used as stored here. Confirm that is the intended location.
    if not exists(neural_event_time_path):
        continue
    # NOTE(review): read with the default comma separator — confirm the files
    # are comma-separated rather than tab-separated.
    neural_event_time = pd.read_csv(neural_event_time_path)
    neural_event_time = neural_event_time.rename(columns={'sv': 'label',
                                                          'ts': 'timestamp',
                                                          'ts_s': 'time'})
    neural_event_time.to_csv(neural_event_time_path, index=False)
    print(neural_event_time_path)

## Get neural_event_time_path.txt

### Bruno

Main code is in '/Users/dusiyi/Documents/Multifirefly-Project/multiff_analysis/multiff_code/methods/eye_position_analysis/neural_data_analysis/MATLAB_processing/AlignNeuralDataXSessions.m'

### Schro

In [None]:
# For every Schro session that has no neural_event_time file yet, read the
# Blackrock file and save the times and labels of its 'digital_input_port'
# events. Failures are printed and the loop moves on to the next session.
for index, row in schro_mapping_table.iterrows():
    ns6_path = row['hdrive_path']
    neural_event_time_path = row['neural_event_time_path']
    if exists(neural_event_time_path):
        continue
    try:
        os.makedirs(os.path.dirname(neural_event_time_path), exist_ok=True)
        print(ns6_path)
        # print(neural_event_time_path)

        reader = neo.io.BlackrockIO(filename=ns6_path)
        reader.parse_header()
        block = reader.read_block()
        event_data = block.segments[0].events
        for event in event_data:
            if event.name =='digital_input_port':
                break
        else:
            # Without this guard, a session lacking the channel would be
            # saved with the previous session's event_df.
            raise ValueError(f"no 'digital_input_port' event channel in {ns6_path}")
        event_df = pd.DataFrame({'time': event.times, 'label': event.labels})
        event_df.to_csv(neural_event_time_path, index=False)
    except Exception as e:
        print(f'Error: {e}')

Since problems are encountered for some sessions, one can extract time_offset.txt manually in MATLAB. See the code in:
/Users/dusiyi/Documents/Multifirefly-Project/multiff_analysis/multiff_code/methods/eye_position_analysis/neural_data_analysis/MATLAB_processing/ManuallyExtractTimeOffset.m

#### from a specific session

In [None]:
ns6_path = '/Users/dusiyi/Documents/MATLAB/m53s436.nev'
# The target must be a file inside the session folder, not the folder itself:
# with a bare folder path the `exists` check skips the export whenever the
# folder is already there, and otherwise the table is written to an
# extension-less file named like the folder. The file name matches the one
# used for 'neural_event_time_path' in the mapping tables.
neural_event_time_path = os.path.join(
    '/Users/dusiyi/Documents/Multifirefly-Project/all_monkey_data/time_calibration/monkey_Schro/data_0410',
    'neural_event_time.txt',
)
if not exists(neural_event_time_path):
    try:
        os.makedirs(os.path.dirname(neural_event_time_path), exist_ok=True)
        print(ns6_path)
        # print(neural_event_time_path)

        reader = neo.io.BlackrockIO(filename=ns6_path)
        reader.parse_header()
        block = reader.read_block()
        event_data = block.segments[0].events
        for event in event_data:
            if event.name =='digital_input_port':
                break
        else:
            # Avoid saving an event_df left over from an earlier cell.
            raise ValueError(f"no 'digital_input_port' event channel in {ns6_path}")
        event_df = pd.DataFrame({'time': event.times, 'label': event.labels})
        event_df.to_csv(neural_event_time_path, index=False)
    except Exception as e:
        print(f'Error: {e}')