In [None]:
from caiman.source_extraction.cnmf import cnmf

import matplotlib.pyplot as plt
import numpy as np
from scipy import interpolate, stats
import scipy.io as sio
import seaborn as sns

from collections import defaultdict
import os

from src.datetime import add_frames_to_datetime, datetime_to_frame, image_desc_to_datetime, timestamp_to_datetime
from src.tensor import min_max
from src.tensor_creation_hyperparams import Hyperparams

## Hyperparameter Setup

In [None]:
# Hyperparameters for F147
# The name given here is later used to build the file name of the saved tensor
F147 = Hyperparams(name='F147')
# Result files to load; each one is read with cnmf.load_CNMF and the traces of all
# files are concatenated along the first axis
F147.set_data_paths(estimates=[
    'results/F147_0_memmap__d1_247_d2_256_d3_1_order_C_frames_20995_.hdf5',
    'results/F147_1_memmap__d1_73_d2_256_d3_1_order_C_frames_20995_.hdf5'
])
# Trial metadata: `trial` is a MATLAB file read with scipy.io.loadmat and `trial_var`
# is the variable inside it. The remaining `*_field` values name fields of each trial
# record: timestamps (first/last entries give the trial start/end), the output label
# (compared with the baseline name), the heat onset frame and the frames of turns.
# `trial_fr` is the rate used to convert those frame numbers into times
# (presumably frames per second -- TODO confirm against add_frames_to_datetime)
F147.set_trial_metadata(
    trial='data/2p_raw/F147/20210526_LT_18_0.mat',
    trial_var='trial',
    trial_time_field='timestamps',
    trial_output_field='output',
    trial_heat_onset_field='laseron',
    trial_turn_field='turn_frame',
    trial_fr=160
)
# Image metadata: `image` is a MATLAB file, `image_var` the variable inside it and
# `image_time_field` the field holding each frame's time description. `image_fr` is
# multiplied by a duration in seconds to obtain a number of frames
F147.set_image_metadata(
    image='results/F147_imfinfo_edit.mat',
    image_var='image',
    image_time_field='ImageDescription',
    image_fr=4.5
)
# Components whose signal-to-noise ratio is below `snr_thr` are removed. Baseline
# trials are those whose output label equals `baseline_name`; `baseline_selected` is
# the zero-based position, within the list of baseline trials, of the noise region
F147.set_component_evaluation(snr_thr=1.25, baseline_name='baseline', baseline_selected=1)
# Upper bound, in seconds, on the window kept after the first turn of each trial
F147.set_alignment_params(max_seconds_turn_to_end=20)

In [None]:
# Hyperparameters for F201
# The name given here is later used to build the file name of the saved tensor
F201 = Hyperparams(name='F201')
# Result files to load; each one is read with cnmf.load_CNMF and the traces of all
# files are concatenated along the first axis
F201.set_data_paths(estimates=[
    'results/F201_0_memmap__d1_320_d2_256_d3_1_order_C_frames_24040_.hdf5'
])
# Trial metadata: `trial` is a MATLAB file read with scipy.io.loadmat and `trial_var`
# is the variable inside it. The remaining `*_field` values name fields of each trial
# record: timestamps (first/last entries give the trial start/end), the output label
# (compared with the baseline name), the heat onset frame and the frames of turns.
# `trial_fr` is the rate used to convert those frame numbers into times
# (presumably frames per second -- TODO confirm against add_frames_to_datetime)
F201.set_trial_metadata(
    trial='data/2p_raw/F201/20210812_RT_13_59.mat',
    trial_var='trial',
    trial_time_field='timestamps',
    trial_output_field='output',
    trial_heat_onset_field='laseron',
    trial_turn_field='turn_frame',
    trial_fr=160
)
# Image metadata: `image` is a MATLAB file, `image_var` the variable inside it and
# `image_time_field` the field holding each frame's time description. `image_fr` is
# multiplied by a duration in seconds to obtain a number of frames
F201.set_image_metadata(
    image='results/F201_imfinfo_edit.mat',
    image_var='image',
    image_time_field='ImageDescription',
    image_fr=4.5
)
# Components whose signal-to-noise ratio is below `snr_thr` are removed. Baseline
# trials are those whose output label equals `baseline_name`; `baseline_selected` is
# the zero-based position, within the list of baseline trials, of the noise region
F201.set_component_evaluation(snr_thr=1.25, baseline_name='baseline', baseline_selected=1)
# Upper bound, in seconds, on the window kept after the first turn of each trial
F201.set_alignment_params(max_seconds_turn_to_end=20)

In [None]:
# Currently selected hyperparameters
# Every later cell reads its settings through `hyp`; assign a different Hyperparams
# object here (for example F201) to process another dataset
hyp = F147

## Metadata Loading

In [None]:
# Move to the main project directory
# NOTE(review): the move is relative to the current working directory, so every
# additional run of this cell climbs one more directory level; run it exactly once
# per kernel session. The relative paths used afterwards ('results/...', 'data/...')
# are resolved against the directory reached here.
os.chdir('../')

In [None]:
# Load trial and image metadata

# Read the trial metadata file and flatten its structure array to one dimension
trial_mat = sio.loadmat(hyp.trial)
trial_info = trial_mat[hyp.trial_var].flatten()

# Read the image metadata file and flatten its structure array to one dimension
image_mat = sio.loadmat(hyp.image)
image_info = image_mat[hyp.image_var].flatten()

## Trial Grouping

In [None]:
# Position of the time field among the named fields of each trial metadata record
trial_field_names = trial_info.dtype.names
trial_time_index = trial_field_names.index(hyp.trial_time_field)

# Position of the time field among the named fields of each image metadata record
image_field_names = image_info.dtype.names
image_time_index = image_field_names.index(hyp.image_time_field)

In [None]:
# Trial number of every frame. The array is floating point so that frames outside all
# trials can be marked with NaN; its contents are uninitialized until the grouping
# loop has assigned every frame
trials_by_frame = np.empty(shape=image_info.size, dtype=np.float64)

# Number of frames counted for every trial, starting at zero
frames_per_trial = np.zeros(shape=trial_info.size, dtype=np.float64)

In [None]:
# All time arrays share the same microsecond-resolution datetime type
timestamp_dtype = np.dtype('datetime64[us]')

# Start time and end time of each trial (one entry per trial)
trial_times_start = np.empty(trial_info.size, dtype=timestamp_dtype)
trial_times_end = np.empty(trial_info.size, dtype=timestamp_dtype)

# Time of each frame (one entry per frame)
frame_timestamps = np.empty(image_info.size, dtype=timestamp_dtype)

In [None]:
# Find the start and end time of each trial
for i, trial_record in enumerate(trial_info):

    # The first and last entries of the trial's time field mark its start and end
    trial_timestamps = trial_record[trial_time_index]
    trial_times_start[i] = timestamp_to_datetime(trial_timestamps[0])
    trial_times_end[i] = timestamp_to_datetime(trial_timestamps[-1])

In [None]:
# Both the trial cursor and the frame cursor of the grouping loop start at position 0
trial_curr = image_curr = 0

In [None]:
# Find which trial each frame belongs to

# Start the scan at the first trial and the first frame and clear the per-trial
# counts, so that running this cell again rescans every frame without double-counting
trial_curr = 0
image_curr = 0
frames_per_trial[:] = 0
last_trial = trial_info.size - 1

while image_curr < image_info.size:

    # Record the time of the current frame
    image_time = image_desc_to_datetime(image_info[image_curr][image_time_index][0])
    frame_timestamps[image_curr] = image_time

    # Skip every trial that ended before this frame was taken. The scan stops as soon
    # as the frame precedes the start of the current trial and never moves past the
    # last trial. Trials are only ever advanced, so the grouping is only meaningful
    # when frames and trials are both in chronological order.
    while (trial_curr < last_trial
           and not (image_time < trial_times_start[trial_curr])
           and image_time > trial_times_end[trial_curr]):
        trial_curr += 1

    # A frame taken before the current trial starts, or after the last trial ends,
    # does not belong to any trial
    if image_time < trial_times_start[trial_curr] or image_time > trial_times_end[trial_curr]:
        trials_by_frame[image_curr] = np.nan

    # Otherwise the frame lies within the current trial: record it and count it
    else:
        trials_by_frame[image_curr] = trial_curr
        frames_per_trial[trial_curr] += 1

    # Move on to the next frame
    image_curr += 1

## Data Loading

In [None]:
# Load one CNMF result object per file listed in the hyperparameters
cnms = [cnmf.load_CNMF(fname) for fname in hyp.estimates]

In [None]:
# Collect the `S` array of the estimates of every loaded result, in file order
traces = [cnm.estimates.S for cnm in cnms]

In [None]:
# Stack the traces of all files along the first axis (the default axis of
# np.concatenate), so that the rows of all files follow one another
data_orig = np.concatenate(traces)

## Component Evaluation

In [None]:
# Position of the output field among the named fields of each trial metadata record
trial_field_names = trial_info.dtype.names
trial_output_index = trial_field_names.index(hyp.trial_output_field)

In [None]:
# Indices of all trials whose output label equals the configured baseline name
baselines = [
    i
    for i in range(trial_info.size)
    if trial_info[i][trial_output_index][0] == hyp.baseline_name
]

In [None]:
# Frames recorded during the selected baseline trial form the noise region. Every
# other frame forms the signal region; this includes frames assigned to no trial,
# which are stored as NaN and therefore never compare equal to a trial number
noise_frames = trials_by_frame == baselines[hyp.baseline_selected]
signal_frames = ~noise_frames

# Boolean masks keep the selected data two-dimensional, so the per-component
# statistics below also work with a single component or a single frame in a region

# Using the selected baseline as the noise region, calculate the standard deviation of the signal region
std_sig = np.std(data_orig[:, signal_frames], axis=1, ddof=1)

# Using the selected baseline as the noise region, calculate the standard deviation of the noise region
std_noise = np.std(data_orig[:, noise_frames], axis=1, ddof=1)

# Calculate the signal-to-noise ratio for each component. A component that is
# constant over the noise region has zero noise deviation, which makes its ratio
# infinite (or NaN when it is constant over the signal region as well)
sig_noise_ratio = std_sig / std_noise

In [None]:
# Components whose signal-to-noise ratio is below the threshold are classified as
# noise. A NaN ratio never compares as smaller, so such components are not listed
noise_indices = [i for i, snr in enumerate(sig_noise_ratio) if snr < hyp.snr_thr]

# Drop the rows of all noise components; the original data is left untouched
data = np.delete(data_orig, noise_indices, axis=0)

## Normalization

In [None]:
# Normalized copies of the data, keyed by the name of the normalization method
data_norm = dict()

In [None]:
# Z-score normalization along axis 1 (one normalization per row of the data),
# using the sample standard deviation (ddof=1)
data_zscore = stats.zscore(data, axis=1, ddof=1)
data_norm['Z-Score'] = data_zscore

In [None]:
# Min-max normalization along axis 1 (one normalization per row of the data)
data_min_max = min_max(data, axis=1)
data_norm['Min-Max'] = data_min_max

## Heatmap

In [None]:
# Default figure size (width, height) applied through the seaborn theme settings
default_figure_size = (17, 8.5)
sns.set_theme(rc={'figure.figsize': default_figure_size})

In [None]:
# Draw one heatmap per normalization method
for method, data_normalized in data_norm.items():
    sns.heatmap(data_normalized, cmap='jet')

    # Title and axis labels: columns are frames, rows are extracted sources
    plt.title(f"Extracted Sources Normalized Using {method}")
    plt.xlabel("Frame")
    plt.ylabel("Source")

    # Show the finished figure before starting the next one
    plt.show()

## Time Series Alignment

In [None]:
# Heat onset time and first turn time of each trial (one entry per trial), stored
# with microsecond resolution; contents are uninitialized until filled in
event_time_dtype = np.dtype('datetime64[us]')
heat_onset = np.empty(trial_info.size, dtype=event_time_dtype)
initial_turn = np.empty(trial_info.size, dtype=event_time_dtype)

In [None]:
# Positions of the heat onset field and of the turn field among the named fields of
# each trial metadata record
trial_field_names = trial_info.dtype.names
heat_onset_index = trial_field_names.index(hyp.trial_heat_onset_field)
turn_index = trial_field_names.index(hyp.trial_turn_field)

In [None]:
# Find the heat onset and first turn times for each trial
for i in range(trial_info.size):

    # Convert the frame of the heat onset into a time and save it
    heat_onset_frame = trial_info[i][heat_onset_index][0, 0]
    heat_onset[i] = add_frames_to_datetime(trial_times_start[i], heat_onset_frame, hyp.trial_fr)

    # Get all frames of turns as a flat array. Flattening, rather than taking row 0,
    # also copes with a completely empty (0 x 0) array, for which row 0 does not exist;
    # the first element is the same either way
    turn_frames = trial_info[i][turn_index].ravel()

    # Do not save a time if there are no turns
    if turn_frames.size == 0:
        initial_turn[i] = np.datetime64('NaT')

    # Save the time of the first frame with a turn otherwise
    else:
        initial_turn[i] = add_frames_to_datetime(trial_times_start[i], turn_frames[0], hyp.trial_fr)

In [None]:
# Three independent dictionaries, one per time interval of a trial. Each maps a trial
# number to a dictionary from frame number to frame time, created on first access:
#   start_to_heat - frames between the trial start and the heat onset
#   heat_to_turn  - frames between the heat onset and the first turn
#   turn_to_end   - frames between the first turn and the trial end
start_to_heat, heat_to_turn, turn_to_end = (defaultdict(dict) for _ in range(3))

In [None]:
# Trials to be kept for the alignment; filled in while frames are sorted into intervals
valid_trials = list()

In [None]:
# Add frame and time information to the dictionaries

# Start from an empty list so that running this cell again does not list trials twice
valid_trials.clear()

for trial in range(trial_info.size):

    # Do not keep baselines or trials where no turns occurred
    if trial in baselines or np.isnat(initial_turn[trial]):
        continue

    # Determine which time interval each frame in the current trial belongs to
    for frame in np.where(trials_by_frame == trial)[0]:
        frame_time = frame_timestamps[frame]

        # The frame is between the start and heat onset if its time is before the heat onset time
        if frame_time < heat_onset[trial]:
            start_to_heat[trial][frame] = frame_time

        # Otherwise, the frame is between the heat onset and first turn if its time is before the first turn time
        elif frame_time < initial_turn[trial]:
            heat_to_turn[trial][frame] = frame_time

        # Otherwise, the frame is between the first turn and end
        else:
            turn_to_end[trial][frame] = frame_time

    # HEURISTIC CODE - ASK AT MEETING
    # Keep the trial only if both interpolated intervals contain at least one frame.
    # The interpolation takes the first and last frame of each of these intervals,
    # which is impossible for an empty interval. A trial without frames between the
    # heat onset and the first turn was already dropped before; a trial without frames
    # before the heat onset is now dropped as well instead of failing later on.
    if len(start_to_heat[trial]) > 0 and len(heat_to_turn[trial]) > 0:
        valid_trials.append(trial)

In [None]:
# Number of frames to use for each time interval. The two averaged intervals start
# at zero; the interval that takes a minimum starts at infinity so that any real
# frame count is smaller
n_frames_start_to_heat = n_frames_heat_to_turn = 0
n_frames_turn_to_end = np.inf

In [None]:
# Find the average number of frames between the start and heat onset of each trial.
# The total is recomputed from scratch instead of being added onto the previous
# value, so running this cell again gives the same result
n_frames_start_to_heat = round(
    sum(len(start_to_heat[trial]) for trial in valid_trials) / len(valid_trials)
)

In [None]:
# Find the average number of frames between the heat onset and first turn of each
# trial. The total is recomputed from scratch instead of being added onto the
# previous value, so running this cell again gives the same result
n_frames_heat_to_turn = round(
    sum(len(heat_to_turn[trial]) for trial in valid_trials) / len(valid_trials)
)

In [None]:
# Find the minimum number of frames between the first turn and end of each trial.
# The minimum is recomputed from the current valid trials only, so a value left over
# from an earlier run cannot influence it; without valid trials it is infinite
fewest_frames_turn_to_end = min(
    (len(turn_to_end[trial]) for trial in valid_trials),
    default=np.inf,
)

# Place an upper bound on the minimum: the configured number of seconds converted to
# frames with the image frame rate
max_frames_turn_to_end = round(hyp.max_seconds_turn_to_end * hyp.image_fr)
n_frames_turn_to_end = min(fewest_frames_turn_to_end, max_frames_turn_to_end)

## Interpolation

In [None]:
# Every interval tensor has one slice per valid trial and one row per retained
# component; only the number of time points differs between the intervals
leading_shape = (len(valid_trials), data.shape[0])

# Uninitialized tensors to be filled with the aligned time series of each interval
interpol_start_to_heat = np.empty(leading_shape + (n_frames_start_to_heat,))
interpol_heat_to_turn = np.empty(leading_shape + (n_frames_heat_to_turn,))
interpol_turn_to_end = np.empty(leading_shape + (n_frames_turn_to_end,))

In [None]:
# Interpolate points from trial start times to heat onset times
for i, trial in enumerate(valid_trials):

    # Find the start and end times of this interval using the first and last frames
    interval_frames = start_to_heat[trial]
    frame_start = min(interval_frames)
    frame_end = max(interval_frames)
    time_start = interval_frames[frame_start]
    time_end = interval_frames[frame_end]

    # Find the time step that spreads n_frames_start_to_heat sample times over the
    # interval; the divisor is kept at 1 or more so a single sample is also possible
    n_steps = max(n_frames_start_to_heat - 1, 1)
    time_unit = (time_end - time_start) / n_steps

    # Build exactly n_frames_start_to_heat sample times by stepping from the start.
    # np.arange(time_start, time_end, time_unit) excludes the end point and so yields
    # one sample too few whenever the interval divides evenly into the steps.
    times_interpol = time_start + np.arange(n_frames_start_to_heat) * time_unit

    # Find the frame numbers needed for interpolation
    # (presumably fractional frame positions -- TODO confirm against datetime_to_frame)
    frames_interpol = datetime_to_frame(times_interpol, time_start, frame_start, hyp.image_fr)

    # Create a function to interpolate the time series of every component between
    # the recorded frames of this interval
    x = np.array(list(interval_frames.keys()))
    y = data[:, x]
    f = interpolate.interp1d(x, y, axis=1)

    # Save the interpolated values
    interpol_start_to_heat[i] = f(frames_interpol)

In [None]:
# Interpolate points from heat onset times to initial turn times
for i, trial in enumerate(valid_trials):

    # Find the start and end times of this interval using the first and last frames
    interval_frames = heat_to_turn[trial]
    frame_start = min(interval_frames)
    frame_end = max(interval_frames)
    time_start = interval_frames[frame_start]
    time_end = interval_frames[frame_end]

    # Find the time step that spreads n_frames_heat_to_turn sample times over the
    # interval; the divisor is kept at 1 or more so a single sample is also possible
    n_steps = max(n_frames_heat_to_turn - 1, 1)
    time_unit = (time_end - time_start) / n_steps

    # Build exactly n_frames_heat_to_turn sample times by stepping from the start.
    # np.arange(time_start, time_end, time_unit) excludes the end point and so yields
    # one sample too few whenever the interval divides evenly into the steps.
    times_interpol = time_start + np.arange(n_frames_heat_to_turn) * time_unit

    # Find the frame numbers needed for interpolation
    # (presumably fractional frame positions -- TODO confirm against datetime_to_frame)
    frames_interpol = datetime_to_frame(times_interpol, time_start, frame_start, hyp.image_fr)

    # Create a function to interpolate the time series of every component between
    # the recorded frames of this interval
    x = np.array(list(interval_frames.keys()))
    y = data[:, x]
    f = interpolate.interp1d(x, y, axis=1)

    # Save the interpolated values
    interpol_heat_to_turn[i] = f(frames_interpol)

In [None]:
# Find only the first n_frames_turn_to_end frames for initial turn times to trial end times
for i, trial in enumerate(valid_trials):

    # Frame numbers are stored in the order in which the frames were added
    frame_numbers = list(turn_to_end[trial].keys())

    # The integer type keeps the selection usable as an index when no frame is
    # kept; an empty list would otherwise become a floating-point array
    frames_keep = np.array(frame_numbers[:n_frames_turn_to_end], dtype=int)

    # Save the first n_frames_turn_to_end frames
    interpol_turn_to_end[i] = data[:, frames_keep]

## Tensor Creation

In [None]:
# Join the three aligned intervals one after another along the last (time) axis of
# the three-dimensional interval tensors
interval_tensors = [interpol_start_to_heat, interpol_heat_to_turn, interpol_turn_to_end]
tensor = np.concatenate(interval_tensors, axis=2)

In [None]:
# Get tensor shape information
# The tensor axes are (trial, component, time point): the first axis indexes the
# valid trials, the second the retained components and the third the time points of
# the three concatenated intervals
trials, neurons, times = tensor.shape

In [None]:
# Min-max normalization of each neuron over all of its trials and time points

# Bring the neuron axis to the front before flattening. Reshaping the
# (trials, neurons, times) tensor directly into (neurons, trials * times) would keep
# the memory order and thereby mix values of different neurons within each row.
neuron_major = np.transpose(tensor, (1, 0, 2))
neuron_rows = np.reshape(neuron_major, (neurons, trials * times))

# Normalize every row, i.e. every neuron, separately
neuron_rows_norm = min_max(neuron_rows, axis=1)

# Undo the flattening and restore the original (trials, neurons, times) axis order
tensor_norm = np.transpose(np.reshape(neuron_rows_norm, (neurons, trials, times)), (1, 0, 2))

In [None]:
# Write the normalized tensor to the results directory, named after the selected
# hyperparameter set
tensor_path = f'results/{hyp.name}_tensor.npy'
np.save(tensor_path, tensor_norm)