In [3]:
# Import relevant python packages.
import pandas as pd # For working with Pandas dataframes.
import numpy as np # For working with arrays.
import seaborn as sns # For data visualisation on top of Matplotlib.
import matplotlib.pyplot as plt # For data visualisation. 
import matplotlib as mpl # For data visualisation. 
import itertools 
import os
from scipy.stats import sem # For calculation of standard error. 
from matplotlib import rc
from matplotlib.pyplot import Line2D
import pingouin as pg # For initial statistical comparisons.
# Initial plot formatting: use a sans-serif font family with Helvetica as the preferred face.
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
# 42 selects TrueType (Type 42) font embedding for PDF output (see matplotlib's rcParams docs).
# NOTE(review): presumably chosen so that text in exported PDFs stays editable - confirm intent.
mpl.rcParams['pdf.fonttype'] = 42

### Setting the directory

In [10]:
# Absolute path to the relevant directory.
# To replicate, set the VGLUT2_DATA_DIR environment variable to your own path (or RDM path,
# if applicable) before starting the kernel; the original author's location is only used as
# a fallback when the variable is not set.
PATH = os.environ.get('VGLUT2_DATA_DIR', 'C:/Users/dylan/Documents/VGLUT2_check/')
os.chdir(PATH) # Change current wd to PATH so the relative "./..." reads below resolve against it.

### Helper functions

In [6]:
# Helper functions 
def get_laminar_values(path):
    '''
    Load a CSV of laminar marker positions and collect the Y values of each animal.

    :param path: path to a CSV file with ID, X, Y, Channel columns (only 'ID' and 'Y' are used).
    :return: dict mapping each 'ID' to a numpy array of that animal's 'Y' values, in file order.

    '''
    df = pd.read_csv(path) # Loads CSV with ID, X, Y, Channel.
    # groupby yields (ID, sub-dataframe) pairs; the sub-dataframe holds the rows for one animal.
    return {animal_id: animal['Y'].values for animal_id, animal in df.groupby('ID')}

def normalise_values(values):
    '''
    A simple function for performing min-max normalisation on a list of grey values extracted from ImageJ's 'Plot Profile' plugin.

    :param values: array or list of values to be min-max normalised.
    :return: list of the normalised values, (v - min) / (max - min) for each v.
             An empty input returns an empty list. If every value is identical the range is
             zero, so a list of 0.0 is returned instead of dividing by zero.

    '''
    if len(values) == 0:
        return []

    # Compute the extremes once rather than once per element.
    lowest, highest = min(values), max(values)
    span = highest - lowest

    if span == 0:
        return [0.0 for _ in values]

    return [(v - lowest) / span for v in values]

def interpolate(inp,fi):
    '''
    Linearly interpolate a single value of a sequence at a fractional index.

    Calling this once per target position lets a sample be resampled to a pre-specified
    length, e.g., the largest length array present in a set of samples.

    :param inp: list/array of values to be interpolated.
    :param fi: the fractional index; anything above the last valid index is clamped to it.
               Values below zero are not clamped.
    :return: the interpolated value

    '''
    last_index = len(inp) - 1
    if fi > last_index:
        fi = last_index # Clamp so the upper neighbour never runs past the end.

    lower = int(fi // 1) # Whole part: index of the left-hand neighbour.
    weight = fi % 1 # Fractional part: how far towards the right-hand neighbour.

    # Only step to the next element when there is a fractional part to blend in.
    upper = lower + 1 if weight > 0 else lower

    return (1 - weight) * inp[lower] + weight * inp[upper]

def mean_of_middle_20_percent(values):
    """
    Calculate the mean of the middle 20% of values in a list, array, or Pandas Series.

    The window runs from index int(0.4 * n) up to (but excluding) index int(0.6 * n). For very
    short inputs that window would be empty (n = 1 or n = 3), so it is widened to a single
    element, which for those lengths is the middle element.

    :param values: list, array, or Pandas Series of values.
    :return mean: the mean of the middle 20% of values.
    :raises TypeError: if `values` is not a list, array, or Pandas Series.
    :raises ValueError: if `values` is empty.

    """
    # Ensure that the input is a list, array, or Pandas Series
    if not isinstance(values, (list, np.ndarray,pd.Series)):
        raise TypeError("Input must be a list, array, or pd.Series.")

    # A Series is sliced by label/position rules; convert so plain positional slicing applies.
    if isinstance(values, pd.Series):
        values = values.tolist()

    n = len(values)
    if n == 0:
        raise ValueError("Input must contain at least one value.")

    # Find the index of the first and last values in the middle 20%
    start_index = int(0.4 * n)
    # Guarantee at least one element so the mean below never divides by zero.
    end_index = max(int(0.6 * n), start_index + 1)

    # Get the middle 20% of values
    middle_values = values[start_index:end_index] # keep in mind python indexing is exclusive of the last index.

    # Calculate and return the mean of the middle values
    return sum(middle_values) / len(middle_values)

def get_intervals(data, markers, keep_length=False):
    """
    Calculate the intervals that each value in a list, array, or Pandas Series belongs to, based on a list, array, or Pandas Series of markers.

    Consecutive markers define the intervals: 'Interval k' covers markers[k-1] <= v < markers[k],
    and the final interval also includes its upper marker. Any number of markers (two or more)
    is supported; seven markers give 'Interval 1' to 'Interval 6'.

    :params data: list, array, or Pandas Series of data values.
    :params markers: list, array, or Pandas Series of markers that define the intervals.
    :params keep_length: what to do with values that fall in no interval (below the first
                         marker, above the last, or NaN). If False (default) they are left
                         out, so the result can be shorter than `data`. If True, None is
                         stored for them so the result lines up one-to-one with `data`.
    :return intervals: list of interval labels (strings).
    :raises TypeError: if `data` or `markers` is not a list, array, or Pandas Series.
    :raises ValueError: if fewer than two markers are given.

    """
    # Ensure that the input is a list, array, or Pandas Series
    if not isinstance(data, (list, np.ndarray, pd.Series)):
        raise TypeError("Input data must be a list, array, or Pandas Series.")
    if not isinstance(markers, (list, np.ndarray, pd.Series)):
        raise TypeError("Input markers must be a list, array, or Pandas Series.")

    # Convert the input data and markers to lists if they are arrays or Pandas Series
    if isinstance(data, (np.ndarray, pd.Series)):
        data = data.tolist()
    if isinstance(markers, (np.ndarray, pd.Series)):
        markers = markers.tolist()

    n_intervals = len(markers) - 1
    if n_intervals < 1:
        raise ValueError("Input markers must contain at least two values.")

    # Pair each marker with its successor: (lower bound, upper bound) of every interval.
    bounds = list(zip(markers[:-1], markers[1:]))

    # Initialize an empty list to store the intervals
    intervals = []

    # Iterate over the data values and determine the interval for each value
    for v in data:
        label = None
        for k, (lower, upper) in enumerate(bounds, start=1):
            # Intervals are half-open, except the last one which also includes its upper marker.
            if v >= lower and (v < upper or (k == n_intervals and v == upper)):
                label = f'Interval {k}'
                break
        if label is not None or keep_length:
            intervals.append(label)

    return intervals

### Load in raw data sheets

In [9]:
# Grey-value profiles: read the third tab (sheet index 2) of the datasheet.
# Includes all of Toby's sections and most of mine.
grey_values = pd.read_excel(
    io="./endogenous_VGLUT2_datasheet.xlsx",
    sheet_name=2,
)
grey_values.head()

Unnamed: 0,ID,AGE_PN,STAGE,AREA,SIDE,UNIQUE_ID,SAMPLE_ID,Distance_microns,VGLUT2,SATB2,DAPI,Experimenter
0,1182A,23.0,23,SS,L,1182AP23S23SSLTB,1182AP23S23SS,0.0,631.453,874.802,1107.635,TB
1,1182A,23.0,23,SS,L,1182AP23S23SSLTB,1182AP23S23SS,0.454,649.632,893.142,1110.61,TB
2,1182A,23.0,23,SS,L,1182AP23S23SSLTB,1182AP23S23SS,0.908,666.433,913.247,1115.59,TB
3,1182A,23.0,23,SS,L,1182AP23S23SSLTB,1182AP23S23SS,1.362,683.22,932.81,1129.68,TB
4,1182A,23.0,23,SS,L,1182AP23S23SSLTB,1182AP23S23SS,1.816,700.702,951.333,1149.838,TB


### Select stage and region for layer marker calibration

In [25]:
# Layer positions are examined stage-by-stage.
# Opens the datasheet again, this time selecting the 2nd tab (sheet index 1).
layer_markers = pd.read_excel(
    io="./endogenous_VGLUT2_datasheet.xlsx",
    sheet_name=1,
)

# Stage of interest.
stage = 23.0

# Keep only the layer-marker rows whose STAGE matches the stage of interest.
subset_layer_markers = layer_markers.loc[layer_markers['STAGE'].eq(stage)]

# Check that it worked.
subset_layer_markers.head()

Unnamed: 0,ID,AGE_PN,STAGE,AREA,SIDE,UNIQUE_ID,Y,Position
0,1294a,21,23,SS,R,1294a23SSR,350.395,1
1,1294a,21,23,SS,R,1294a23SSR,380.555,2
2,1294a,21,23,SS,R,1294a23SSR,409.726,3
3,1294a,21,23,SS,R,1294a23SSR,461.147,4
4,1294a,21,23,SS,R,1294a23SSR,475.98,5


In [26]:
# Normalises the layer-marker Y positions to the range 0-1, separately for each UNIQUE_ID
# present in the stage subset; the result is one list of normalised positions per UNIQUE_ID.
# NOTE(review): the original note here read "Based on one bilateral representative sample from
# each stage (L and R values averaged)". No averaging is performed in this cell, so presumably
# it happens in a later step - confirm.
# The loop variables below are notebook-level names and stay defined after the loop finishes.
normed_markers = []
for i in subset_layer_markers.groupby("UNIQUE_ID"): # each i is a (UNIQUE_ID, sub-dataframe) tuple.
    df = i[1] # the rows belonging to this UNIQUE_ID.
    layer_values = df['Y'].tolist() # marker positions, in row order.
    norm_layer_values = normalise_values(layer_values) # min-max normalise within this sample.
    norm_layer_values = [round(v,3) for v in norm_layer_values] # round to 3 decimal places.
    normed_markers.append(norm_layer_values)
normed_markers

[[0.0, 0.111, 0.23, 0.48, 0.565, 0.71, 1.0],
 [0.0, 0.125, 0.246, 0.46, 0.522, 0.626, 1.0],
 [0.0, 0.084, 0.218, 0.36, 0.457, 0.575, 1.0],
 [0.0, 0.097, 0.236, 0.407, 0.521, 0.638, 1.0]]