# Jupyter Notebook UI to graph your TAP data!

Version 1.7 - Joseph Liang, Rankin Lab
Updated:
1. Upgraded folder path selection application
2. Upgraded dataset management (less moving parts for end-user)
3. output changed from tif -> png
4. Added tap-correction function for missing taps (should be working now)
5. Added plate column to evaluate plate-discrepancies
6. Added separate graphing function for plate-discrepancies
7. Added colour palette-setting function

## Known bug: In Step 2, an empty window is displayed on Mac. This may also apply to Linux/Windows.

## Beginner Essentials:
1. Shift-Enter to run each cell. After you run, you should see an output "done step #". If not, an error has occurred.
2. When inputting your own code/revising the code, make sure you close all your quotation marks '' and brackets (), [], {}.
3. Don't leave any commas (,) hanging! (Make sure an object always follows a comma. If there is nothing after a comma, remove the comma!)
4. Learning to code? Each line of code is annotated to help you understand how this code works!

## 3. Run all cells/steps sequentially, even the ones that do not need input

## Steps that require input: #3, #6.1, #7

# Below is for Tap-Habituation Graphs

# 1. Importing Packages Required (No input required, just run)

In [1]:
import pandas as pd #<- package used to import and organize data
import numpy as np #<- package used to import and organize data
import math
import os #<- package used to work with system file paths
import seaborn as sns #<- package used to plot graphs
from matplotlib import pyplot as plt #<- another package used to plot graphs
from itertools import cycle #<- package used to iterate down rows (used in step 5 to add tap column)
import ipywidgets as widgets #<- widget tool to generate button and tab for graphs
from IPython.display import display #<- displays widgets
from tkinter import Tk, filedialog #<- Tkinter is a GUI package
print("done step 1")

done step 1


# 2. Pick filepath (just run and click button from output)

Run the following cell and click the button 'Select Folder' to pick a filepath.

## Important: Later on, this script searches the full file path of each file to import and group data. That means if the name of your top-level folder contains one of your strain names, the grouping will not work correctly.

(ex. if your folder has "N2" in it this script sees all files inside this folder as having the "N2" search key)

## An easy fix is to just rename your folder to something else (make your strains lower-case, or just have the date)

In [5]:
### Select Folder App - After you run, you can select your folder for filepath
button = widgets.Button(description = 'Select Folder') #<- Creates a button variable
display(button) #<- displays this button on output


def select_folder(b):
    """Open a folder picker and store the chosen path in the global ``folder_path``.

    :param b: the button that was clicked (supplied by ipywidgets, not used here)
    """
    global folder_path #<- sets a variable as a global variable, not just within this action
    # Use ONE Tk root for the whole action. Every extra Tk() call creates another
    # (visible, empty) window, so the root is created once, hidden, and destroyed.
    root = Tk()
    root.withdraw() #<- hides the empty root window
    try:
        folder_path = filedialog.askdirectory(parent=root) #<- opens the file explorer window
        root.update() #<- lets Tk process the closing of the file explorer window
    finally:
        root.destroy() #<- always remove the hidden root, even if the dialog fails
    if not folder_path: #<- askdirectory gives back an empty string when you press cancel
        print('No folder selected - click the button again to pick one')
        return
    print(folder_path) #<- this helps confirm that this action was performed
    print('done step 2')


button.on_click(select_folder) #<- describes what happens when you click on this button

Button(description='Select Folder', style=ButtonStyle())

/Users/Joseph/Desktop/N2_parafilm_2022_08_20
done step 2


In [None]:
# Sanity check: shows the folder stored in the global `folder_path` by the step 2 button.
# If the button was never clicked (and no manual path was set) this raises a NameError.
print(folder_path)

# 3. User Defined Variables (Add input here)

Here, we add some constants to help you blaze through this code.

3.1: Number of taps is pretty self-explanatory. How many taps does your experiment have? Put in that number (N) - the code below adds the +1 (N+1) for you!

This may be a bit confusing, but this is due to some coding syntax that you don't have to worry about.

3.2: Change your ISI number. This will be reflected in the name/title of the output figure.

In [3]:

number_of_taps = 30 ###<------ Taps in your experiment.

number_taps = range(1, number_of_taps + 1)  #<- do not change this


# if you have different ISIs in the same folder, then come back and change this
# when you are graphing for the second set of data with the other ISI
# (Generally data from same ISIs are graphed together)
# If changing ISI mid-analysis, you can just skip straight to step 8 after running this cell again

ISI = 10  ### <- What is your ISI? change accordingly
first_tap = 600 ### <- when is your first tap? check your TRV files

# Each tap gets a time window of (tap time - 1, tap time + 1).
# Open up one of the trv files to check the times of your taps.
window_span = number_of_taps * ISI  #<- time covered by all taps together
lower = np.arange(first_tap - 1, first_tap - 1 + window_span, ISI) #<- start of each window
upper = np.arange(first_tap + 1, first_tap + 1 + window_span, ISI) #<- end of each window
tolerances = list(zip(lower, upper))
taps = list(range(1, number_of_taps + 1))

#### Add 31st Tap Here --------------------------------------------------<------------
# tolerances.append((1188,1191))
# taps.append(31)


# pair every tap number with its time window
accurate_taps = list(zip(taps, tolerances))
print(accurate_taps)

for tap, tolerance in accurate_taps:
    print(f"tap {tap}", f"tolerance={tolerance}")

print("done step 3")

[(1, (599, 601)), (2, (609, 611)), (3, (619, 621)), (4, (629, 631)), (5, (639, 641)), (6, (649, 651)), (7, (659, 661)), (8, (669, 671)), (9, (679, 681)), (10, (689, 691)), (11, (699, 701)), (12, (709, 711)), (13, (719, 721)), (14, (729, 731)), (15, (739, 741)), (16, (749, 751)), (17, (759, 761)), (18, (769, 771)), (19, (779, 781)), (20, (789, 791)), (21, (799, 801)), (22, (809, 811)), (23, (819, 821)), (24, (829, 831)), (25, (839, 841)), (26, (849, 851)), (27, (859, 861)), (28, (869, 871)), (29, (879, 881)), (30, (889, 891))]
tap 1 tolerance=(599, 601)
tap 2 tolerance=(609, 611)
tap 3 tolerance=(619, 621)
tap 4 tolerance=(629, 631)
tap 5 tolerance=(639, 641)
tap 6 tolerance=(649, 651)
tap 7 tolerance=(659, 661)
tap 8 tolerance=(669, 671)
tap 9 tolerance=(679, 681)
tap 10 tolerance=(689, 691)
tap 11 tolerance=(699, 701)
tap 12 tolerance=(709, 711)
tap 13 tolerance=(719, 721)
tap 14 tolerance=(729, 731)
tap 15 tolerance=(739, 741)
tap 16 tolerance=(749, 751)
tap 17 tolerance=(759, 761)
t

# 4. Constructing Filelist From Source File/Select File (Just run)

In [6]:
#folder_path = '/Users/Joseph/Desktop/AVR14_10sISI' #- manual folder path if Tkinter is acting up

# Work from inside the data folder so that saved images end up next to the data
os.chdir(folder_path)

# Walk through the folder (and every sub-folder) and collect the full path
# of each .trv file - these are the files that contain your data
filelist = []
for current_dir, _subdirs, filenames in os.walk(folder_path):
    filelist.extend(
        os.path.join(current_dir, filename)
        for filename in filenames
        if filename.endswith('.trv')
    )

print(filelist)
print('done step 4')

['/Users/Joseph/Desktop/N2_parafilm_2022_08_20/Yes_Parafilm/20220820_093722/N2_P_8_10x2_t96h20C_600s30x10s_B_2022_08_20.trv', '/Users/Joseph/Desktop/N2_parafilm_2022_08_20/Yes_Parafilm/20220820_090154/N2_P_2_10x2_t96h20C_600s30x10s_B_2022_08_20.trv', '/Users/Joseph/Desktop/N2_parafilm_2022_08_20/Yes_Parafilm/20220820_101520/N2_7_10x2_t96h20C_600s30x10s_A_2022_08_20.trv', '/Users/Joseph/Desktop/N2_parafilm_2022_08_20/Yes_Parafilm/20220820_085200/N2_P_3_10x2_t96h20C_600s30x10s_C_2022_08_20.trv', '/Users/Joseph/Desktop/N2_parafilm_2022_08_20/Yes_Parafilm/20220820_093953/N2_P_1_10x2_t96h20C_600s30x10s_A_2022_08_20.trv', '/Users/Joseph/Desktop/N2_parafilm_2022_08_20/Yes_Parafilm/20220820_092820/N2_P_9_10x2_t96h20C_600s30x10s_C_2022_08_20.trv', '/Users/Joseph/Desktop/N2_parafilm_2022_08_20/Yes_Parafilm/20220820_091210/N2_P_6_10x2_t96h20C_600s30x10s_C_2022_08_20.trv', '/Users/Joseph/Desktop/N2_parafilm_2022_08_20/Yes_Parafilm/20220820_095755/N2_4_10x2_t96h20C_600s30x10s_A_2022_08_20.trv', '/U

# 5. Process Data Function (Just Run)

In [None]:
def ProcessData(strain): #<- an example of a user-defined function
    """Load and combine every data file whose path contains ``strain``.

    Uses the global ``filelist`` built in step 4. Each matching file is read as a
    space-separated table (the first 4 lines are skipped, no header row) and all
    files are stacked into one dataframe.

    :param strain: keyword that identifies one strain/treatment in the file paths
    :type strain: str

    :return: dictionary with
        'N' - number of matching files (plates),
        'Confirm' - the full dataframe with named and calculated columns,
        'Final' - copy holding only time, dura, dist, prob, speed and plate
    :type: dict

    :raises AssertionError: if no file path contains ``strain``
    """
    strain_filelist = [path for path in filelist if strain in path] #<- filters the list for keyword
    Strain_N = len(strain_filelist) #<- N per strain, or number of plates
    print(f'this strain/treatment has {Strain_N} plates') #<- will output as the first number
    if Strain_N == 0:
        raise AssertionError ('{} is not a good identifier'.format(strain))

    DF_Total = pd.concat(
        [pd.read_csv(f, sep=' ', skiprows = 4, header = None) for f in strain_filelist],
        ignore_index=True) #<- imports your data files
    DF_Total = DF_Total.rename( #<- gives the numbered columns readable names
                {0:'time',
                2:'rev_before',
                3:'no_rev',
                4:'stim_rev',
                7:'dist',
                8:'dist_std',
                9:'dist_stderr',
                11:'dist_0th',
                12:'dist_1st',
                13:'dist_2nd',
                14:'dist_3rd',
                15:'dist_100th',
                18:'dura',
                19:'dura_std',
                20:'dura_stderr',
                22:'dura_0th',
                23:'dura_1st',
                24:'dura_2nd',
                25:'dura_3rd',
                26:'dura_100th'}, axis=1)
    DF_Total['plate'] = 0 #<- placeholder, the real plate numbers are filled in by insert_plates
    DF_Total['prob'] = DF_Total['stim_rev']/ (DF_Total['no_rev'] + DF_Total['stim_rev']) #<- calculate prob
    DF_Total['speed'] = DF_Total['dist']/DF_Total['dura'] #<- calculate speed
    DF_Total_rows = int(DF_Total.shape[0])
    print(f'this strain/treatment has {DF_Total_rows} total taps') #<- Outputs as the second number. Check if you are missing taps!
    DF_Final = DF_Total[["time", "dura", "dist", "prob", "speed", "plate"]].copy()

    return{
            'N': Strain_N,
            'Confirm':DF_Total,
            'Final': DF_Final}



def assign_taps(DF, tolerances):
    """Add a 'taps' column to ``DF`` (in place) based on each row's time.

    Window number 1 is the first pair in ``tolerances``, number 2 the second, etc.
    A row gets a tap number when its time lies inside that window (both ends
    included). Rows that fall in no window keep an empty string.

    :param DF: dataframe with a 'time' column
    :type: pandas.core.frame.DataFrame

    :param tolerances: list of (lower, upper) time windows, one per tap
    :type: list
    """
    # object dtype so the column can hold both the "" placeholder and whole numbers
    DF['taps'] = pd.Series("", index=DF.index, dtype=object)
    for tap_number, (tap_lower, tap_upper) in enumerate(tolerances, start=1): #[(99, 101), (109,111), ...]
        # explicit comparisons instead of between(..., inclusive=True),
        # which newer pandas versions no longer accept
        TimesInTapRange = (DF['time'] >= tap_lower) & (DF['time'] <= tap_upper)
        DF.loc[TimesInTapRange,'taps'] = tap_number #set the tap number where times are in the window



    
def insert_plates(df):
    """Number the plates of a dataframe, writing the result into a 'plate' column.

    The dataframe is changed in place (nothing is returned). The plate number
    goes up by one on every row where 'taps' equals 1.

    :param df: dataframe that already has a 'taps' column
    :type: pandas.core.frame.DataFrame
    """
    starts_new_plate = df['taps'].eq(1) #<- True on every row that is a first tap
    df['plate'] = starts_new_plate.cumsum() #<- running count of first taps = plate number



print('done step 5')

# 6.1 Setting Experimental groups (Upgraded: Now Automated, Input Minimal/No Longer Required)

In [None]:
# Each group is named after a folder level BELOW the folder you picked in step 2.
# Working relative to folder_path means this no longer depends on how deep your
# folder sits on the computer, or on the path separator of your operating system.
group_depth = 0  #<- 0 = first folder level inside folder_path; adjust until you see your groups

strainnames = []
for f in filelist:
    relative_parts = os.path.relpath(f, folder_path).split(os.sep) #<- path pieces below folder_path
    strainnames.append(relative_parts[group_depth])
ustrainnames = sorted(set(strainnames)) #<- unique names, sorted so the numbering is the same every run
print(ustrainnames)

nstrains = list(range(1, len(ustrainnames) + 1))
print(nstrains)

StrainNames = dict(zip(nstrains, ustrainnames)) #<- {1: first group, 2: second group, ...}
print(StrainNames)
print("done")

## Just in case the above cell doesnt work, original cell and instruction still included below:

Here is the hardest part - From your naming convention, you need to pick a unique identifier for each group.

This means that all of the file names for that strain should have that identifier in common, but it must not be shared with any of the other files! If you did a good job naming your files and following a good naming convention, this should be easy.

## Be careful and really look hard in your naming structure. Note you want an unique identifier in the entire file path for the same group of files. An easy mistake is to have the strain name in the overall folder name, in this case if you use your strain name as a keyword it would include all files in that folder!

For example, if all your N2 files have a certain pattern like "N2_5x4" in this following example:
'/Users/Joseph/Desktop/AVR14_10sISI_TapHab_0710_2019/N2/20190710_141740/N2_5x4_f94h20c_100s30x10s10s_C0710ab.trv'

Then you need to set that identifier for the strain keyword:
'Strain_1' = 'N2_5x4'

## Depending on how many strains you are running for comparison, you may need to add/delete some lines!

## You are not naming your data groups here, we have a step for that later!
## Here, you want to note down ALL the strains you have in the folder

If you have just 2 strains, add hashtags (#) in front of the lines you do not need.
If you need more strains, just add more lines, following the same format!

In [None]:
# This Cell Is Now Defunct

# #Format: 'Strain_#'' = 'unique_identifier'

# ### Make into dictionary
# StrainNames = {
#     'Strain_1' : 'Yes_Parafilm',  #<- each strain will be designated to a unique identifier here
#     'Strain_2' : 'No_Parafilm',
# #     'Strain_3' : 'hipr-1_tm14191',
# #     'Strain_4' : 'cpr-5_ok2344',
# #     'Strain_5' : '2_P',
# #     'Strain_6' : '3_P',
# #     'Strain_7' : 'S1P11', #<- empty entries are for those hardcore trackers that tracks this many strains
# #     'Strain_8' : 'S2P12',
# #     'Strain_9' : 'S1P12',
# #     'Strain_10' : 'S2P13',
# #     'Strain_11' : 'S1P13',
# #     'Strain_12' : 'S2P14',
# #     'Strain_13' : 'S1P14',
# #     'Strain_14' : 'S2P15',
# #     'Strain_15' : '',
# #     'Strain_10' : '',
# #     'Strain_11' : '',
# #     'Strain_12' : '',
# #     'Strain_13' : '',
# #     'Strain_14' : '',
# #     'Strain_15' : '',
# }
# #...etc, etc

# print('done step 6.1')

# 6.2 Process Data (just run this cell)

In [None]:
# Index 0 holds a placeholder (0) so that the first real dataset sits at DataLists[1]
# (when I say #1 I want the first point, not the second point)
DataLists = [0]

# Process every non-empty identifier from step 6.1 and keep its 'Final' dataframe
DataLists.extend(
    ProcessData(s)['Final'] for s in StrainNames.values() if s != ''
)

# Add the tap numbers first, then the plate numbers, to every dataframe
for df in DataLists[1:]:
    assign_taps(df, tolerances)
    insert_plates(df)

# print(DataLists[0])
# print(DataLists[1])
#print(DataLists[2])
#print(len(DataLists))
print('done step 6.2')

In [None]:
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     print(DataLists[3])
# DataLists[3].loc[31:59,"plate"]=5
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
#     print(DataLists[3])

# Step 7 (Now Automated, No Input Required)

In [None]:
# Label every dataframe with its group name and join them all together.
# The name is stored in 'dataset' because every later step (colour palette,
# graphs, statistics) looks the group up in that column; 'Strain' is kept too
# so that output.csv still has the column it had before.
labelled = [
    df.assign(Strain=StrainNames.get(i), dataset=StrainNames.get(i))
    for i, df in enumerate(DataLists[1:], start=1)
]
TotalConcatenated = pd.concat(labelled)
print(TotalConcatenated)
TotalConcatenated.to_csv("output.csv")
print('done step 7')

## In case above cell doesn't work, below is original cell and instructions

# 7B*. Grouping Data and Naming (Optional: Add input here)

Here, you get to name your data groups/strains! Name your groups however you like between the quotation marks for each strain.

For example: If your Strain1 is N2 and you wish for the group to be called N2,
your line should look like:

DataLists[x].assign(dataset = 'N2')

## Go back to step 6.1 to check which strain is which item on the DataLists.
In this example, the first item on DataLists is N2.


## Remember: Put your name in quotes. (ex: 'N2' and not N2)

As default, the names are set to the unique identifier labels.

## Depending on the number of strains you are running the comparison, you may have to delete/add lines of code (following the same format). 
## Remember to add/delete commas too.

# If you want to change your groups, you do that here. 
For example, if you have 5 strains in your folder but only want to compare between 2 or 3 strains, designate that here and follow through with steps 6 and 7. Once you are done, come back to step 6 and change your groups again (You are going to have to change your graph titles for the second run-through though)!

In [None]:
# # This Cell Is Now Defunct 

# TotalConcatenated = pd.concat([ #<- this function joins your data together with an extra column for which group 
# #                         DataLists[1].assign(dataset = "N2"),
# #                         DataLists[2].assign(dataset = "glo-1 (tm3240)"),
# #                         DataLists[3].assign(dataset = "bas-1 (tm351)"),
# #                         DataLists[4].assign(dataset = "vps-35 (ok1880)"),
#                         DataLists[1].assign(dataset = StrainNames.get("Strain_1")),
#                         DataLists[2].assign(dataset = StrainNames.get("Strain_2")),
# #                         DataLists[3].assign(dataset = StrainNames.get("Strain_3")),
# #                         DataLists[4].assign(dataset = StrainNames.get("Strain_4")),
# #                         DataLists[6].assign(dataset = StrainNames.get("Strain_6")),
# #                         DataLists[7].assign(dataset = StrainNames.get("Strain_7")),
# #                         DataLists[8].assign(dataset = StrainNames.get("Strain_8")),
# #                         DataLists[9].assign(dataset = StrainNames.get("Strain_9")),
# #                         DataLists[10].assign(dataset = StrainNames.get("Strain_10")),
# #                         DataLists[11].assign(dataset = StrainNames.get("Strain_11")),
# #                         DataLists[12].assign(dataset = StrainNames.get("Strain_12")),
# #                         DataLists[13].assign(dataset = StrainNames.get("Strain_13")),
# #                         DataLists[14].assign(dataset = StrainNames.get("Strain_14")),
# #                           DataLists[2].assign(dataset = "glo-1 (zu391)"),
# #                           DataLists[3].assign(dataset = "src-1 (ok2685)"),
# #                           DataLists[4].assign(dataset = "dpy-22 (e652)"),
# #                           DataLists[4].assign(dataset = "dpy-22 (sy622)"),
#                           #DataLists[6].assign(dataset = "Pan-neuronal Specific"),
#                           #DataLists[7].assign(dataset = "GOA1 Mutant (n1134)"),
#                           #DataLists[8].assign(dataset = "GOA1 Mutant (n3055)"),
# #                           DataLists[5].assign(dataset = StrainNames.get("Strain_5")),
# #                         DataLists[5].assign(dataset = "StrainNameOnePoster"), #<----- example of custom name
                          
# ])
# # ...etc etc

# TotalConcatenated.reset_index(inplace=False)
# print(TotalConcatenated)

# #if TotalConcatenated["taps"].loc[ind] is not 1:
# #   TotalConcatenated["taps"].loc[ind:indices[c+1]] = list(range(1,len(TotalConcatenated["taps"].loc[ind:indices[c+1]])+1))
# # missing_taps(TotalConcatenated, accurate_taps, tolerances)

# print('done step 7')

In [None]:
# dpy22=TotalConcatenated[TotalConcatenated['dataset']=='dpy-22_e652']
# print(dpy22)
# dpy22.to_csv('dpy22.csv')

# Setting Colour Palette - Only run the below cell ONCE

The following code sets the colour palette for the whole experiment - and then designate one colour to each strain. After this, if as you are graphing you take away some strains, you can do so with the colours still matching accordingly.

In [None]:
strains = TotalConcatenated['dataset'].unique()

# Default colour palette: one colour per strain from seaborn's "deep" palette
pal = dict(zip(strains, sns.color_palette("deep", n_colors=len(strains))))

# IF YOU WANT TO CUSTOMIZE THE COLOR PALETTE OF THE GRAPHS
# list one colour per strain here, e.g. ['color1', 'color2', ...etc etc]
# (set it to [] to keep the default palette)
custom_colours = ['darkgray', 'gainsboro']
if custom_colours and len(custom_colours) >= len(strains):
    pal = dict(zip(strains, custom_colours))
elif custom_colours:
    # too few colours would leave some strains without a colour, so keep the default
    print(f'only {len(custom_colours)} custom colours for {len(strains)} strains - using the default palette')
print('palette is:' + str(pal))

print('done setting colour palette')

In [None]:
# Outputs final data onto a spreadsheet: writes "output.csv" into the current working directory
TotalConcatenated.to_csv("output.csv")

# 7.5: BONUS - Graph by plates to check for anomalies across each strain/treatment

Here, we will be graphing each strain on its own, split by individual plates - this will help us find any anomalies or outliers that we can then exclude!


In [None]:
# Colour palette for the by-plate graphs in the next cell (they use `palette = Strain_pal`).
# NOTE(review): choose_colorbrewer_palette presumably opens seaborn's interactive palette picker - confirm in the seaborn docs
Strain_pal = sns.choose_colorbrewer_palette('sequential')

In [None]:
StrainName = "hipr-1_tm14191" #<- Change this to dictate which strain you are analyzing (also used for labeling)
StrainConcatenated = TotalConcatenated[TotalConcatenated.dataset == StrainName]


out1 = widgets.Output() #<- this is part of the code that helps display your graphs in tabs
out2 = widgets.Output()
out3 = widgets.Output()

tab = widgets.Tab(children = [out1, out2, out3]) #<- more code to support tab display
tab.set_title(0, 'Probability')
tab.set_title(1, 'Duration')
tab.set_title(2, 'Speed')
display(tab)

plt.rcParams.update({'lines.markeredgewidth': 1})


def _plot_plate_metric(out, column, label, y_max, sized_text=True):
    """Draw one by-plate graph of StrainConcatenated inside an output tab.

    :param out: the widgets.Output tab the graph is shown in
    :param column: name of the data column to plot on the y-axis
    :param label: readable name of that measure (y-axis label and figure title)
    :param y_max: upper limit of the y-axis (the lower limit is always 0)
    :param sized_text: if True, axis labels use font size 12 and the title size 16;
        if False, matplotlib's default sizes are used
    """
    label_size = {'fontsize': '12'} if sized_text else {}
    title_size = {'fontsize': '16'} if sized_text else {}
    with out:
        # ONE figure per graph: calling plt.figure() twice leaves a stray empty figure behind
        plt.figure(figsize=(12,10), linewidth = 2.5) #<- figure size (width, length)
        plt.gca().xaxis.grid(False) #<- gets rid of x-axis markers to make data look clean
        ax = sns.pointplot(x="taps", #<- Here we use seaborn as our graphing package.
                     y=column,
                     data = StrainConcatenated,
                     hue = 'plate', #<- one line per plate
                     palette = Strain_pal, #<- Change colour palette here if you like
                     ci = 95) #<- error bars show the 95% confidence interval
        plt.xlabel("Taps", **label_size) #<- X-axis title
        plt.ylabel(label, **label_size) #<- Y-Axis title
        plt.title(f"{StrainName} {label} of Tap Habituation, {ISI}ISI", **title_size) #<- Figure Title
        plt.ylim(0, y_max)
        ax.legend(loc = 'upper right', fontsize = '12') #<- location of your legend
        # plt.savefig(f'{StrainName}_{label}_{ISI}ISI_ByPlate.png', format='png', dpi=450) #<- saves your file to your folder at certain DPI
        plt.show()


_plot_plate_metric(out1, "prob", "Probability", 1, sized_text=False)
# Distance is no longer graphed here. To bring it back, add a fourth Output/tab above and call:
# _plot_plate_metric(out4, "dist", "Distance", 1.5)
_plot_plate_metric(out2, "dura", "Duration", 3)
_plot_plate_metric(out3, "speed", "Speed", 0.5)

In [None]:
# Keep only the rows that belong to the groups listed here, e.g. ['Yes_Parafilm', 'No_Parafilm'].
# (`"dataset"==[]` compared a piece of text with a list, which is simply False and is not a filter.)
strains_to_keep = []
TotalConcatenated_S1s = TotalConcatenated[TotalConcatenated["dataset"].isin(strains_to_keep)]

# 8. Graph Data (Probability, Distance, Duration, Speed of Tap Habituation)

## Here, feel free to change the graph title and axis labels! Images are automatically saved in your folder with your data!

Note: It has been agreed by the lab that distance is quite an outdated measure, and the three top measures we consider are:

## probability, duration and speed.

In [None]:
# prefix="S1vsS2_"

out1 = widgets.Output() #<- this is part of the code that helps display your graphs in tabs
out2 = widgets.Output()
out3 = widgets.Output()
out4 = widgets.Output()

tab = widgets.Tab(children = [out1, out2, out3, out4]) #<- more code to support tab display
tab.set_title(0, 'Probability')
tab.set_title(1, 'Distance')
tab.set_title(2, 'Duration')
tab.set_title(3, 'Speed')
display(tab)

plt.rcParams.update({'lines.markeredgewidth': 1})
sns.set_context("notebook")


def _plot_group_metric(out, column, label, y_max, sized_text=True):
    """Draw one graph comparing all groups of TotalConcatenated and save it as a png.

    The image is saved as '<label>_<ISI>ISI.png' in the current working directory.

    :param out: the widgets.Output tab the graph is shown in
    :param column: name of the data column to plot on the y-axis
    :param label: readable name of that measure (y-axis label, title and file name)
    :param y_max: upper limit of the y-axis, or None to let matplotlib choose
        (the lower limit is always 0)
    :param sized_text: if True, axis labels use font size 12 and the title size 16;
        if False, matplotlib's default sizes are used
    """
    label_size = {'fontsize': '12'} if sized_text else {}
    title_size = {'fontsize': '16'} if sized_text else {}
    with out:
        # ONE figure per graph: calling plt.figure() twice leaves a stray empty figure behind
        plt.figure(figsize=(12,10), linewidth = 2.5) #<- figure size (width, length)
        plt.gca().xaxis.grid(False) #<- gets rid of x-axis markers to make data look clean
        ax = sns.pointplot(x="taps", #<- Here we use seaborn as our graphing package.
                     y=column,
                     data = TotalConcatenated,
                     hue = 'dataset', #<- Here we use the extra group column to separate by group
                     palette = pal, #<- Change colour palette here if you like
                     ci = 68) #<- ci=68 for sem (use 95 for the 95% confidence interval)
        plt.xlabel("Taps", **label_size) #<- X-axis title
        plt.ylabel(label, **label_size) #<- Y-Axis title
        plt.title(f"{label} of Tap Habituation, {ISI}ISI", **title_size) #<- Figure Title
        plt.ylim(0, y_max)
        ax.legend(loc = 'upper right', fontsize = '12') #<- location of your legend
        plt.savefig(f'{label}_{ISI}ISI.png', format='png', dpi=450) #<- saves your file to your folder at certain DPI
        plt.show()


_plot_group_metric(out1, "prob", "Probability", 1, sized_text=False)
_plot_group_metric(out2, "dist", "Distance", None)
_plot_group_metric(out3, "dura", "Duration", None)
_plot_group_metric(out4, "speed", "Speed", None)

# ANOVA and By-Tap Box-Plots

In [None]:
import pingouin as pg

In [None]:
# Split the group name at its FIRST underscore into two factors: 'Day' and 'Parafilm'.
# `n` has to be given by name - newer pandas versions reject it as a positional argument.
TotalConcatenated[['Day', 'Parafilm']] = TotalConcatenated['dataset'].str.split('_', n=1, expand=True)
# TotalConcatenated[['S', 'Seed']] = TotalConcatenated['Seeed'].str.split('S', 1, expand=True)
print(TotalConcatenated)

In [None]:
# Two-way ANOVA (factors: Day and Parafilm) for each measure, over all taps, rounded to 5 decimals
anova_tables = {
    column: pg.anova(dv=column, between=['Day', 'Parafilm'], data=TotalConcatenated,
                     detailed=True).round(5)
    for column in ('prob', 'dist', 'dura', 'speed')
}
prob_anova = anova_tables['prob']
dist_anova = anova_tables['dist']
duration_anova = anova_tables['dura']
speed_anova = anova_tables['speed']

for heading, table in (("Probability ANOVA", prob_anova),
                       ("Distance ANOVA", dist_anova),
                       ("Duration ANOVA", duration_anova),
                       ("Speed ANOVA", speed_anova)):
    print(heading)
    print(table)

In [None]:
TapToAnalyze = 1 #<- the tap number you want to look at on its own

# Keep only the rows that belong to that tap
is_selected_tap = TotalConcatenated["taps"] == TapToAnalyze
FirstTap = TotalConcatenated[is_selected_tap]
print(FirstTap)

In [None]:
# Two-way ANOVA (factors: Day and Parafilm) for each measure, selected tap only, rounded to 3 decimals
tap_anova_tables = {
    column: pg.anova(dv=column, between=['Day', 'Parafilm'], data=FirstTap,
                     detailed=True).round(3)
    for column in ('prob', 'dist', 'dura', 'speed')
}
prob_anova = tap_anova_tables['prob']
dist_anova = tap_anova_tables['dist']
duration_anova = tap_anova_tables['dura']
speed_anova = tap_anova_tables['speed']

for measure, table in (("Probability", prob_anova),
                       ("Distance", dist_anova),
                       ("Duration", duration_anova),
                       ("Speed", speed_anova)):
    print(f"Tap_{TapToAnalyze}_{measure} ANOVA")
    print(table)

In [None]:
# Pairwise tests (factors: Day and Parafilm) for each measure, selected tap only, rounded to 3 decimals
pairwise_tables = {
    column: pg.pairwise_tests(dv=column, between=['Day', 'Parafilm'], data=FirstTap).round(3)
    for column in ('prob', 'dist', 'dura', 'speed')
}
prob_pairwise = pairwise_tables['prob']
dist_pairwise = pairwise_tables['dist']
dura_pairwise = pairwise_tables['dura']
speed_pairwise = pairwise_tables['speed']

for measure, table in (("Probability", prob_pairwise),
                       ("Distance", dist_pairwise),
                       ("Duration", dura_pairwise),
                       ("Speed", speed_pairwise)):
    print(f"Tap_{TapToAnalyze}_{measure} Pairwise")
    print(table)

In [None]:
# Tukey test between plates for the selected tap. The plate column made in step 5
# is called 'plate' (lower-case) - 'Plate' does not exist in the data.
# NOTE(review): the variable name says "prob" but the measure tested is 'dist' - confirm which one is intended
prob_pairwise_tukey = FirstTap.pairwise_tukey(dv='dist', between='plate').round(3)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # show every row and column
#     print(prob_pairwise_tukey)
    print(prob_pairwise_tukey[prob_pairwise_tukey['p-tukey'] <= 0.05]) #<- only the significant pairs


## Box Plot

In [None]:
# Reshape to "long" format: one row per (original row, metric). The metric name
# goes into 'Metric' and its value into 'Measure'.
melted = TotalConcatenated.melt(
    id_vars=["time", "plate", "taps", "dataset"],
    value_vars=["dura", "prob", "dist", "speed"],
    var_name="Metric",
    value_name="Measure",
)
print(melted)

In [None]:


# Grid of box plots: one row per tap, one column per metric.
# Plots in the same column share their y-axis.
metric_order = ['prob', 'dura', 'dist', 'speed']
g = sns.FacetGrid(data=melted, row="taps", col="Metric",
                  col_order=metric_order, sharey='col')
g.map_dataframe(sns.boxplot, x="dataset", y="Measure")
g.set_xticklabels(rotation=90) #<- turn the group names sideways so they do not overlap
# g.axes[0].set_ylim((0,1))
# g.axes[1].set_ylim((0,3.5))
# g.axes[2].set_ylim((0,1))
# g.axes[3].set_ylim((0,0.4))

g.savefig('map_plot.pdf', format='pdf', dpi=450)

## First 3 taps

In [None]:
# Only the first 3 taps
# Can customize the line below to get data for the exact tap numbers you need
taps1to3 = melted[melted.taps <= 3]

# Same grid of box plots as before (row = tap, column = metric), for these taps only
metric_order = ['prob', 'dura', 'dist', 'speed']
g = sns.FacetGrid(data=taps1to3, row="taps", col="Metric",
                  col_order=metric_order, sharey='col')
g.map_dataframe(sns.boxplot, x="dataset", y="Measure")
g.set_xticklabels(rotation=90) #<- turn the group names sideways so they do not overlap
# g.axes[0].set_ylim((0,1))
# g.axes[1].set_ylim((0,3.5))
# g.axes[2].set_ylim((0,1))
# g.axes[3].set_ylim((0,0.4))

g.savefig('smaller_map_plot.png', format='png', dpi=450)

## Done Tap-Habituation portion

# Worm Size Measurement Portion

## Constructing .DAT Filelist for Worm Size Analysis

In [None]:
#folder_path = '/Users/Joseph/Desktop/lrk1_ATR_NoATR_08_15_2019'
print(folder_path)
os.chdir(folder_path) #<- working directory: files saved with relative names end up here

# Walk the whole folder tree and collect the full path of every .dat file.
filelist = [
    os.path.join(root, name)
    for root, _subdirs, files in os.walk(folder_path)
    for name in files
    if name.endswith('.dat')
]

print(filelist)
print('done')

## 5. Process .dat Data Function (Just Run)

In [None]:
def dat_ProcessData(strain): #<- an example of a user-defined function
    """Load and stack every .dat file whose path contains `strain`.

    Reads the module-level `filelist` (built in the previous cell), keeps the
    paths that contain the `strain` keyword, reads each file as a
    space-separated table without a header row, and concatenates the tables.

    Parameters
    ----------
    strain : str
        Keyword identifying the strain/treatment; matched as a substring of
        the full file path.

    Returns
    -------
    dict
        'N'       -> number of matching files (plates)
        'Confirm' -> combined DataFrame with columns 0-17 renamed (see below)
                     plus a 'plate' column filled with 0.

    Raises
    ------
    AssertionError
        If no path in `filelist` contains `strain`.
    """
    strain_filelist = [x for x in filelist if strain in x] #<- filters the list for the keyword
    Strain_N = len(strain_filelist) #<- N per strain, or number of plates
    print(f'this strain/treatment has {Strain_N} plates') #<- will output as the first number
    if Strain_N == 0:
        raise AssertionError('{} is not a good identifier'.format(strain))

    DF_Total = pd.concat(
        [pd.read_csv(f, sep=' ', header=None) for f in strain_filelist],
        ignore_index=True) #<- imports your data files
    DF_Total = DF_Total.rename( #<- gives the numbered columns readable names
        {0: 'Time',
         1: 'n',
         2: 'Number',
         3: 'Instantaneous Speed',
         4: 'Interval Speed',
         5: 'Bias',
         6: 'Tap',
         7: 'Puff',
         8: 'x',
         9: 'y',
         10: 'Width',
         11: 'Length',
         12: 'Area',
         13: 'Angular Speed',
         14: 'Aspect Ratio',
         15: 'Kink',
         16: 'Curve',
         17: 'Crab'}, axis=1)
    # TODO: check here for NaN columns
    DF_Total['plate'] = 0 #<- placeholder; plate numbers are filled in by a later cell

    return {
        'N': Strain_N,
        'Confirm': DF_Total,
    }


print('done step 5')

## Set StrainNames Dictionary (Automated - no input required)

In [None]:
# Position of the group/strain folder name inside each file path.
# Adjust this number until the printed list below shows your groups.
path_level = 5

# Pull the group name out of every .dat path (path separators normalised to '/').
strainnames2 = [f.replace(os.sep, '/').split('/')[path_level] for f in filelist]

# Unique group names, sorted so the numbering below is the same on every run
# (the order of a plain set of strings is not guaranteed between sessions).
ustrainnames2 = sorted(set(strainnames2))
print(ustrainnames2)

nstrains2 = list(range(1, len(ustrainnames2) + 1)) #<- 1, 2, 3, ... one number per group
print(nstrains2)

# Dictionary of {group number: group name}
StrainNames2 = dict(zip(nstrains2, ustrainnames2))
print(StrainNames2)
print("done")

## In case the above cell doesn't work as intended, the original code (commented out) is below:

In [None]:

# #Format: Strain_# = 'unique_identifier'

# ### Make into dictionary
# StrainNames = {
#     'Strain_1' : 'No_Parafilm',   #<- each strain will be designated to a unique identifier here
#     'Strain_2' : 'Yes_Parafilm',
# #     'Strain_3' : 'e1112_OffFood',
# #     'Strain_4' : 'e1112_OnFood',
# #     'Strain_5' : 'Test_OffFood',
# #     'Strain_6' : 'Test_OnFood',
# #     'Strain_5' : 'N2_NoFood',
# #     'Strain_6' : 'N2_Food',
# #     'Strain_9' : 'LX636_NoFood',  #<- empty entries are for those hardcore trackers that tracking this many strains
# #     'Strain_10' : 'LX636_Food',
# #     'Strain_11' : '',
# #     'Strain_12' : '',
# #     'Strain_13' : '',
# #     'Strain_14' : '',
# #     'Strain_15' : '',
# }
# #...etc, etc

# print('done step 6.1')
# print(StrainNames)

## Process Data

In [None]:
# Index 0 holds a placeholder (0) so that the first real dataset sits at index 1.
dat_DataLists = [0]

# Process every identifier in StrainNames2 and collect its combined dataframe.
for s in tqdm(StrainNames2.values()):
    if s == '':
        continue #<- blank identifiers are skipped
    processed = dat_ProcessData(s)
    dat_DataLists.append(processed['Confirm'])

# print(DataLists[2])
print('done step 6.2')

## Grouping Data And Naming (Automated - No Input Required)

In [None]:
# Pair every processed dataframe with the identifier it was built from.
# Blank identifiers are skipped when dat_DataLists is filled, so they are
# skipped here too; otherwise the labels would shift onto the wrong dataframe.
strain_labels = [s for s in StrainNames2.values() if s != '']
baseline = pd.concat(
    [df.assign(Strain=label) for label, df in zip(strain_labels, dat_DataLists[1:])]
)
print(baseline)
baseline.to_csv("baseline_output.csv") #<- saved in the current working directory
print('done step 7')

## If upgraded cell doesn't work, below is original code (commented out)

In [None]:
# # <------------------- This cell is now DEFUNCT -----------------------


# #to add a line, add a comma to the last line and press 'enter' to type into a new line


# baseline = pd.concat([DataLists[1].assign(Strain = "No Parafilm"),
#                                DataLists[2].assign(Strain = "Parafilm"),
#                               ])
# # baseline=baseline.dropna()
# baseline = baseline.reset_index(drop=True)
# print(baseline)
# baseline.to_csv("baseline_output.csv")
# print("done")


## Reformatting Dataframe, taking baseline measures from 100s - 500s (before tap)

In [None]:
# Split the combined data by group. .copy() makes each subset an independent
# dataframe, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning.
baseline_NP = baseline[baseline.Strain == 'No_Parafilm'].copy()
baseline_NP['Strain'] = 'No Parafilm' #<- display name used in the graphs
baseline_P = baseline[baseline.Strain == 'Yes_Parafilm'].copy()
baseline_P['Strain'] = 'Parafilm'

# Plate number = running count of rows whose Time lies in [0, 0.05].
# NOTE(review): this assumes every plate recording contributes exactly one
# row in that time window -- confirm against your .dat files.
baseline_NP['plate'] = baseline_NP.Time.between(0, 0.05).cumsum()
baseline_P['plate'] = baseline_P.Time.between(0, 0.05).cumsum()
# print(baseline_NP)
# print(baseline_P)
data = pd.concat([baseline_NP, baseline_P])
print(data)

In [None]:
# Keep the columns needed for the size analysis and give the grouping
# columns their display names.
size_columns = ['Strain', 'plate', 'Time', 'Number', 'Width', 'Length', 'Area']
some_data = data[size_columns].rename(columns={'Strain': 'Dataset', 'plate': 'Plate'})

# Restrict to rows with 100 <= Time <= 500.
in_window = (some_data.Time >= 100) & (some_data.Time <= 500)
some_data = some_data[in_window]

print(some_data)

## Calculating the mean measures of each plate

In [None]:
# One row per (Dataset, Plate) holding the mean of every remaining column.
plate_groups = some_data.groupby(by=["Dataset", "Plate"], as_index=False)
some_data_means = plate_groups.mean()
print(some_data_means)

## Barplots

In [None]:
out1 = widgets.Output() #<- this is part of the code that helps display your graphs in tabs
out2 = widgets.Output()
out3 = widgets.Output()

tab = widgets.Tab(children = [out1, out2, out3]) #<- more code to support tab display
tab.set_title(0, 'Width')
tab.set_title(1, 'Length')
tab.set_title(2, 'Area')
display(tab)

plt.rcParams.update({'lines.markeredgewidth': 1})
sns.set_context("talk")

# One entry per tab: (output widget, column to plot, y-axis title, figure title).
# The three graphs are drawn by the same loop so they always stay consistent.
bar_panels = [
    (out1, "Width", "Worm Width (mm)", "Average Worm Width"),
    (out2, "Length", "Worm Length (mm)", "Average Worm Length"),
    (out3, "Area", "Worm Area (mm^2)", "Average Worm Area"),
]

for out, measure, y_label, title in bar_panels:
    with out:
        plt.figure(linewidth = 2.5) #<- Make your figure lines THICCCC for clean display (posters)
        # plt.figure(figsize=(12,10)) #<- set your figure size (width, length)
        plt.gca().xaxis.grid(False) #<- gets rid of x-axis markers to make data look clean
        ax = sns.barplot(x="Dataset", #<- Here we use seaborn as our graphing package.
                         y=measure,
                         data = some_data_means,
                         hue = 'Dataset', #<- colours each bar by its group
                         palette = ['darkgray','gainsboro'], #<- Change colour palette here if you like
                         ci = 68, #<- Confidence interval. 68 = standard error
                         # NOTE(review): newer seaborn releases spell this errorbar=('ci', 68) -- confirm for your version
                         dodge=False) #<- keep each bar centred on its x position
        plt.xlabel("") #<- X-axis title
        plt.ylabel(y_label) #<- Y-Axis title
        plt.title(title) #<- Figure Title
        # Remove the legend only if seaborn actually drew one; calling
        # ax.legend_.remove() crashes when no legend exists.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()
        # plt.ylim(0,1)
        # ax.legend(loc = 'upper right', fontsize = '12') #<- location of your legend
        # plt.savefig(f'Probability_{ISI}ISI.png', format='png', dpi=900) #<- saves your file to your folder at certain DPI
        plt.show()

## Boxplots

In [None]:
out1 = widgets.Output() #<- this is part of the code that helps display your graphs in tabs
out2 = widgets.Output()
out3 = widgets.Output()

tab = widgets.Tab(children = [out1, out2, out3]) #<- more code to support tab display
tab.set_title(0, 'Width')
tab.set_title(1, 'Length')
tab.set_title(2, 'Area')
display(tab)

# Draw every part of the box (edges, median, whiskers, caps) in black.
PROPS = {
    'boxprops':{'edgecolor':'k'},
    'medianprops':{'color':'k'},
    'whiskerprops':{'color':'k'},
    'capprops':{'color':'k'}
}

plt.rcParams.update({'lines.markeredgewidth': 1})
sns.set_context("talk")

# One entry per tab: (output widget, column to plot, y-axis title, figure title).
# The figure title is also used as the name of the saved .png file.
box_panels = [
    (out1, "Width", "Worm Width (mm)", "Average Worm Width"),
    (out2, "Length", "Worm Length (mm)", "Average Worm Length"),
    (out3, "Area", "Worm Area (mm^2)", "Average Worm Area"),
]

for out, measure, y_label, title in box_panels:
    with out:
        plt.figure(linewidth = 2.5) #<- Make your figure lines THICCCC for clean display (posters)
        # plt.figure(figsize=(12,10)) #<- set your figure size (width, length)
        plt.gca().xaxis.grid(False) #<- gets rid of x-axis markers to make data look clean
        ax = sns.boxplot(x="Dataset", #<- Here we use seaborn as our graphing package.
                         y=measure,
                         data = some_data_means,
                         hue = 'Dataset', #<- colours each box by its group
                         palette = ['darkgray','gainsboro'], #<- Change colour palette here if you like
                         showfliers=False, #<- outlier markers hidden; every plate is drawn by the stripplot below
                         dodge=False,
                         **PROPS) #<- black box outlines (see PROPS above)
        # Remove the legend only if seaborn actually drew one; calling
        # ax.legend_.remove() crashes when no legend exists.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()
        ax = sns.stripplot(x="Dataset", #<- overlays one black dot per plate mean
                           y=measure,
                           data=some_data_means,
                           size=6,
                           color="k")
        plt.xlabel("") #<- X-axis title
        plt.ylabel(y_label) #<- Y-Axis title
        plt.title(title) #<- Figure Title
        # plt.ylim(0,1)
        # ax.legend(loc = 'upper right', fontsize = '12') #<- location of your legend
        plt.savefig(f"{title}.png", format='png', dpi=450) #<- saves your file to your folder at certain DPI
        plt.show()

# Done

## Extra Consideration (Weighted V.S. Unweighted Plate Means)

In [None]:
# Mean of every column per Dataset, computed directly over all rows of
# some_data (rows are not averaged per plate first).
dataset_groups = some_data.groupby(by=["Dataset"], as_index=False)
unweighted_means = dataset_groups.mean()
print(unweighted_means)

In [None]:
def _weighted_width(group):
    """Average of the plate-mean Width values, weighted by each plate's mean 'Number'."""
    return np.average(group.Width, weights=group.Number)


# One weighted Width average per Dataset, built from the per-plate means.
weighted_data = some_data_means.groupby(some_data_means.Dataset).apply(_weighted_width)
print(weighted_data)

In [None]:
def weighted_se(input_df, value_col='Area', weight_col='Number'):
    """Return the weighted standard error of one column of `input_df`.

    Computes sqrt(sum(w * (x - weighted_mean)**2) / (((n - 1) / n) * sum(w)))
    divided by sqrt(sum(w)), where x is `value_col`, w is `weight_col` and n
    is the number of non-missing values in `value_col`.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Table holding both the value and the weight column.
    value_col : str, default 'Area'
        Column whose weighted standard error is calculated.
    weight_col : str, default 'Number'
        Column used as the weights.

    Returns
    -------
    float
        The weighted standard error, or NaN when there are fewer than two
        values (the (n - 1) / n correction is undefined in that case).
    """
    weights = input_df[weight_col]
    vals = input_df[value_col]

    n_obs = vals.count() #<- number of non-missing values
    if n_obs < 2:
        return float('nan')

    weighted_avg = np.average(vals, weights=weights)

    numer = np.sum(weights * (vals - weighted_avg)**2)
    denom = ((n_obs - 1) / n_obs) * np.sum(weights)

    return np.sqrt(numer/denom)/np.sqrt(np.sum(weights))