# Jupyter Notebook to Analyze and Visualize BSR Disease Model CSV Data


# 1. Importing Packages Required (No input required, just run)

In [None]:
import pandas as pd #<- package used to import and organize data
import numpy as np #<- package used to import and organize data
import seaborn as sns #<- package used to plot graphs
from matplotlib import pyplot as plt #<- package used to plot graphs
import os #<- package used to work with system filepaths
from ipywidgets import widgets #<- widget tool to generate button
from IPython.display import display #<- displays button
# from tkinter import Tk, filedialog #<- Tkinter is a GUI package
from tqdm.notebook import tqdm
from ipyfilechooser import FileChooser
# import dask.dataframe as dd
import pingouin as pg
import random
# Let pandas show up to 50 columns when a DataFrame is printed.
pd.options.display.max_columns = 50
# Simple marker so it is obvious this setup cell finished running.
print("done step 1")

# 2. Pick filepath (just run and click button)

Run the following cell and click the button 'Select Folder' to pick a filepath.

## Important: Later on, this script uses the full file path of each file to import and group data. That means if your folder name contains the name of one of your strains, the script will not work.

(e.g., if your folder name contains "N2", this script treats every file inside that folder as matching the "N2" search key)

## An easy fix is to just rename your folder to something else (make your strains lower-case, or just have the date)

In [None]:
# Directory the file chooser opens in. Edit this to wherever your data lives.
starting_directory = '/Users/Joseph/Desktop/'
# The path above only exists on the original author's machine; fall back to
# the current user's home directory so the chooser opens on a real folder.
if not os.path.isdir(starting_directory):
    starting_directory = os.path.expanduser('~')
# Build the file-chooser widget and render it in the notebook output.
chooser = FileChooser(starting_directory)
display(chooser)

In [None]:
# Keep the chooser's `selected_path` value and echo it so the selection can be
# checked by eye.
# NOTE(review): presumably the folder part of the selection — confirm against
# the ipyfilechooser docs.
folder_path=chooser.selected_path
print(folder_path)

In [None]:
# Load the CSV that was picked in the file chooser.
selected_csv = chooser.selected
# Fail with an actionable message instead of an opaque read_csv error when the
# cell is run before anything has been selected.
# NOTE(review): assumes `chooser.selected` is empty/None until a file is chosen.
if not selected_csv:
    raise ValueError("No file selected: pick a CSV in the file chooser above, then re-run this cell.")
# 'Unnamed: 0' is the leftover index column of a CSV saved with its index;
# errors='ignore' keeps the load working for files saved without it.
BSR_Data = pd.read_csv(selected_csv).drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Show the loaded table so the import can be checked by eye.
print(BSR_Data)

In [None]:
# Pull the allele name out of the genotype string: the text that follows the
# first '(' and runs up to the next parenthesis (or the end of the string).
# Rows whose genotype has no '(' get a missing value.
# A single regex extract replaces the two chained str.split(...)[i] lookups,
# which raised KeyError when no genotype in the table contained a '('.
BSR_Data['Allele'] = BSR_Data['Genotype'].str.extract(r'\(([^()]*)', expand=False)
print(BSR_Data)

In [None]:
# Rows without an extracted allele are labelled 'N2'.
has_allele = BSR_Data['Allele'].notna()
BSR_Data['Allele'] = BSR_Data['Allele'].where(has_allele, 'N2')

In [None]:
# Number of distinct values in the 'Gene' column (a missing value counts once).
print(BSR_Data['Gene'].nunique(dropna=False))

In [None]:
# Load the per-worm LRRK2 model control data from its fixed location.
# 'Unnamed: 0' is the leftover index column of a CSV saved with its index;
# errors='ignore' keeps the load working if the file was saved without it.
BSR_Data1 = pd.read_csv(
    '/Users/Joseph/Desktop/BSR_Screen/Disease_Model_Controls/BSR_LRRK2Model_data_by_worm.csv'
).drop(columns=['Unnamed: 0'], errors='ignore')
print(BSR_Data1)

In [None]:
# Stack the selected data set on top of the LRRK2 control data (rows appended,
# original row labels kept).
BSR_Data_new = pd.concat((BSR_Data, BSR_Data1), axis=0)
print(BSR_Data_new)

## Just For Worm Speed:

### By Gene:

In [None]:
# Mean speed for every (Genotype, Treatment) pair.
worm_avg_speed = BSR_Data[['Genotype', 'Treatment', 'speed']]
avg_speed = worm_avg_speed.groupby(["Genotype", 'Treatment'], as_index=False).mean()
print(avg_speed)
print("-"*50)
# Within each genotype, subtract the next treatment row from the current one
# (periods=-1). The last row of each genotype has no successor, so its
# difference is NaN and is dropped.
speed_diff = avg_speed.drop(columns=["Treatment"]).groupby("Genotype", as_index=False).diff(periods=-1).dropna()
print(speed_diff)
print("-"*50)
# Put the genotype labels back next to the differences (aligned on the row
# index shared with avg_speed), then renumber the rows from 0.
speed_diff.insert(loc=0, column='Genotype', value=avg_speed['Genotype'])
speed_diff = speed_diff.reset_index(drop=True)
print(speed_diff)
print("-"*50)
print("-"*50)
# Everything below needs at least one genotype with a computed difference.
if speed_diff.empty:
    raise ValueError("No speed differences could be computed: each genotype needs at least two treatments.")
# Spot-check of the fourth row's difference; skipped when the table has fewer
# than four rows (the unguarded lookup raised IndexError in that case).
if len(speed_diff) > 3:
    print(speed_diff.iloc[3, 1])
# Score = each genotype's difference relative to the first row of the table.
# NOTE(review): this assumes the first row is the reference/control strain — confirm.
speed_diff['Score'] = speed_diff['speed'] - speed_diff['speed'].iloc[0]
print("-"*50)
print(speed_diff)
print("-"*50)
speed_diff = speed_diff.sort_values(by=["Score"])
print("-"*50)
print(speed_diff)
# speed_diff.to_csv("/Users/Joseph/Desktop/BSR_LRRK2Model_speed_diff.csv")

In [None]:
# Keep the first two rows of speed_diff, show them, and export them to CSV
# (the row index is written as the first column).
SNCA_diff = speed_diff.iloc[:2]
print(SNCA_diff)
SNCA_diff.to_csv("/Users/Joseph/Desktop/BSR_hSNCAModel_speed_diff.csv")

In [None]:
# Load the gene orthology lookup table.
# 'Unnamed: 0' is the leftover index column of a CSV saved with its index;
# errors='ignore' keeps the load working if the file was saved without it.
Gene_Orthology = pd.read_csv('/Users/Joseph/Desktop/BSR_Screen/Gene_Orthology.csv').drop(
    columns=['Unnamed: 0'], errors='ignore'
)
# Rename so that 'Gene' holds the worm gene (the join key used on the data
# tables) and the file's original 'Gene' column becomes 'Orthology'.
Gene_Orthology = Gene_Orthology.rename(columns={"Gene": "Orthology", "Worm Gene": "Gene"})
print(Gene_Orthology)

In [None]:
# speed_diff is keyed by Genotype and has no 'Gene' column, so merging it on
# 'Gene' directly raises KeyError. Look up each genotype's worm gene in
# BSR_Data first (one row per genotype), then join the orthology table.
if 'Gene' in speed_diff.columns:
    speed_with_gene = speed_diff
else:
    genotype_genes = BSR_Data[['Genotype', 'Gene']].drop_duplicates(subset='Genotype')
    speed_with_gene = pd.merge(speed_diff, genotype_genes, on='Genotype', how='left')

# Left join keeps every genotype even when its gene has no orthology entry.
speed_diff_orthology = pd.merge(speed_with_gene,
                                Gene_Orthology,
                                on='Gene',
                                how='left')

# Restore the orthology file's naming: worm gene -> 'Worm Gene', orthologue -> 'Gene'.
speed_diff_orthology = speed_diff_orthology.rename(columns={"Gene": "Worm Gene", "Orthology": "Gene"})
print(speed_diff_orthology)
print("-"*50)
# Rows that found no match in the orthology table.
print(speed_diff_orthology[speed_diff_orthology['Gene'].isna()])

In [None]:
# This cell repeats the preceding orthology merge.
# speed_diff is keyed by Genotype and has no 'Gene' column, so merging it on
# 'Gene' directly raises KeyError. Look up each genotype's worm gene in
# BSR_Data first (one row per genotype), then join the orthology table.
if 'Gene' in speed_diff.columns:
    speed_with_gene = speed_diff
else:
    genotype_genes = BSR_Data[['Genotype', 'Gene']].drop_duplicates(subset='Genotype')
    speed_with_gene = pd.merge(speed_diff, genotype_genes, on='Genotype', how='left')

# Left join keeps every genotype even when its gene has no orthology entry.
speed_diff_orthology = pd.merge(speed_with_gene,
                                Gene_Orthology,
                                on='Gene',
                                how='left')

# Restore the orthology file's naming: worm gene -> 'Worm Gene', orthologue -> 'Gene'.
speed_diff_orthology = speed_diff_orthology.rename(columns={"Gene": "Worm Gene", "Orthology": "Gene"})
print(speed_diff_orthology)
print("-"*50)
# Rows that found no match in the orthology table.
print(speed_diff_orthology[speed_diff_orthology['Gene'].isna()])

## Making the bar plot

In [None]:
# Title is kept as a user input, but the plot title itself is left empty below.
Title = "Basal Slowing (Speed Off Food - Speed On Food), mm/s" #<---------------- Input Here -----------------
# Base name of the PNG written to the Desktop.
FileName = "LRRK2 BSR"
sns.set_context("poster")
# Create ONE figure carrying both settings. Two separate plt.figure() calls
# left an empty extra figure behind and applied linewidth to the wrong one.
plt.figure(figsize=(15, 5), linewidth=1)
plt.gca().xaxis.grid(False)
# Bar colours in plotting order. Only as many as there are genotypes are
# passed on, so seaborn does not warn about an over-long palette list.
bar_colors = ['black', sns.color_palette()[0], sns.color_palette()[1]]
n_genotypes = max(SNCA_diff['Genotype'].nunique(), 1)
ax = sns.barplot(x="Genotype",
                 y='speed',
                 data=SNCA_diff, #<--------------- Input Here -------------
                 hue='Genotype',
                 palette=bar_colors[:n_genotypes],
                 legend=False
                )
plt.xlabel("")
# Genotype names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
plt.ylabel("Basal Slowing")
# plt.ylim(top = 0.2)
# plt.ylim(bottom = -0.03)
plt.title("")
# Save before show(), while the figure still holds the drawn plot.
plt.savefig(f'/Users/Joseph/Desktop/{FileName}.png', format='png', dpi=450, bbox_inches = 'tight')
plt.show()