In [1]:
#importing the necessary modules
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import numpy as np
import plotly.io as pio
import kaleido
from utils import *

%load_ext autoreload
%autoreload 2

In [2]:
# Restore the `data` variable persisted with IPython's %store magic
# (per the original note, it comes from the main notebook; presumably saved
# there with `%store data` - that must have happened before this cell runs).
%store -r data

### Work type and skills of the participants

##### Creation of the dataframes

In [3]:
# Dataframe for the distribution of the 'Work description' ratings.
# rename_axis + reset_index(name=...) pin the column names to 'index' (rating
# value) and 'Work description' (number of responses) on every pandas version:
# pandas >= 2.0 names the value_counts() result 'count' and its index after the
# source column, which would otherwise break the 'index' lookups below and
# compute the percentages from the rating values instead of the counts.
work_counts = data['Work description'].value_counts()
work_cat = work_counts.rename_axis('index').reset_index(name='Work description')

# Share of responses per rating, in percent, rounded to one decimal.
work_total = work_cat['Work description'].sum()
work_cat['Percentage'] = ((work_cat['Work description'] / work_total) * 100).round(decimals=1)

# Bucket the ratings: below 3 -> 'Imaging', 3 to 5 inclusive -> 'Balanced', above 5 -> 'Analyst'.
work_rating = work_cat['index']
work_cat.loc[work_rating < 3, 'Category'] = 'Imaging'
work_cat.loc[(work_rating >= 3) & (work_rating <= 5), 'Category'] = 'Balanced'
work_cat.loc[work_rating > 5, 'Category'] = 'Analyst'

# Order the rows by rating value before exporting.
work_cat = work_cat.sort_values(by='index')
# NOTE(review): absolute, machine-specific output path kept unchanged; consider a configurable directory.
work_cat.to_csv('C:\\Users\\ssivagur\\Documents\\GitHub\\2023_ImageAnalysisSurvey\\csv files\\work_des.csv')

In [4]:
# Dataframe for the distribution of the 'Level of computational skills' ratings.
# rename_axis + reset_index(name=...) pin the column names to 'index' (rating
# value) and 'Level of computational skills' (number of responses) on every
# pandas version: pandas >= 2.0 names the value_counts() result 'count' and its
# index after the source column, which would otherwise break the 'index'
# lookups below and compute the percentages from the wrong column.
skill_counts = data["Level of computational skills"].value_counts()
comp_skill_distbn = skill_counts.rename_axis('index').reset_index(name='Level of computational skills')

# Share of responses per rating, in percent, rounded to one decimal.
skill_total = comp_skill_distbn['Level of computational skills'].sum()
comp_skill_distbn['Percentage'] = (
    (comp_skill_distbn['Level of computational skills'] / skill_total) * 100
).round(decimals=1)

# Bucket the ratings: below 3 -> low, 3 to 5 inclusive -> medium, above 5 -> high.
skill_rating = comp_skill_distbn['index']
comp_skill_distbn.loc[skill_rating < 3, 'Category'] = 'Low skill'
comp_skill_distbn.loc[(skill_rating >= 3) & (skill_rating <= 5), 'Category'] = 'Medium skill'
comp_skill_distbn.loc[skill_rating > 5, 'Category'] = 'High skill'

# Order the rows by rating value before exporting.
comp_skill_distbn = comp_skill_distbn.sort_values(by='index')
# NOTE(review): absolute, machine-specific output path kept unchanged; consider a configurable directory.
comp_skill_distbn.to_csv('C:\\Users\\ssivagur\\Documents\\GitHub\\2023_ImageAnalysisSurvey\\csv files\\comp_skill_distbn.csv')

In [5]:
# Dataframe for the distribution of the 'Comfort in developing computational skills' ratings.
# rename_axis + reset_index(name=...) pin the column names to 'index' (rating
# value) and the source column name (number of responses) on every pandas
# version: pandas >= 2.0 names the value_counts() result 'count' and its index
# after the source column, which would otherwise break the 'index' lookups
# below and compute the percentages from the wrong column.
comfort_counts = data['Comfort in developing computational skills'].value_counts()
comf_distbn = comfort_counts.rename_axis('index').reset_index(name='Comfort in developing computational skills')

# Share of responses per rating, in percent, rounded to one decimal.
comfort_total = comf_distbn['Comfort in developing computational skills'].sum()
comf_distbn['Percentage'] = (
    (comf_distbn['Comfort in developing computational skills'] / comfort_total) * 100
).round(decimals=1)

# Bucket the ratings: below 3 -> low, 3 to 5 inclusive -> medium, above 5 -> high.
comfort_rating = comf_distbn['index']
comf_distbn.loc[comfort_rating < 3, 'Category'] = 'Low comfort'
comf_distbn.loc[(comfort_rating >= 3) & (comfort_rating <= 5), 'Category'] = 'Medium comfort'
# NOTE(review): the double space in 'High  comfort' is kept on purpose - the
# colour map passed to barchart_vertical_fig for this frame uses this exact key.
# The 'Comfort' column built on `data` uses 'High comfort' (single space).
comf_distbn.loc[comfort_rating > 5, 'Category'] = 'High  comfort'

# Order the rows by rating value before exporting.
comf_distbn = comf_distbn.sort_values(by='index')
# NOTE(review): absolute, machine-specific output path kept unchanged; consider a configurable directory.
comf_distbn.to_csv('C:\\Users\\ssivagur\\Documents\\GitHub\\2023_ImageAnalysisSurvey\\csv files\\comf_distbn.csv')

##### Creation of the figures 

In [10]:
# Vertical bar charts for the three rating distributions. Bars are coloured by
# their 'Category' bucket, using the same colour sequence for the
# lowest / middle / highest bucket of every chart.
bucket_colors = ('lightskyblue', 'darkseagreen', 'orchid')

work_cat_bar = barchart_vertical_fig(
    work_cat,
    title='Work description',
    color_by='Category',
    category_color=dict(zip(('Imaging', 'Balanced', 'Analyst'), bucket_colors)),
)

comp_skill_distbn_bar = barchart_vertical_fig(
    comp_skill_distbn,
    title='Level of computational skills',
    color_by='Category',
    category_color=dict(zip(('Low skill', 'Medium skill', 'High skill'), bucket_colors)),
)

# 'High  comfort' (double space) matches the label stored in comf_distbn['Category'].
comf_distbn_bar = barchart_vertical_fig(
    comf_distbn,
    title='Comfort in developing new computational skills',
    color_by='Category',
    category_color=dict(zip(('Low comfort', 'Medium comfort', 'High  comfort'), bucket_colors)),
)

#### Creation of the sunburst charts

##### Creation of the dataframes 

In [6]:
# Derive the categorical columns used by the sunburst charts.

# Trainee status: students and postdocs count as trainees, every other role as
# non-trainee. Rows without a 'Role' are dropped before mapping, so they end up
# with a missing 'Trainee status' after index alignment.
trainee_roles = ('Undergraduate/Graduate student', 'Postdoctoral fellow')
data['Trainee status'] = (
    data['Role']
    .dropna()
    .apply(lambda role: 'Trainee' if role in trainee_roles else 'Nontrainee')
)

# Each rating column is bucketed the same way into a new label column:
# below 3 -> first label, 3 to 5 inclusive -> second label, above 5 -> third label.
# Rows whose rating is missing match none of the masks and stay unlabelled.
bucket_plan = [
    # (rating column, new label column, (low, middle, high) labels)
    ("Work description", "Work type",
     ("Imaging", "Balanced", "Analyst")),
    ("Level of computational skills", "Knowledge of computational skills",
     ("Low skill", "Medium skill", "High skill")),
    ("Comfort in developing computational skills", 'Comfort',
     ('Low comfort', "Medium comfort", "High comfort")),
]

for rating_col, label_col, (low_label, mid_label, high_label) in bucket_plan:
    ratings = data[rating_col]
    data.loc[ratings < 3, label_col] = low_label
    data.loc[ratings.between(3, 5), label_col] = mid_label
    data.loc[ratings > 5, label_col] = high_label

In [7]:
# Slice the categorised survey data into one dataframe per chart and export
# each slice as CSV.
# NOTE(review): absolute, machine-specific export folder kept unchanged;
# consider making it configurable.
csv_dir = 'C:\\Users\\ssivagur\\Documents\\GitHub\\2023_ImageAnalysisSurvey\\csv files\\'

# Trainee and non-trainee categorisation.
# Columns are selected with lists of labels (as for work_comp_com_df below)
# rather than tuples: a tuple inside .loc is the syntax for a MultiIndex key,
# so a list is the unambiguous way to request several columns.
trainee_df = data.loc[:, ['Work type', 'Trainee status']]
trainee_df.to_csv(csv_dir + 'trainee_df.csv')

trainee_comp_comf_df = data.loc[:, ['Knowledge of computational skills', 'Comfort', 'Trainee status']]
trainee_comp_comf_df.to_csv(csv_dir + 'trainee_comp_comf_df.csv')

# Work type, computational skills and comfort, together with the column used
# to split the participants by field.
field_col = 'Microscopy for life sciences physical sciences'
work_comp_com_df = data.loc[:, ['Work type', 'Knowledge of computational skills', 'Comfort', field_col]]
work_comp_com_df.to_csv(csv_dir + 'work_comp_com_df.csv')

# Group by field once and reuse the grouping for both subsets.
# get_group raises KeyError if the requested field value is absent from the data.
by_field = work_comp_com_df.groupby(field_col)

work_comp_com_lif_df = by_field.get_group('Life Sciences')
work_comp_com_lif_df.to_csv(csv_dir + 'work_comp_com_lif_df.csv')

work_comp_com_phy_df = by_field.get_group('Physical Sciences')
work_comp_com_phy_df.to_csv(csv_dir + 'work_comp_com_phy_df.csv')

##### Creation of the charts

In [13]:
# Shared settings for the sunburst charts. Every call below receives its own
# copy of the palette / hierarchy, exactly as if a fresh literal were passed.
trainee_palette = {'Trainee': 'lightskyblue', 'Nontrainee': 'darkseagreen'}
work_type_palette = {'Imaging': 'lightskyblue', 'Balanced': 'darkseagreen', 'Analyst': 'orchid'}
skill_hierarchy = ["Work type", 'Knowledge of computational skills', 'Comfort']

# Charts for the trainees vs non-trainees
trainee_worktype_fig = sunburst_chart(
    trainee_df,
    order_list=['Trainee status', "Work type"],
    color_column='Trainee status',
    custom_colors=dict(trainee_palette),
    title='Worktype categorized based on trainee status',
)
trainee_comp_comf_fig = sunburst_chart(
    trainee_comp_comf_df,
    order_list=['Trainee status', 'Knowledge of computational skills', 'Comfort'],
    color_column='Trainee status',
    custom_colors=dict(trainee_palette),
    title='Computational skills of the trainees and nontrainees',
)

# Charts for the life sciences vs physical sciences
sun_lif = sunburst_chart(
    work_comp_com_lif_df,
    order_list=list(skill_hierarchy),
    color_column='Work type',
    custom_colors=dict(work_type_palette),
    title='Skills of the participants (Life Sciences)',
)
sun_phy = sunburst_chart(
    work_comp_com_phy_df,
    order_list=list(skill_hierarchy),
    color_column='Work type',
    custom_colors=dict(work_type_palette),
    title='Skills of the participants (Physical Sciences)',
)

# Overall chart (all participants, both fields)
lif_phy = sunburst_chart(
    work_comp_com_df,
    order_list=list(skill_hierarchy),
    color_column='Work type',
    custom_colors=dict(work_type_palette),
    title='Skills of the participants',
)