### Graphs to be plotted for the physical sciences 
#### Questions that were specific to physical science participants

Physical sciences image analysis 
* Image analysis tools that have been used before (bar chart)
* Most used image analysis tools (bar chart)
* Commonly analyzed images (bar chart)
* Well-solved image analysis problems (wordcloud)
* Image analysis problems that had better solutions (wordcloud)


### Data that need to be broken down for physical science participants 

* Work type, computational skills, and comfort in developing new computational skills (sunburst chart)
* Approaches to image analysis problems (bar chart)
* Preferable instructional methods (bar chart)
* Topics of interest to learn (bar chart)
* Conferences that would benefit from image analysis offerings (bar chart)
* Creators role -? (wordcloud)
* Users role -? (wordcloud)


#### Functions and templates for the graphs 

In [1]:
# Necessary modules 
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import numpy as np
import requests
import io

#### Reading the data from GitHub

In [10]:
# Download the survey responses CSV from the GitHub repository
url = 'https://raw.githubusercontent.com/COBA-NIH/2023_ImageAnalysisSurvey/main/Bridging%20Imaging%20Users%20to%20Imaging%20Analysis%20-%202022%20(Responses)%20-%20Form%20Responses%201.csv'
# Give up on a stalled connection instead of hanging the notebook, and fail
# loudly on an HTTP error so an error page is never parsed as survey data.
response = requests.get(url, timeout=30)
response.raise_for_status()
download = response.content

In [11]:
# Load the survey responses: decode the downloaded bytes as UTF-8 text and
# let pandas parse that text as CSV
csv_text = download.decode('utf-8')
data = pd.read_csv(io.StringIO(csv_text))

##### Bar charts

i) Image analysis tools that have been used before

In [22]:
# What image analysis tools have you used before?
# Column 40 is a "check all that apply" question, so one response can list
# several tools; unanswered rows are dropped before counting.
tools_question = 'What image analysis tools have you used before? (check all that apply).1'
df_1 = pd.DataFrame(data.iloc[:, 40].dropna(how='all'))

# Short chart label -> marker text whose presence in a response counts the
# response towards that tool category.
tool_markers = {
    "Commercial software with the microscope": 'Aztec',
    "Other commercial software": 'Imaris',
    "Open source point-and-click software": 'FIJI',
    "Computational libraries and scripts": 'MATLAB',
    "Tools_none": 'None',
}
for tool_label, marker in tool_markers.items():
    # regex=False: the markers are literal text, not patterns
    df_1[tool_label] = df_1[tools_question].str.contains(marker, regex=False)

# Number of responses mentioning each category, as a (tools, counts) table
com_tool_df = (
    df_1[list(tool_markers)]
    .sum()
    .reset_index()
    .rename(columns={'index': 'tools', 0: 'counts'})
)

# Horizontal bar chart for the commonly used image analysis tools
com_tools_bar = px.bar(com_tool_df, y='tools', x='counts', labels={'tools': ''}, text_auto=True, orientation='h')

# Dictionary literals are used instead of dict(...) calls because a later cell
# rebinds the name `dict`; with literals this cell can still be re-run afterwards.
com_tools_bar.update_layout(
    width=700,
    height=400,
    title="Commonly used image analysis tools",
    title_x=0.5,
    title_y=0.95,
    font={'family': 'Helvetica', 'color': "Black", 'size': 16},
    legend={'title_font_family': 'Helvetica', 'font': {'size': 16, 'color': "Black"}},
)
# Kept as the last expression so the notebook displays the figure
com_tools_bar.update_yaxes(categoryorder='total ascending')
                                            

ii) Most used image analysis tools 

In [25]:
# What image analysis tools do you use the most?
# Input - a specific column of the dataframe (column 41, a single-choice question)
most_used_question = 'What image analysis tools do you use the most?.1'
df = pd.DataFrame(data.iloc[:, 41].dropna(how='all'))

# Each option is counted under a shortened label (rather than using
# value_counts) because the full option text is too long for the chart.
# Short label -> exact option text as it appears in the responses.
most_used_options = {
    'Commercial software that comes with my microscope': 'Commercial software that comes with my microscope (AutoMET, Gatan DigitalMicrograph. Aztec, etc)',
    'Other commercial software': 'Other commercial software (Avizo, Imaris, Volocity, etc)',
    'Open source point-and-click software': 'Open source point-and-click software (ImageJ, FIJI, Gwyddion, etc)',
    'Computational libraries and scripts': 'Computational libraries and scripts (python (scikit-image), MATLAB, etc)',
    'None': 'None',
}
for short_label, option_text in most_used_options.items():
    df[short_label] = df[most_used_question] == option_text

# Number of participants who picked each option, as a (tools, counts) table
df_subset = df[list(most_used_options)]
counts_tools_df = pd.DataFrame(df_subset.sum())
counts_tools_df = counts_tools_df.reset_index()
counts_tools_df = counts_tools_df.rename(columns={'index': 'tools', 0: 'counts'})

# Horizontal bar chart for the most used image analysis tools
counts_tools_bar = px.bar(counts_tools_df, y='tools', x='counts', labels={'tools': ''}, orientation='h', width=650, height=400, text_auto=True)
counts_tools_bar.update_yaxes(categoryorder='total ascending')
# Dictionary literals are used instead of dict(...) calls because a later cell
# rebinds the name `dict`; with literals this cell can still be re-run afterwards.
# Kept as the last expression so the notebook displays the figure.
counts_tools_bar.update_layout(
    title='Most used image analysis tools',
    title_x=0.5,
    title_y=0.95,
    font={'family': 'Helvetica', 'color': "Black", 'size': 16},
    legend={'title_font_family': 'Helvetica', 'font': {'size': 16, 'color': "Black"}},
)

iii) Commonly analyzed images

In [65]:
# Build one single-row table of image-kind counts for each technique column
# (columns 42-50), keyed by the last word of the column name.
# NOTE: the name `dict` rebinds the Python builtin for the rest of the session;
# it is kept because the following cells look the tables up under this name.
dict = {}

# Column to create -> text whose presence in a response marks that image kind.
# NOTE(review): '2D' is also a substring of '2D + time', so a response that
# lists only '2D + time' is counted under '2D' as well - confirm this is intended.
image_kind_markers = {
    '2D': '2D',
    '2D + time': '2D + time',
    '3D': '<3000',
    '3D + time': '3D + time',
    '3D (SPIM/large volume)': '(SPIM/large volume)',
    '3D large volume + time': '3D large volume + time',
}

for column_position in range(42, 51):
    # Keep only the participants who answered this technique's question
    answers = pd.DataFrame(data.iloc[:, column_position].dropna(how='all'))
    question = answers.columns[0]
    technique = question.split()[-1]

    # One boolean column per image kind: does the response mention it?
    for kind, marker in image_kind_markers.items():
        answers[kind] = answers[question].apply(lambda response, marker=marker: marker in response)

    # Count the mentions per kind and lay them out as a single row labelled
    # with the technique
    kind_counts = answers.drop(columns=question).sum().to_frame()
    kind_counts = kind_counts.rename(columns={0: technique})
    dict[technique] = kind_counts.reset_index().set_index('index').transpose()
    

In [66]:
# Show the technique labels (last word of each column name) used as keys of
# the per-technique count tables
dict.keys()

dict_keys(['microscopy/DIC/fractography]', 'imaging)]', 'HAADF-STEM)]', 'CL)]', 'SIMS/APT)]', 'tomography)]', 'spectroscopy]', 'microscopy]', '[Other]'])

In [75]:
# Stack the per-technique count rows into one table, in this fixed order,
# then turn the technique labels into an ordinary column
technique_order = [
    'microscopy/DIC/fractography]',
    'imaging)]',
    'HAADF-STEM)]',
    'CL)]',
    'SIMS/APT)]',
    'tomography)]',
    'spectroscopy]',
    'microscopy]',
    '[Other]',
]
combined_df = pd.concat([dict[technique] for technique in technique_order]).reset_index()
combined_df

index,index.1,2D,2D + time,3D,3D + time,3D (SPIM/large volume),3D large volume + time
0,microscopy/DIC/fractography],17,12,6,3,0,3
1,imaging)],12,5,4,0,0,0
2,HAADF-STEM)],13,6,5,1,0,1
3,CL)],11,6,3,1,0,0
4,SIMS/APT)],3,1,4,1,0,1
5,tomography)],3,2,5,3,0,4
6,spectroscopy],5,3,2,0,0,0
7,microscopy],16,12,9,6,0,6
8,[Other],9,6,3,1,0,1


In [83]:
# Stacked horizontal bar chart: one bar per technique (the 'index' column of
# combined_df), one coloured segment per image kind.
phy_sci_bar = go.Figure()

image_kinds = [
    '2D',
    '2D + time',
    '3D',
    '3D + time',
    '3D (SPIM/large volume)',
    '3D large volume + time',
]
for kind in image_kinds:
    # Every segment is labelled with its own count. The original labelled the
    # '3D (SPIM/large volume)' segments with the technique names instead.
    phy_sci_bar.add_trace(
        go.Bar(
            name=kind,
            y=combined_df['index'],
            x=combined_df[kind],
            text=combined_df[kind],
            orientation='h',
        )
    )

phy_sci_bar.update_layout(barmode='stack', title='What kinds of images do you commonly want to analyze?')
# Kept as the last expression so the notebook displays the figure
phy_sci_bar.update_yaxes(categoryorder='total ascending')

##### Wordcloud

In [5]:
# Function to draw and save a wordcloud for one free-text survey question
# input - a single column (Series) of the survey dataframe, named after the question on the survey form
def wordcloud(df):
    """Draw a word cloud of the answers in `df` and save it as a PNG.

    Parameters
    ----------
    df : pandas.Series
        Free-text answers to one survey question. `df.name` must be the
        question text: its words are added to the stop words, and it is used
        as the plot title and in the output file name.

    The image is drawn on the current matplotlib figure and written to
    'Image-<df.name>.png'.
    """
    from wordcloud import WordCloud, STOPWORDS
    import matplotlib.pyplot as plt

    punctuation = r'[-./?!,":;()\']'
    # Lower-case the answers, drop unanswered rows and blank out punctuation.
    # regex=True is explicit: pandas >= 2.0 treats the pattern as literal text
    # by default, which would leave all punctuation in place.
    answers = df.str.lower().dropna().str.replace(punctuation, ' ', regex=True)
    # Join with a space so the last word of one answer cannot fuse with the
    # first word of the next.
    text = ' '.join(answers)

    # NOTE(review): the question words keep their punctuation (e.g. a trailing
    # '?') while the answers have it stripped - confirm they still match.
    words_to_remove = df.name.split()
    stopwords_new = words_to_remove + list(STOPWORDS)
    wc_image = WordCloud(stopwords=stopwords_new, background_color='white', width=600, height=600, random_state=4).generate(text)
    plt.imshow(wc_image)
    plt.tight_layout()
    plt.title(df.name)
    plt.axis('off')
    # NOTE(review): the question text goes into the file name unchanged -
    # confirm it never contains characters such as '/' or '?'.
    plt.savefig('Image-' + df.name + '.png')