# General Notes
A `null` value means the respondent gave no answer (the question was optional).

# Import required libraries

In [1]:
import altair as alt
import pandas as pd

# Load Data

In [2]:
# Survey responses and funders table, read from the CSV files under
# ./data/clean.
sheffield_df = pd.read_csv('./data/clean/sheffield_clean.csv')
funders_df = pd.read_csv('./data/clean/funders_clean.csv')

# Explicit ordering of the confidence answers; the confidence charts pass
# this list as `sort=` so the categories appear in this order.
conf_order = [
    'Very confident',
    'Confident',
    'Not confident',
    'Not heard of it',
]

# In which faculty are you based?

In [3]:
# Bar chart: response count for each `faculty` value, bars ordered by
# descending count (sort='-x').
response_count = alt.X('count(faculty)', axis=alt.Axis(title='Number of Responses'))
faculty_axis = alt.Y('faculty', sort='-x', axis=alt.Axis(title='Faculty'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=faculty_axis)

# Which of the following organisations usually fund your research?
Each individual respondent could specify multiple funding sources.
ToDo: Further data cleaning and grouping of responses.

In [4]:
# Bar chart: response count for each `funder` value in the funders table,
# bars ordered by descending count (sort='-x').
response_count = alt.X('count(funder)', axis=alt.Axis(title='Number of Responses'))
funder_axis = alt.Y('funder', sort='-x', axis=alt.Axis(title='Funder'))
alt.Chart(funders_df).mark_bar().encode(x=response_count, y=funder_axis)

# What is your job title?
ToDo: Grouping of responses.

In [5]:
# Bar chart: response count for each `clean_job` value, bars ordered by
# descending count (sort='-x').
response_count = alt.X('count(clean_job)', axis=alt.Axis(title='Number of Responses'))
job_axis = alt.Y('clean_job', sort='-x', axis=alt.Axis(title='Job Title'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=job_axis)

# Do you use research software?
"Research software" is any software you have used in the generation of a result that you expect to appear in a publication. This might be anything from a few-line script to clean some data, to a fully fledged software suite. It includes code you have written yourself and code written by someone else.

In [6]:
# Bar chart: response count for each `use_software` answer.
response_count = alt.X('count(use_software)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('use_software', axis=alt.Axis(title='Use Research Software'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)

# How important is research software to your work?

1: Not at all

5: Vital

In [7]:
# Bar chart: response count for each `importance_software` rating.
# No explicit encoding type or sort is given, so both are left to Altair's
# defaults for this column.
response_count = alt.X('count(importance_software)', axis=alt.Axis(title='Number of Responses'))
rating_axis = alt.Y('importance_software', axis=alt.Axis(title='Software Importance'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=rating_axis)

# Have you developed your own research software?

In [8]:
# Bar chart: response count for each `develop_own_code` answer.
response_count = alt.X('count(develop_own_code)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('develop_own_code', axis=alt.Axis(title='Develop Own Code'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)

# How do you rate your software development expertise?
ToDo: Potentially misleading y scale.

In [9]:
# Bar chart: response count for each `development_expertise` rating.
# NOTE(review): no explicit encoding type or sort is set for the y field, so
# the y scale is whatever Altair infers from the column - confirm it reads as
# intended.
response_count = alt.X('count(development_expertise)', axis=alt.Axis(title='Number of Responses'))
expertise_axis = alt.Y('development_expertise', axis=alt.Axis(title='Development Expertise'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=expertise_axis)

# Do you feel that you have received sufficient training to develop reliable software?

In [10]:
# Bar chart: response count for each `training` answer.
response_count = alt.X('count(training)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('training', axis=alt.Axis(title='Sufficient Training'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)

# Would you be interested in the university helping you commercialise some of your research software?

In [11]:
# Bar chart: response count for each `want_to_commercialise` answer.
response_count = alt.X('count(want_to_commercialise)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('want_to_commercialise', axis=alt.Axis(title='Want Help'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)

# Do you feel that your research software is ready to be shared with a commercial partner?

In [12]:
# Bar chart: response count for each `ready_to_share` answer.
response_count = alt.X('count(ready_to_share)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('ready_to_share', axis=alt.Axis(title='Ready to Share'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)

# Have you used ShARC/Bessemer, the University's high-performance computing (HPC) system?
ToDo: Group these responses.

In [13]:
# Bar chart: response count for each `hpc_use` answer. The encoding types are
# given explicitly here: quantitative (:Q) for the count, nominal (:N) for the
# answer.
response_count = alt.X('count(hpc_use):Q', axis=alt.Axis(title='Number of Responses'))
hpc_axis = alt.Y('hpc_use:N', axis=alt.Axis(title='HPC Use'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=hpc_axis)

# How confident are you with the following technologies? [Version control]

In [14]:
# Bar chart: response count for each `version_control` confidence level,
# with the levels laid out in the order given by `conf_order`.
response_count = alt.X('count(version_control)', axis=alt.Axis(title='Number of Responses'))
confidence_axis = alt.Y('version_control', sort=conf_order, axis=alt.Axis(title='Confidence'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=confidence_axis)

# How confident are you with the following technologies? [Continuous integration]

In [15]:
# Bar chart: response count for each `continuous_integration` confidence
# level, with the levels laid out in the order given by `conf_order`.
response_count = alt.X('count(continuous_integration)', axis=alt.Axis(title='Number of Responses'))
confidence_axis = alt.Y('continuous_integration', sort=conf_order, axis=alt.Axis(title='Confidence'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=confidence_axis)

# How confident are you with the following technologies? [Unit testing]

In [16]:
# Bar chart: response count for each `unit_testing` confidence level, with
# the levels laid out in the order given by `conf_order`.
response_count = alt.X('count(unit_testing)', axis=alt.Axis(title='Number of Responses'))
confidence_axis = alt.Y('unit_testing', sort=conf_order, axis=alt.Axis(title='Confidence'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=confidence_axis)

# How would you rate the university's current level of support for your software-development needs?
ToDo: Potentially misleading y scale.

In [17]:
# Bar chart: response count for each `current_support` rating, bars ordered
# by descending count (sort='-x').
# The y-axis title names the field actually plotted (the support rating);
# this chart does not show HPC use.
alt.Chart(sheffield_df).mark_bar().encode(
    x=alt.X('count(current_support)', axis=alt.Axis(title='Number of Responses')),
    y=alt.Y('current_support', sort='-x', axis=alt.Axis(title='Current Support'))
)

# Have you or someone in your group ever hired someone specifically to develop software?

In [18]:
# Bar chart: response count for each `hired_developer` answer.
response_count = alt.X('count(hired_developer)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('hired_developer', axis=alt.Axis(title='Hired'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)

# Have you ever included costs for software development in a funding proposal?
ToDo: Group responses.

In [19]:
# Bar chart: response count for each `funds_for_development` answer.
response_count = alt.X('count(funds_for_development)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('funds_for_development', axis=alt.Axis(title='Funds'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)

# How suitable would the following models be for your software development needs? [Hire a full-time software developer]

In [20]:
# Bar chart: response count for each `hire_full_time_developer` answer.
response_count = alt.X('count(hire_full_time_developer)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('hire_full_time_developer', axis=alt.Axis(title='Hire'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)

# How suitable would the following models be for your software development needs? [Recruit a developer (or fractional FTE equivalent of a developer) from a central University of Sheffield pool as needed]

In [21]:
# Bar chart: response count for each `hire_rse` answer.
response_count = alt.X('count(hire_rse)', axis=alt.Axis(title='Number of Responses'))
answer_axis = alt.Y('hire_rse', axis=alt.Axis(title='RSE'))
alt.Chart(sheffield_df).mark_bar().encode(x=response_count, y=answer_axis)