In [None]:
%matplotlib inline
import os
from textwrap import wrap

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pylab
#import scipy as sp
#import scipy.stats as sps
import seaborn as sns
from IPython.display import display, Image
from sqlalchemy import create_engine

## Survey question

In [None]:
# Show the image of the survey question; the file name is relative, so
# PR_variations.png is looked up from the notebook's working directory.
display(Image(filename='PR_variations.png'))

### Read in answers

In [None]:
# Connect to the local MySQL database.
# The connection URL (which contains the user name and password) is read from
# the environment so that no credentials are stored in the notebook, e.g.
#   export NPG_SURVEY_DB_URL='mysql+oursql://<user>:<password>@localhost/npg_survey'
# NOTE(review): a password was previously committed in this cell; it should be
# treated as leaked and rotated.
db_url = os.environ.get('NPG_SURVEY_DB_URL')
if not db_url:
    raise RuntimeError("Environment variable NPG_SURVEY_DB_URL is not set; expected a "
                       "SQLAlchemy URL for the npg_survey database.")
engine = create_engine(db_url)

# relevant questions: the five items of the peer review variations question
pr_variation_columns = ", ".join("Variationsontraditionalpeerreviewloop_%d" % item
                                 for item in range(1, 6))

# construct query string; limit to US respondents
query_string = "SELECT " + pr_variation_columns + " FROM all_responses WHERE CountryNorthAmerica = 'United States'"

# set up dataframe: one row per respondent, one column per rated item
pr_variations = pd.read_sql(query_string, engine)

In [None]:
# Inspect the raw responses before they are aggregated and plotted.
display(pr_variations)

#### Define graph style

In [None]:
# default size (in inches) for figures created in this notebook
plt.rcParams['figure.figsize'] = (8.0, 6.0)

# parameters for saved figures
plt.rcParams['savefig.bbox'] = 'tight'
plt.rcParams['savefig.dpi'] = 300

# set base Seaborn style; fonts are listed in order of preference
sns.set_style("white",
              {'font.sans-serif': ['Helvetica', 'Liberation Sans',
                                   'Bitstream Vera Sans', 'sans-serif'],
               'axes.linewidth': 0,
               'xtick.direction': 'in',
               'xtick.major.size': 6.0})


def apply_cdl_style(fig):
    """
    Applies some elements of CDL house style to a plot.

    fig: matplotlib axes object to restyle (despite the parameter name, the
         methods used here are axes methods); it is modified in place.

    Returns the same object that was passed in.
    """

    # kill left axis chartjunk
    fig.set_ylabel('')
    sns.despine(ax=fig, left=True)

    # Get rid of the dashed line pandas draws on the plot.  Only the first
    # line is considered, and only when it really is dashed: axes without any
    # lines no longer raise IndexError, and a solid/marker-only line that
    # happens to come first is left visible.
    if fig.lines and fig.lines[0].get_linestyle() == '--':
        fig.lines[0].set_visible(False)

    # set font sizes
    fig.tick_params(axis='x', width=2, labelsize=12, color='#808080')
    fig.tick_params(axis='y', labelsize=12)

    return fig

In [None]:
def graph_likert(questions, answers, label_width=25, legend_width=10, divergent=False):
    """
    Prepare a horizontal stacked bar graph based on a Likert-scale question.

    Each item becomes one bar showing the percentage of responses per answer;
    a white dot marks the item's mean response rescaled to 0-100. Items are
    ordered by that mean.

    questions (DataFrame): columns are items rated, rows are individual responses
    answers: (list): the response scale in ascending order (e.g., [poor, fine, good])
    label_width (int): max number of characters per line in the item labels
    legend_width (int): max number of characters per line in the legend labels
    divergent (bool): If True, use a divergent color palette, if False, use a sequential one

    Returns the matplotlib axes object the graph was drawn on.
    """

    # set up dict for conversion from likert scale (e.g., 1-5) to 0-100%;
    # max(..., 1) keeps a one-answer scale from dividing by zero
    number_of_answers = len(answers)
    scale_span = float(max(number_of_answers - 1, 1))
    answer_to_value = dict(zip(answers, np.arange(number_of_answers) / scale_span * 100))

    collected_counts = pd.DataFrame(index=answers)
    mean = pd.Series(index=questions.columns, dtype=float)

    for column in questions.columns:
        responses = questions[column].dropna()

        # answers nobody picked get an explicit count of 0 instead of NaN
        collected_counts[column] = responses.value_counts().reindex(answers).fillna(0)

        # scale responses to go from 0 to 100
        mean[column] = responses.map(answer_to_value).mean()

    # sort items by the mean (Series.sort() no longer exists in pandas),
    # then convert counts to percentages of each item's total
    mean = mean.sort_values()
    collected_counts = collected_counts.T.reindex(index=mean.index)
    collected_counts = collected_counts.div(collected_counts.sum(axis=1).astype(float) / 100, axis=0)

    # wrap long item labels and legend labels onto multiple lines
    collected_counts.index = ['\n'.join(wrap(label, label_width)) for label in collected_counts.index]
    collected_counts.columns = ['\n'.join(wrap(label, legend_width)) for label in collected_counts.columns]

    # set color palette
    palette = "RdBu_r" if divergent else "Blues"

    # plot percentages of each response
    ax = collected_counts.plot(kind='barh', stacked=True, grid=False,
                               color=sns.color_palette(palette, len(collected_counts.columns))[::-1],
                               xlim=(0, 100), edgecolor='w', linewidth=2)

    # plot mean: one marker per bar, positioned at the item's 0-100 mean
    mean_line, = ax.plot(mean.values, np.arange(len(mean)), marker='o', color='w',
                         markersize=25, markeredgewidth=0, linewidth=0)

    # legend goes below the plot, spread across the full width
    ax.legend(bbox_to_anchor=(0., -0.02, 1., -0.03), loc='upper left', ncol=number_of_answers,
              mode="expand", borderaxespad=0., fontsize=12)

    apply_cdl_style(ax)

    # make sure the mean markers stay visible even if the styling step hid
    # the first line on the axes
    mean_line.set_visible(True)

    # two inches of height per item
    ax.get_figure().set_size_inches(12., 2. * len(collected_counts.index))

    return ax

#### Set up

In [None]:
# Human-readable labels for the rated items. They are assigned by position,
# so their order has to match the column order of pr_variations.
PR_VARIATIONS = [
    "Publish signed reviews",
    "Crowd sourced peer review",
    "Post-publication peer review",
    "Double-blind peer review",
    "Open peer review",
]
pr_variations.columns = PR_VARIATIONS

# Response scale, listed in ascending order.
IDEA_LEVELS = [
    "This is a bad idea",
    "This is not a very good idea",
    "This is quite a good idea",
    "This is a very good idea",
]

### Here are some examples of variations of peer review methods. Please use the scale provided to indicate your support for each approach.

In [None]:
# Draw the stacked Likert bars for the peer review variations using the
# divergent palette; `fig` holds whatever graph_likert returns and is reused
# by the plotly conversion cell.
fig = graph_likert(pr_variations, IDEA_LEVELS, divergent=True)
#display(fig)

In [None]:
import plotly.plotly as py
import plotly.tools as tls

# Drop the matplotlib legend before converting to plotly. Looking the legend
# up first keeps this cell re-runnable: once removed there is no legend left,
# and calling .remove() on the missing legend would raise AttributeError.
legend = fig.get_legend()
if legend is not None:
    legend.remove()

# Convert the figure that owns the Likert axes. plt.gcf() is not reliable
# here: by default the inline backend closes figures once the cell that drew
# them has finished, so in a later cell gcf() returns a new, empty figure.
ply_fig = tls.mpl_to_plotly(fig.get_figure())

#py.iplot_mpl(fig.get_figure())