# Euan's Plotting and Formatting Functions

This notebook has a collection of functions for plotting pandas data and for formatting statistical test result output in LaTeX markup.

Euan Freeman (euan.freeman@glasgow.ac.uk)

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Global seaborn theme shared by every plot in this notebook.
sns.set_context("notebook")
sns.set_style("whitegrid")

# Matplotlib overrides, applied after the seaborn calls so these values take
# effect last: high-resolution output, thin axis/grid lines and no outline
# around patches (e.g. bars).
plt.rcParams.update({
    'figure.dpi': 300,
    'axes.linewidth': 0.5,
    'grid.linewidth': 0.5,
    'patch.linewidth': 0.0,
})

In [2]:
def barplot_one_factor(df, iv, dv, ci=95, xlab="", ylab="", xlim=None, hide_legend=True, hide_grid=True, size=(6, 1.3), palette="magma"):
    """
    Creates a horizontal barplot for one categorical variable.
    
    Args:
        df:          Data frame in long format.
        iv:          Independent variable for the y-axis category.
        dv:          Dependent variable to plot on the x-axis.
        ci:          Confidence interval size.
        xlab:        The x-axis label.
        ylab:        The y-axis label.
        xlim:        Optional x-axis limits; left unchanged when None.
        hide_legend: If true, hides the legend.
        hide_grid:   If true, hides the background grid.
        size:        Plot size as a tuple.
        palette:     Colour palette.
    
    Returns:
        (plot, axis) tuple.
    """
    plot, axis = plt.subplots(figsize=size)

    # Draw on the axis created above rather than on pyplot's implicit
    # "current axes", so every call below targets the same explicit object.
    # NOTE(review): `ci` and `errwidth` are reported as deprecated in newer
    # seaborn releases -- confirm against the installed version.
    sns.barplot(data=df, x=dv, y=iv, palette=palette, dodge=True, ci=ci, errwidth=1, capsize=.4, ax=axis)
    
    if hide_legend:
        # An empty, frameless legend replaces whatever seaborn generated.
        axis.legend([], [], frameon=False)

    axis.set_ylabel(ylab)
    axis.set_xlabel(xlab)
    axis.grid(not hide_grid)
    
    if xlim is not None:
        axis.set_xlim(xlim)
    
    return (plot, axis)

In [8]:
def barplot_two_factor(df, iv1, iv2, dv, ci=95, xlab="", ylab="", hide_legend=True, hide_grid=True, size=(6, 3), palette="magma", xlim=None):
    """
    Creates a horizontal barplot for two categorical variables.
    
    Args:
        df:          Data frame in long format.
        iv1:         Independent variable for y-axis category.
        iv2:         Independent variable for bar colour.
        dv:          Dependent variable to plot on the x-axis.
        ci:          Confidence interval size.
        xlab:        The x-axis label.
        ylab:        The y-axis label.
        hide_legend: If true, hides the legend; otherwise the legend is drawn
                     frameless in two columns below the plot.
        hide_grid:   If true, hides the background grid.
        size:        Plot size as a tuple.
        palette:     Colour palette.
        xlim:        Optional x-axis limits; left unchanged when None.
    
    Returns:
        (plot, axis) tuple.
    """
    plot, axis = plt.subplots(figsize=size)
    
    # Draw on the axis created above rather than on pyplot's implicit
    # "current axes", so every call below targets the same explicit object.
    # NOTE(review): `ci` and `errwidth` are reported as deprecated in newer
    # seaborn releases -- confirm against the installed version.
    sns.barplot(data=df, x=dv, y=iv1, hue=iv2, palette=palette, ci=ci, errwidth=1, capsize=.1, ax=axis)
    
    if hide_legend:
        # An empty, frameless legend replaces the one seaborn adds for `hue`.
        axis.legend([], [], frameon=False)
    else:
        axis.legend(handletextpad=0.5, columnspacing=1, loc="lower center", ncol=2, frameon=False, bbox_to_anchor=(0.493, -0.5))
    
    axis.set_ylabel(ylab)
    axis.set_xlabel(xlab)
    axis.grid(not hide_grid)

    if xlim is not None:
        axis.set_xlim(xlim)
    
    return (plot, axis)

In [6]:
def stripplot(df, iv, dv, ci=95, xlab="", ylab="", hide_legend=True, hide_grid=True, size=(8, 2), palette="magma", xlim=None):
    """
    Creates a horizontal strip plot with confidence intervals.

    Individual observations are drawn as jittered, semi-transparent points,
    with a seaborn point plot (estimate plus confidence interval) overlaid
    on the same axis.

    Args:
        df:          Data frame in long format.
        iv:          Independent variable for the y-axis category.
        dv:          Dependent variable to plot on the x-axis.
        ci:          Confidence interval size for the overlaid point plot.
        xlab:        The x-axis label.
        ylab:        The y-axis label.
        hide_legend: If true, hides the legend.
        hide_grid:   If true, hides the background grid.
        size:        Plot size as a tuple.
        palette:     Colour palette.
        xlim:        Optional x-axis limits; left unchanged when None.

    Returns:
        (plot, axis) tuple.
    """
    plot, axis = plt.subplots(figsize=size)

    # Both layers are bound to the same explicit axis instead of relying on
    # pyplot's implicit "current axes".
    # NOTE(review): `ci`, `errwidth` and `scale` are reported as deprecated in
    # newer seaborn releases -- confirm against the installed version.
    sns.stripplot(x=dv, y=iv, data=df, palette=palette, dodge=True, size=6, alpha=0.2, jitter=True, ax=axis)
    sns.pointplot(x=dv, y=iv, data=df, palette=palette, dodge=.4, scale=1.5, ci=ci, errwidth=1.2, ax=axis)

    if hide_legend:
        # An empty, frameless legend replaces whatever seaborn generated.
        axis.legend([], [], frameon=False)

    axis.set_ylabel(ylab)
    axis.set_xlabel(xlab)
    axis.grid(not hide_grid)

    if xlim is not None:
        axis.set_xlim(xlim)
    
    return (plot, axis)

In [12]:
def friedman_to_LaTeX(result):
    r"""
    Formats the given Friedman's test result as a LaTeX string.

    Args:
        result: Indexable pair where result[0] is the test statistic and
                result[1] is the p-value.

    Returns:
        Inline-math string of the form "$\chi^2 = 12.50, p < .001$". The
        statistic is shown to two decimals; p-values below 0.001 are
        reported as "< .001", all others to three decimals.
    """
    statistic, p_value = result[0], result[1]

    if p_value < 0.001:
        symbol, p_text = "<", ".001"
    else:
        symbol, p_text = "=", "{:.3f}".format(p_value)

    # Raw string: "\c" is not a valid escape sequence, so a plain literal
    # triggers an invalid-escape warning on current Python versions.
    return r"$\chi^2 = {:.2f}, p {} {}$".format(statistic, symbol, p_text)

In [None]:
def nemenyi_to_LaTeX(result):
    """
    Formats the given Nemenyi test results as a LaTeX string.

    Args:
        result: Square pandas DataFrame of pairwise p-values whose rows are
                in the same order as its columns.

    Returns:
        One "<a> vs <b> p = <p>" line (newline-terminated) for every
        distinct pair of conditions with p < 0.05; an empty string when no
        pair is significant.
    """
    lines = []

    # DataFrame.items() yields (column label, column values); the older
    # iteritems() spelling no longer exists in pandas 2.x.
    for i, (condition, p_vals) in enumerate(result.items()):
        for j, p_val in enumerate(p_vals):
            # j > i reports each pair once and skips the diagonal, so a
            # self-comparison is never listed even if the diagonal holds a
            # sentinel value such as -1.
            if j > i and p_val < 0.05:
                lines.append("{} vs {} p = {:.3f}\n".format(condition, result.columns[j], p_val))

    return "".join(lines)