In [60]:
import pandas as pd

import bokeh.io
import bokeh.plotting

bokeh.io.output_notebook()

In [61]:
def scatter(data, cat, x, y):
    """Show a Bokeh scatter plot of `y` versus `x`, colored by group.

    Parameters
    ----------
    data : pandas DataFrame
        Tidy data to plot.
    cat : column name or list of column names
        Passed to `data.groupby()`; each resulting group gets its own
        glyph set, color and legend entry.
    x : hashable
        Name of the column plotted on the x-axis (also the axis label).
    y : hashable
        Name of the column plotted on the y-axis (also the axis label).

    Returns
    -------
    None
        The figure is displayed with `bokeh.io.show()`; nothing is returned.
    """
    # Groups are formed with groupby's default ordering.
    groups = data.groupby(cat)

    # Six-color cycle; reused from the start when there are more groups.
    colors = [
        '#e41a1c',
        '#377eb8',
        '#4daf4a',
        '#984ea3',
        '#ff7f00',
        '#ffff33',
    ]

    fig = bokeh.plotting.figure(
        height=300,
        width=400,
        x_axis_label=x,
        y_axis_label=y,
    )

    for idx, (label, subset) in enumerate(groups):
        # Echo the group key so the cell output lists what was plotted.
        print(label)
        fig.circle(
            source=subset,
            x=x,
            y=y,
            legend=str(label),
            color=colors[idx % len(colors)],
        )

    # Clicking a legend entry toggles the visibility of that group.
    fig.legend.location = 'top_left'
    fig.legend.click_policy = 'hide'
    bokeh.io.show(fig)
    

In [69]:
def master_scatter(
    data=None,
    cat=None,
    x=None,
    y=None,
    p=None,
    palette=[
        "#4e79a7",
        "#f28e2b",
        "#e15759",
        "#76b7b2",
        "#59a14f",
        "#edc948",
        "#b07aa1",
        "#ff9da7",
        "#9c755f",
        "#bab0ac",
    ],
    show_legend=True,
    click_policy="hide",
    marker_kwargs=None,
    **kwargs,
):
    """Make a scatter plot with one glyph set per category.

    Parameters
    ----------
    data : Pandas DataFrame
        DataFrame containing tidy data for plotting.
    cat : hashable
        Name of column to use as categorical variable. Groups are
        plotted in order of first appearance (`groupby(..., sort=False)`).
    x : hashable
        Name of column to use as x-axis.
    y : hashable
        Name of column to use as y-axis.
    p : bokeh.plotting.Figure instance, or None (default)
        If None, create a new figure. Otherwise, populate the existing
        figure `p`.
    palette : list of strings of hex colors, or single hex string
        If a list, color palette to cycle through, one color per
        category. If a single string, all glyphs are colored with that
        color.
    show_legend : bool, default True
        If True, show a legend and apply `click_policy` to it. If
        False, the legend is hidden.
    click_policy : str, default "hide"
        Value assigned to `p.legend.click_policy` when the legend is
        shown.
    marker_kwargs : dict or None (default)
        kwargs to be passed to `p.circle()` when making the scatter
        plot. The dict is copied, never modified. Any "color" or
        "legend" entries are overridden per category.
    kwargs
        Any kwargs to be passed to `bokeh.plotting.figure()` when making
        the plot. Only used when `p` is None. `x_axis_label` and
        `y_axis_label` default to `x` and `y`.

    Returns
    -------
    output : bokeh.plotting.Figure instance
        The figure `p`, populated with the scatter glyphs.
    """
    # Instantiate figure, naming the axes after the columns by default
    if p is None:
        kwargs.setdefault("x_axis_label", x)
        kwargs.setdefault("y_axis_label", y)
        p = bokeh.plotting.figure(**kwargs)

    # A bare string is one color for every group; indexing it directly
    # would pick out single characters instead.
    colors = [palette] if isinstance(palette, str) else palette

    # Work on a copy so neither the caller's dict nor a shared default
    # accumulates the per-group "color"/"legend" entries.
    base_kwargs = {} if marker_kwargs is None else dict(marker_kwargs)

    # Build plot (not using color factors) to enable click policies
    # NOTE(review): the `legend` glyph keyword was replaced by
    # `legend_label` in newer Bokeh releases; switch if the installed
    # version rejects it.
    for i, (name, g) in enumerate(data.groupby(cat, sort=False)):
        glyph_kwargs = dict(
            base_kwargs,
            color=colors[i % len(colors)],
            legend=str(name),
        )
        p.circle(source=g, x=x, y=y, **glyph_kwargs)

    if show_legend:
        p.legend.click_policy = click_policy
    else:
        p.legend.visible = False

    return p

In [62]:
# Per-frog metadata, one row per individual.
df_frog = pd.DataFrame(
    [
        ('I', 'adult', 63, 63.1, 'cross'),
        ('II', 'adult', 70, 72.7, 'cross'),
        ('III', 'juvenile', 28, 12.7, 'cranwelli'),
        ('IV', 'adult', 31, 12.7, 'cranwelli'),
    ],
    columns=['ID', 'age', 'SVL (mm)', 'weight (g)', 'species'],
)
df_frog

Unnamed: 0,ID,age,SVL (mm),weight (g),species
0,I,adult,63,63.1,cross
1,II,adult,70,72.7,cross
2,III,juvenile,28,12.7,cranwelli
3,IV,adult,31,12.7,cranwelli


In [63]:
# Group on the combination of age class and species.
cats = ['age', 'species']

scatter(data=df_frog, cat=cats, x='SVL (mm)', y='weight (g)')


('adult', 'cranwelli')
('adult', 'cross')
('juvenile', 'cranwelli')


In [64]:
# Small toy data set, one row per fruit.
df_fruits = pd.DataFrame(
    [
        ('banana', 63.1, 3),
        ('banana', 72.7, 4),
        ('banana', 12.7, 2),
        ('banana', 12.7, 3),
        ('apple', 63.1, 10),
        ('apple', 72.7, 9),
        ('orange', 12.7, 50),
    ],
    columns=['type', 'weight (g)', 'price'],
)

df_fruits.head()

Unnamed: 0,type,weight (g),price
0,banana,63.1,3
1,banana,72.7,4
2,banana,12.7,2
3,banana,12.7,3
4,apple,63.1,10


In [65]:
# Group by fruit type only.
cats = ['type']

scatter(data=df_fruits, cat=cats, x='weight (g)', y='price')



apple
banana
orange


In [71]:
# Read the tongue-adhesion measurements; '#' marks comment text in the file.
df = pd.read_csv(
    '../data/frog_tongue_adhesion.csv',
    comment='#',
)

df.head()

Unnamed: 0,date,ID,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa)
0,2013_02_26,I,3,1205,46,1.95,-785,884,1.27,-0.29,387,70,0.82,3117,-2030
1,2013_02_26,I,4,2527,44,4.08,-983,248,1.59,-0.181,101,94,0.07,24923,-9695
2,2013_03_01,I,1,1745,34,2.82,-850,211,1.37,-0.157,83,79,0.05,21020,-10239
3,2013_03_01,I,2,1556,41,2.51,-455,1025,0.74,-0.17,330,158,0.52,4718,-1381
4,2013_03_01,I,3,493,36,0.8,-974,499,1.57,-0.423,245,216,0.12,2012,-3975


In [72]:
# Display the per-frog metadata table (ID, age, SVL, weight, species).
df_frog

Unnamed: 0,ID,age,SVL (mm),weight (g),species
0,I,adult,63,63.1,cross
1,II,adult,70,72.7,cross
2,III,juvenile,28,12.7,cranwelli
3,IV,adult,31,12.7,cranwelli


In [73]:
#Ex3.2
# Load the tongue-adhesion measurements ('#' marks comment text in the file).
df = pd.read_csv('../data/frog_tongue_adhesion.csv', comment='#')

# Attach the per-frog metadata (age, SVL, weight, species) to every trial.
# 'ID' is the only column the two frames share; naming it explicitly keeps
# the join key stable if either frame later gains another common column.
df = df.merge(df_frog, on='ID')

# Scatter of adhesive force against impact force, one color per age class.
scatter(df, 'age', 'impact force (mN)', 'adhesive force (mN)')

adult
juvenile


In [74]:
# Show the first rows of `df` to check its current columns.
df.head()

Unnamed: 0,date,ID,trial number,impact force (mN),impact time (ms),impact force / body weight,adhesive force (mN),time frog pulls on target (ms),adhesive force / body weight,adhesive impulse (N-s),total contact area (mm2),contact area without mucus (mm2),contact area with mucus / contact area without mucus,contact pressure (Pa),adhesive strength (Pa),age,SVL (mm),weight (g),species
0,2013_02_26,I,3,1205,46,1.95,-785,884,1.27,-0.29,387,70,0.82,3117,-2030,adult,63,63.1,cross
1,2013_02_26,I,4,2527,44,4.08,-983,248,1.59,-0.181,101,94,0.07,24923,-9695,adult,63,63.1,cross
2,2013_03_01,I,1,1745,34,2.82,-850,211,1.37,-0.157,83,79,0.05,21020,-10239,adult,63,63.1,cross
3,2013_03_01,I,2,1556,41,2.51,-455,1025,0.74,-0.17,330,158,0.52,4718,-1381,adult,63,63.1,cross
4,2013_03_01,I,3,493,36,0.8,-974,499,1.57,-0.423,245,216,0.12,2012,-3975,adult,63,63.1,cross


In [67]:
#Ex3.3

def _load_grant_year(path, year=None):
    """Read one Grant finch CSV and give it the common five-column layout.

    Parameters
    ----------
    path : str
        Location of the CSV file; '#' marks comment text in the file.
    year : int or None (default)
        If None, the file is expected to have five columns, which are
        relabelled with 'year' as the third. Otherwise the file is
        expected to have four columns and a constant 'year' column with
        this value is inserted at position 2.

    Returns
    -------
    output : pandas DataFrame
        Frame with columns 'band', 'species', 'year',
        'beak length (mm)', 'beak depth (mm)'.
    """
    frame = pd.read_csv(path, comment='#')
    if year is None:
        frame.columns = ['band', 'species', 'year',
                         'beak length (mm)', 'beak depth (mm)']
    else:
        frame.columns = ['band', 'species',
                         'beak length (mm)', 'beak depth (mm)']
        frame.insert(2, 'year', year, allow_duplicates=True)
    return frame


# NOTE(review): the year values mix two-digit (73, 75, 87, 91) and
# four-digit (2012) forms — confirm this is intended before comparing years.
df_1973 = _load_grant_year('../data/grant_1973.csv')
df_1975 = _load_grant_year('../data/grant_1975.csv', year=75)
df_1987 = _load_grant_year('../data/grant_1987.csv', year=87)
df_1991 = _load_grant_year('../data/grant_1991.csv', year=91)
df_2012 = _load_grant_year('../data/grant_2012.csv', year=2012)
df_1973.head()

Unnamed: 0,band,species,year,beak length (mm),beak depth (mm)
0,20123,fortis,73,9.25,8.05
1,20126,fortis,73,11.35,10.45
2,20128,fortis,73,10.15,9.55
3,20129,fortis,73,9.95,8.75
4,20133,fortis,73,11.55,10.15


In [68]:
# Stack the 1973 and 1975 measurements row-wise; each frame keeps its own
# row labels.
# NOTE(review): df_1987, df_1991 and df_2012 are loaded earlier but are not
# included here — confirm whether that is intended.
df = pd.concat(objs=[df_1973, df_1975])
df.head()

Unnamed: 0,band,species,year,beak length (mm),beak depth (mm)
0,20123,fortis,73,9.25,8.05
1,20126,fortis,73,11.35,10.45
2,20128,fortis,73,10.15,9.55
3,20129,fortis,73,9.95,8.75
4,20133,fortis,73,11.55,10.15
