We'll look at using Seaborn to help visualize and understand finishing results from a marathon.

In [104]:
import numpy as np
import pandas as pd

In [105]:
data = pd.read_csv('marathon_times.csv')
data.head()

Unnamed: 0,age,gender,split,final
0,33,M,1:05:38 AM,2:08:51 AM
1,32,M,1:06:26 AM,2:09:28 AM
2,31,M,1:06:49 AM,2:10:42 AM
3,38,M,1:06:16 AM,2:13:45 AM
4,31,M,1:06:32 AM,2:13:59 AM


In [106]:
data['split'] = data['split'].map(lambda x: x.rstrip('AM'))

In [107]:
data['final'] = data['final'].map(lambda x: x.rstrip('AM'))

In [108]:
data.head()

Unnamed: 0,age,gender,split,final
0,33,M,1:05:38,2:08:51
1,32,M,1:06:26,2:09:28
2,31,M,1:06:49,2:10:42
3,38,M,1:06:16,2:13:45
4,31,M,1:06:32,2:13:59


In [109]:
data.dtypes

age        int64
gender    object
split     object
final     object
dtype: object

In [111]:
data['split_sec'] = data['split'].str.split(':').astype(int) / 1E9
data['final_sec'] = data['final'].astype(int) / 1E9

ValueError: setting an array element with a sequence.

In [None]:
#a converter for the times:
import datetime

def convert_time(s):
    h, m, s = map(int, s.split(':'))
    return datetime.timedelta(hours=h, minutes=m, seconds=s)

#data = pd.read_csv('marathon_times.csv', converters={'split':convert_time, 'final':convert_time})

In [None]:
data.dtypes

In [None]:
data['split'] = data['split'].apply(convert_time)

In [None]:
data['final'] = data['final'].apply(convert_time)

In [112]:
data.head()

Unnamed: 0,age,gender,split,final
0,33,M,1:05:38,2:08:51
1,32,M,1:06:26,2:09:28
2,31,M,1:06:49,2:10:42
3,38,M,1:06:16,2:13:45
4,31,M,1:06:32,2:13:59


In [None]:
with sns.axes_style('white'):
    g = sns.jointplot("split_sec", "final_sec", data, kind='hex')
    g.ax_joint.plot(np.linspace(4000, 16000),
                    np.linspace(8000, 32000), ':k')

In [None]:
data['split_frac'] = 1 - 2 * data['split_sec'] / data['final_sec']
data.head()

In [None]:
sns.distplot(data['split_frac'], kde=False);
plt.axvline(0, color="k", linestyle="--");

In [None]:
sum(data.split_frac < 0)

In [None]:
sns.kdeplot(data.split_frac[data.gender=='M'], label='men', shade=True)
sns.kdeplot(data.split_frac[data.gender=='W'], label='women', shade=True)
plt.xlabel('split_frac');

In [None]:
sns.violinplot("gender", "split_frac", data=data,
               palette=["lightblue", "lightpink"]);

In [None]:
data['age_dec'] = data.age.map(lambda age: 10 * (age // 10))
data.head()

In [None]:
men = (data.gender == 'M')
women = (data.gender == 'W')

with sns.axes_style(style=None):
    sns.violinplot("age_dec", "split_frac", hue="gender", data=data,
                   split=True, inner="quartile",
                   palette=["lightblue", "lightpink"]);

In [None]:
g = sns.lmplot('final_sec', 'split_frac', col='gender', data=data,
               markers=".", scatter_kws=dict(color='c'))
g.map(plt.axhline, y=0.1, color="k", ls=":");