# Toronto triathlon festival - 2018 #
- Summarize results for Toronto triathlon festival, 2018-07-22
- Dataset from [Sportstats](https://www.sportstats.ca/display-results.xhtml?raceid=93927&status=results)
- Copied to tab-delimited format, file *results.txt* in [this repository](https://github.com/yiradati/Toronto-Triathlon-Festival-2018/)
- Code by u/yiradati



**Import libraries**

In [1]:
#import
import csv
import matplotlib.pyplot as plt
import numpy as np

**Read data**
- input data saved as *results.txt* in local folder

In [2]:
#read data
#Load the tab-delimited results file: the first row goes to `headers`,
#every remaining row is stored in `data` as a list of strings.
file_in='results.txt'
#the with-block closes the file even if parsing raises part-way through
with open(file_in) as input_stream:
    reader=csv.reader(input_stream, delimiter='\t')
    #builtin next() works on both Python 2 and Python 3 iterators
    headers=next(reader)
    data=list(reader)

**Helper functions**
- get_hours(time_string)
    - input: string
    - output: hours (int)
- get_minutes(time_string)
    - input: string
    - output: minutes (int)
- get_seconds(time_string)
    - input: string
    - output: seconds (float)
- get_time_as_min(time_string)
    - input: string
    - output: total number of minutes in entire string (float)
- get_gender(class_string)
    - input: string
    - output: gender as string

In [3]:
#hours field of a time string
def get_hours(time_in):
    """Return the first two characters of time_in converted to int.

    Raises ValueError if those characters are not a valid integer.
    """
    return int(time_in[:2])

In [4]:
#minutes field of a time string
def get_minutes(time_in):
    """Return characters 3-4 (zero-based) of time_in converted to int.

    Raises ValueError if those characters are not a valid integer.
    """
    minute_field=time_in[3:5]
    return int(minute_field)

In [5]:
#seconds field of a time string
def get_seconds(time_in):
    """Return everything from character 6 (zero-based) onward as a float.

    Raises ValueError if that tail is empty or not a valid number.
    """
    second_field=time_in[6:]
    return float(second_field)

In [6]:
#total duration of a time string, expressed in minutes
def get_time_as_min(time_in):
    """Combine the hours, minutes and seconds fields of time_in into minutes.

    Hours are scaled by 60 and seconds divided by 60; any exception raised
    by get_hours, get_minutes or get_seconds propagates to the caller.
    """
    whole_minutes=60*get_hours(time_in)+get_minutes(time_in)
    return whole_minutes+get_seconds(time_in)/60

In [7]:
#return gender label
def get_gender(class_in):
    """Return the first character of class_in as the gender label.

    An empty string yields '' instead of raising IndexError, so a row with
    a blank class field simply matches neither 'M' nor 'F' at the call site.
    """
    #slicing (rather than indexing) is safe on an empty string
    return class_in[:1]

**Plot**
- loop over defined columns
    - create and normalize histogram
    - plot histogram, mirrored and with shaded center
    - save figure in local folder

In [9]:
#columns to plot
#indices into each data row, paired one-to-one with plot_titles
columns=[6,7,8,9]
plot_titles=['Swim, 1.5k','Bike, 40k','Run, 10k','Finish time']

#colour settings
#TTF colors sampled from logo using imageJ. 
#A few slightly different color values depending on which logo used.
#RGB tuples with components in the 0-1 range; the *_dark variants scale
#every channel by `darken` and are used to fill between the outline curves.
darken=0.5
Ttf_blue=(158/255.0,216/255.0,236/255.0)
#all three channels are darkened, matching Ttf_red_dark
Ttf_blue_dark=(darken*158/255.0,darken*216/255.0,darken*236/255.0)
Ttf_red=(225/255.0,3/255.0,24/255.0)
Ttf_red_dark=(darken*225/255.0,darken*3/255.0,darken*24/255.0)

#plot data for each specified column, save fig.
#For every entry of `columns`: gather the times of men and women, build
#peak-normalized histograms, draw each as a mirrored filled outline
#(men around y=0, women around y=shift) and save the figure to disk.
for col_ind, col in enumerate(columns):
    #summarize data for specified column
    total_time_men=[]
    total_time_women=[]

    no_total_time=0#count how many entries are lacking total time
    for row in data:
        gender=get_gender(row[2])
        if gender=='M':
            times=total_time_men
        elif gender=='F':
            times=total_time_women
        else:
            #rows in neither class are not plotted and not counted
            continue
        try:
            times.append(get_time_as_min(row[col]))
        except (ValueError, IndexError):
            #blank/malformed time string, or row too short for this column
            no_total_time+=1

    #bad data
    #single parenthesized string: prints identically on Python 2 and 3
    print(str(no_total_time) + ' of '+str(len(data))+' entries lack a time for the category ' + plot_titles[col_ind])

    #calculate histograms
    bins = np.linspace(0, 300, 50)#bins for histogram
    bin_n_men, bin_edges_men=np.histogram(total_time_men,bins)
    bin_n_women, bin_edges_women=np.histogram(total_time_women,bins)

    #determine bin centers (numpy.histogram gives edges)
    bin_centers_men=(bin_edges_men[:-1]+bin_edges_men[1:])/2.0
    bin_centers_women=(bin_edges_women[:-1]+bin_edges_women[1:])/2.0

    #normalize histograms so the tallest bin is 1
    #an all-zero histogram is divided by 1 instead of 0 to avoid NaNs
    bin_n_men_norm=np.divide(bin_n_men,float(max(np.max(bin_n_men),1)))
    bin_n_women_norm=np.divide(bin_n_women,float(max(np.max(bin_n_women),1)))

    #plot
    plt.figure("TTF results 2018", figsize=(16,8),facecolor='k')
    shift=-4#vertical offset of the women's curve
    #plot lines
    #only label single instance for each gender
    plt.plot(bin_centers_men, bin_n_men_norm,color=Ttf_blue, linewidth=3,label='Men')
    plt.plot(bin_centers_women,shift+ bin_n_women_norm,color=Ttf_red, linewidth=3,label='Women')
    plt.plot(bin_centers_men, -bin_n_men_norm,color=Ttf_blue, linewidth=3)
    plt.plot(bin_centers_women, shift-bin_n_women_norm,color=Ttf_red, linewidth=3,)

    #fill space between lines with darker color
    plt.fill(bin_centers_men, -bin_n_men_norm,color=Ttf_blue_dark)
    plt.fill(bin_centers_men, bin_n_men_norm,color=Ttf_blue_dark)
    plt.fill(bin_centers_women, shift-bin_n_women_norm,color=Ttf_red_dark)
    plt.fill(bin_centers_women, shift+bin_n_women_norm,color=Ttf_red_dark)

    #uniform limits across all figures
    plt.xlim([0,300])
    plt.ylim([-6,2])

    #axis colors: only the bottom spine and x ticks stay visible on black
    ax=plt.gca()
    ax.patch.set_facecolor('k')
    ax.spines['bottom'].set_color('w')
    ax.spines['top'].set_color('k')
    ax.spines['right'].set_color('k')
    ax.spines['left'].set_color('k')
    ax.xaxis.set_ticks_position('bottom')
    ax.tick_params(axis='x', colors='w')

    #labels
    plt.xlabel('Total time [minutes]', color='w', fontsize=18)
    plt.title(plot_titles[col_ind],color='w', fontsize=28)
    #keep the returned legend: calling legend() again would build a new
    #one and drop the loc='best' setting
    legend=plt.legend(loc='best')
    #update legend text color
    l_text=legend.get_texts()
    l_text[0].set_color(Ttf_blue)
    l_text[1].set_color(Ttf_red)
    #remove legend box
    legend.get_frame().set_facecolor('none')

    #save figure
    plt.savefig('TTF 2018_'+str(col_ind),facecolor='k')
    plt.close()

117 of 427 entries lack a time for the category Swim, 1.5k
124 of 427 entries lack a time for the category Bike, 40k
129 of 427 entries lack a time for the category Run, 10k
131 of 427 entries lack a time for the category Finish time
