In [108]:
# https://preppindata.blogspot.com/2023/07/2023-week-28-prep-school-track-team.html

import pandas as pd
import numpy as np
from datetime import date

### Input the data

In [109]:
df_students = pd.read_excel(r'data\PD 2023 Wk 28 Input.xlsx', sheet_name='Students')
df_times = pd.read_excel(r'data\PD 2023 Wk 28 Input.xlsx', sheet_name='Track Times')
df_benchmarks = pd.read_excel(r'data\PD 2023 Wk 28 Input.xlsx', sheet_name='Benchmarks')
df_students

Unnamed: 0,id,first_name,last_name,gender,age
0,1,Jesse,Motto,M,10
1,2,Wendall,Banger,M,10
2,3,Iosep,Wateridge,M,11
3,4,Larina,Thomas,F,8
4,5,Linzy,Barock,F,11
...,...,...,...,...,...
295,296,Tonya,Milleton,F,11
296,297,Tyson,Connealy,M,10
297,298,Hilde,Vanyashkin,F,10
298,299,Miranda,Ropert,F,8


### Join the students basic information with their event and track time 


In [110]:
df_times.columns

Index(['id', 'track_event', 'time'], dtype='object')

In [111]:
df_stu_track = pd.merge(df_students,df_times,on='id')
df_stu_track


Unnamed: 0,id,first_name,last_name,gender,age,track_event,time
0,1,Jesse,Motto,M,10,200m,32.39
1,2,Wendall,Banger,M,10,200m,39.78
2,3,Iosep,Wateridge,M,11,100m,15.39
3,4,Larina,Thomas,F,8,200m,18.10
4,5,Linzy,Barock,F,11,200m,27.00
...,...,...,...,...,...,...,...
295,296,Tonya,Milleton,F,11,100m,19.62
296,297,Tyson,Connealy,M,10,100m,19.93
297,298,Hilde,Vanyashkin,F,10,200m,21.60
298,299,Miranda,Ropert,F,8,100m,17.23


### Join the benchmark table with the right students
- Note: the number of rows should still be 300 after the join

In [112]:
df_benchmarks.columns

Index(['Gender ', 'Age ', 'Event ', 'Benchmark'], dtype='object')

In [113]:
df_stu_track_bench = pd.merge(df_stu_track,df_benchmarks,left_on=['gender','age','track_event'],right_on=['Gender ', 'Age ', 'Event '],how='inner')
df_stu_track_bench


Unnamed: 0,id,first_name,last_name,gender,age,track_event,time,Gender,Age,Event,Benchmark
0,1,Jesse,Motto,M,10,200m,32.39,M,10,200m,29.0
1,2,Wendall,Banger,M,10,200m,39.78,M,10,200m,29.0
2,22,Dallas,Ratchford,M,10,200m,24.58,M,10,200m,29.0
3,45,Jefferey,Cubley,M,10,200m,34.05,M,10,200m,29.0
4,77,Eben,McPeeters,M,10,200m,15.99,M,10,200m,29.0
...,...,...,...,...,...,...,...,...,...,...,...
295,194,Madelin,Kenelin,F,9,100m,44.82,F,9,100m,15.5
296,206,Bertie,Brettell,F,9,100m,42.28,F,9,100m,15.5
297,216,Jordana,Lear,F,9,100m,38.59,F,9,100m,15.5
298,261,Sally,Barca,F,9,100m,20.31,F,9,100m,15.5


### Filter out the students that did not fall within the benchmark


In [114]:
df_stu_track_bench = df_stu_track_bench[df_stu_track_bench['time']<=df_stu_track_bench['Benchmark']]
df_stu_track_bench.columns

Index(['id', 'first_name', 'last_name', 'gender', 'age', 'track_event', 'time',
       'Gender ', 'Age ', 'Event ', 'Benchmark'],
      dtype='object')

In [115]:
df_stu_track_bench

Unnamed: 0,id,first_name,last_name,gender,age,track_event,time,Gender,Age,Event,Benchmark
2,22,Dallas,Ratchford,M,10,200m,24.58,M,10,200m,29.0
4,77,Eben,McPeeters,M,10,200m,15.99,M,10,200m,29.0
5,83,Kevon,Hannum,M,10,200m,25.05,M,10,200m,29.0
7,122,Corbie,Phython,M,10,200m,21.93,M,10,200m,29.0
8,132,Alwyn,Askaw,M,10,200m,20.44,M,10,200m,29.0
...,...,...,...,...,...,...,...,...,...,...,...
279,230,Buiron,Attreed,M,9,200m,21.28,M,9,200m,30.0
281,239,Willey,Joye,M,9,200m,24.18,M,9,200m,30.0
282,240,Robbert,Mingay,M,9,200m,21.63,M,9,200m,30.0
286,269,Barbabas,Horick,M,9,200m,29.65,M,9,200m,30.0


### There has been an error with the collection of the times, so remove any 200m times that fall below 25 seconds


In [116]:
# Filtering out rows with time less than 25 seconds for the 200m track event
df_output = df_stu_track_bench[~((df_stu_track_bench['track_event'] == '200m') & (df_stu_track_bench['time'] < 25))]

### Rank the students fastest to slowest for each event

In [117]:
df_output['Rank'] = df_output.groupby('track_event')['time'].rank().astype(int)
df_output = df_output.sort_values(by='Rank')
df_output

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_output['Rank'] = df_output.groupby('track_event')['time'].rank().astype(int)


Unnamed: 0,id,first_name,last_name,gender,age,track_event,time,Gender,Age,Event,Benchmark,Rank
5,83,Kevon,Hannum,M,10,200m,25.05,M,10,200m,29.0,1
189,127,Gradey,Slevin,M,9,100m,14.31,M,9,100m,14.5,1
79,6,Ganny,Blouet,M,8,100m,14.31,M,8,100m,15.0,1
109,262,Parker,Texton,M,8,200m,25.51,M,8,200m,31.0,2
134,236,Celeste,Kendred,F,8,100m,14.43,F,8,100m,16.0,3
269,100,Brodie,Terris,M,9,200m,25.81,M,9,200m,30.0,3
100,99,Ogden,Scammonden,M,8,200m,25.92,M,8,200m,31.0,4
110,263,Randall,Valentinetti,M,8,200m,26.02,M,8,200m,31.0,5
148,131,Silvia,Viggers,F,10,200m,26.13,F,10,200m,30.0,6
139,14,Arliene,Bonsul,F,10,200m,26.64,F,10,200m,30.0,7


### Output the data

In [118]:
df_output.to_csv(r'output/2023-week28-output.csv')