In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from geopy.extra.rate_limiter import RateLimiter

In [2]:
# Load the raw race results; the path is resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer='ff_race_100.csv')

In [3]:
# Preview the first five rows of the raw frame.
df.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Division,DP,Time
0,1,Ryan,Kunz,Tallahassee,FL,42,M,1,16:34:58
1,2,Joe,Patoray,PARRISH,FL,44,M,2,18:34:41
2,3,Brian,Beal,Lincolnton,NC,30,M,3,19:11:46
3,4,Lucas,Vidal,Cape Coral,FL,38,M,4,20:47:55
4,5,Jared,Patterson,Cincinnati,OH,27,M,5,22:03:30


In [4]:
# Drop every column that contains at least one missing value. The explicit
# copy makes df2 an independent frame, so the column assignments in later
# cells neither raise SettingWithCopyWarning nor depend on whether pandas
# handed back a view of df.
df2 = df.dropna(axis=1).copy()

In [5]:
# Display the frame after the column drop (pandas renders a truncated view).
df2

Unnamed: 0,Place,First,Last,City,State,Age,Division,DP,Time
0,1,Ryan,Kunz,Tallahassee,FL,42,M,1,16:34:58
1,2,Joe,Patoray,PARRISH,FL,44,M,2,18:34:41
2,3,Brian,Beal,Lincolnton,NC,30,M,3,19:11:46
3,4,Lucas,Vidal,Cape Coral,FL,38,M,4,20:47:55
4,5,Jared,Patterson,Cincinnati,OH,27,M,5,22:03:30
...,...,...,...,...,...,...,...,...,...
62,63,Alina,Sanchez,Lawrenceville,GA,39,F,14,33:27:23
63,64,Joseph,Dionne,Winter Springs,FL,37,M,50,33:29:02
64,65,Matthew,Walker,Marietta,GA,36,M,51,33:30:06
65,66,Toby,Beougher,Sinking Spring,PA,18,M,52,33:32:05


In [6]:
# Join first and last name with a single space into one display name.
df2['FullName'] = df2['First'].str.cat(df2['Last'], sep=' ')

In [7]:
# Parse the finish-time strings (e.g. "16:34:58") into timedelta values.
df2['Time'] = df2['Time'].pipe(pd.to_timedelta)

In [8]:
# Finish time expressed as (fractional) minutes.
df2['TotalMinutes'] = df2['Time'].dt.total_seconds().div(60)

In [9]:
# Round to the nearest whole minute and store the result as integers.
whole_minutes = df2['TotalMinutes'].round()
df2['TotalMinutes'] = whole_minutes.astype(int)

In [10]:
# Check the new FullName / TotalMinutes columns on the first five rows.
df2.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Division,DP,Time,FullName,TotalMinutes
0,1,Ryan,Kunz,Tallahassee,FL,42,M,1,0 days 16:34:58,Ryan Kunz,995
1,2,Joe,Patoray,PARRISH,FL,44,M,2,0 days 18:34:41,Joe Patoray,1115
2,3,Brian,Beal,Lincolnton,NC,30,M,3,0 days 19:11:46,Brian Beal,1152
3,4,Lucas,Vidal,Cape Coral,FL,38,M,4,0 days 20:47:55,Lucas Vidal,1248
4,5,Jared,Patterson,Cincinnati,OH,27,M,5,0 days 22:03:30,Jared Patterson,1324


In [11]:
# The 'Division' column shows M/F values in the previews above, so expose it
# under the clearer name 'Gender'. Reassigning instead of using inplace=True
# avoids mutating a frame that may share data with df and keeps the cell
# safe to re-run (a second run finds no 'Division' column and is a no-op).
df2 = df2.rename(columns={'Division': 'Gender'})

In [12]:
# Successful lookups, keyed by the exact "City, State" query string, so a
# place shared by several rows is only requested from the service once.
GEOCODE_CACHE = {}

# Rate-limited geocode callable; built on first use and then reused.
_geocode = None


def get_lat_long(city, state):
    """Resolve a "city, state" address to coordinates via Nominatim.

    Args:
        city: City name.
        state: State name or abbreviation.

    Returns:
        A ``(latitude, longitude)`` tuple from the geocoder's match, or
        ``(None, None)`` when either input is missing/blank, the geocoder
        finds no match, or the request ultimately fails.
    """
    global _geocode

    # A missing or blank component would otherwise be sent to the service as
    # the literal text "nan"/"None" and could come back with a bogus match.
    for part in (city, state):
        if pd.isna(part) or not str(part).strip():
            return None, None

    address = f"{city}, {state}"
    if address in GEOCODE_CACHE:
        return GEOCODE_CACHE[address]

    if _geocode is None:
        # One client for the whole session instead of one per call. The
        # RateLimiter spaces requests at least a second apart (the public
        # Nominatim service asks for at most one request per second),
        # retries on geocoder service errors such as timeouts, and with
        # swallow_exceptions=True returns None once retries are exhausted.
        geolocator = Nominatim(user_agent="running_app", timeout=10)
        _geocode = RateLimiter(
            geolocator.geocode,
            min_delay_seconds=1,
            swallow_exceptions=True,
        )

    location = _geocode(address)
    if location is None:
        # Not cached: a None may be a transient failure rather than a
        # genuine "no such place", so a later call is allowed to retry.
        return None, None

    coordinates = (location.latitude, location.longitude)
    GEOCODE_CACHE[address] = coordinates
    return coordinates

In [13]:
# Every lookup is a network request, so geocode each distinct (City, State)
# pair once and reuse the answer for all runners from that place.
# dict.fromkeys de-duplicates while keeping first-seen order.
place_coords = {
    place: get_lat_long(*place)
    for place in dict.fromkeys(zip(df2['City'], df2['State']))
}
row_coords = [place_coords[place] for place in zip(df2['City'], df2['State'])]

# Building the columns from lists (rather than unpacking zip(*...)) also works
# when df2 has no rows, where the unpacking form raises ValueError.
df2['latitude'] = [lat for lat, _ in row_coords]
df2['longitude'] = [lon for _, lon in row_coords]

In [14]:
# Check the new latitude / longitude columns on the first five rows.
df2.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Gender,DP,Time,FullName,TotalMinutes,latitude,longitude
0,1,Ryan,Kunz,Tallahassee,FL,42,M,1,0 days 16:34:58,Ryan Kunz,995,30.438083,-84.280933
1,2,Joe,Patoray,PARRISH,FL,44,M,2,0 days 18:34:41,Joe Patoray,1115,27.587536,-82.425094
2,3,Brian,Beal,Lincolnton,NC,30,M,3,0 days 19:11:46,Brian Beal,1152,35.473745,-81.254525
3,4,Lucas,Vidal,Cape Coral,FL,38,M,4,0 days 20:47:55,Lucas Vidal,1248,26.562574,-81.94388
4,5,Jared,Patterson,Cincinnati,OH,27,M,5,0 days 22:03:30,Jared Patterson,1324,39.101454,-84.51246


In [15]:
# Build a "lat, lon" label only for rows that were actually geocoded. Rows
# with a missing coordinate stay missing instead of turning into the literal
# text "nan, nan" (or "None, None"), which would be written to the CSV as if
# it were a real value.
has_coords = df2['latitude'].notna() & df2['longitude'].notna()
lat_long_text = df2['latitude'].astype(str) + ', ' + df2['longitude'].astype(str)
df2['LatLong'] = lat_long_text.where(has_coords)
df2.head()

Unnamed: 0,Place,First,Last,City,State,Age,Gender,DP,Time,FullName,TotalMinutes,latitude,longitude,LatLong
0,1,Ryan,Kunz,Tallahassee,FL,42,M,1,0 days 16:34:58,Ryan Kunz,995,30.438083,-84.280933,"30.4380832, -84.2809332"
1,2,Joe,Patoray,PARRISH,FL,44,M,2,0 days 18:34:41,Joe Patoray,1115,27.587536,-82.425094,"27.5875357, -82.4250941"
2,3,Brian,Beal,Lincolnton,NC,30,M,3,0 days 19:11:46,Brian Beal,1152,35.473745,-81.254525,"35.473745, -81.2545251"
3,4,Lucas,Vidal,Cape Coral,FL,38,M,4,0 days 20:47:55,Lucas Vidal,1248,26.562574,-81.94388,"26.5625742, -81.9438802"
4,5,Jared,Patterson,Cincinnati,OH,27,M,5,0 days 22:03:30,Jared Patterson,1324,39.101454,-84.51246,"39.1014537, -84.5124602"


In [16]:
# Write the cleaned, geocoded results to disk without the row index.
df2.to_csv(path_or_buf='clean_ff_race_100_output.csv', index=False)