In [1]:
# Installing geopy library
!pip install geopy

Collecting geopy
  Downloading geopy-2.4.1-py3-none-any.whl (125 kB)
     ------------------------------------- 125.4/125.4 kB 22.6 kB/s eta 0:00:00
Collecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
     --------------------------------------- 40.3/40.3 kB 11.1 kB/s eta 0:00:00
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.4.1


In [2]:
# Importing other necessary libraries
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from geopy.extra.rate_limiter import RateLimiter

In [4]:
# Loading dataset into a dataframe
# A forward slash is accepted as a path separator on Windows, macOS and Linux,
# whereas the backslash form only resolves on Windows.
df = pd.read_csv("Downloads/ff_race_50.csv")

df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,1.0,Daniel,Wilson,Tulsa,OK,35.0,M,08:23:01,,,
1,2.0,Eric,Davis,Greenwood,IN,38.0,M,08:57:54,,,
2,3.0,Stewart,Edwards,New Smyrna Beach,FL,43.0,M,09:24:35,,,
3,4.0,Ron,Hammett,Montverde,FL,53.0,M,09:24:36,,,
4,5.0,Seth,Cain,Geneva,FL,44.0,M,09:42:17,,,


In [9]:
# Dropping empty columns
# errors="ignore" makes the cell safe to re-run: once the columns are gone,
# a second execution is a no-op instead of raising KeyError.
df = df.drop(columns=["Unnamed: 8", "Unnamed: 9", "Unnamed: 10"], errors="ignore")
df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time
0,1.0,Daniel,Wilson,Tulsa,OK,35.0,M,08:23:01
1,2.0,Eric,Davis,Greenwood,IN,38.0,M,08:57:54
2,3.0,Stewart,Edwards,New Smyrna Beach,FL,43.0,M,09:24:35
3,4.0,Ron,Hammett,Montverde,FL,53.0,M,09:24:36
4,5.0,Seth,Cain,Geneva,FL,44.0,M,09:42:17


In [10]:
# Join first and last name with a single space to form the "fullname" column
df["fullname"] = (
    df["First"]
    + " "
    + df["Last"]
)
df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,fullname
0,1.0,Daniel,Wilson,Tulsa,OK,35.0,M,08:23:01,Daniel Wilson
1,2.0,Eric,Davis,Greenwood,IN,38.0,M,08:57:54,Eric Davis
2,3.0,Stewart,Edwards,New Smyrna Beach,FL,43.0,M,09:24:35,Stewart Edwards
3,4.0,Ron,Hammett,Montverde,FL,53.0,M,09:24:36,Ron Hammett
4,5.0,Seth,Cain,Geneva,FL,44.0,M,09:42:17,Seth Cain


In [12]:
# Parse the "Time" column into timedeltas so that arithmetic can be performed on it
df["Time"] = pd.to_timedelta(arg=df["Time"])

In [13]:
# Express each finishing time in minutes: total elapsed seconds divided by 60
df["Total_Minutes"] = (
    df["Time"].dt.total_seconds() / 60
)

df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,fullname,Total_Minutes
0,1.0,Daniel,Wilson,Tulsa,OK,35.0,M,0 days 08:23:01,Daniel Wilson,503.016667
1,2.0,Eric,Davis,Greenwood,IN,38.0,M,0 days 08:57:54,Eric Davis,537.9
2,3.0,Stewart,Edwards,New Smyrna Beach,FL,43.0,M,0 days 09:24:35,Stewart Edwards,564.583333
3,4.0,Ron,Hammett,Montverde,FL,53.0,M,0 days 09:24:36,Ron Hammett,564.6
4,5.0,Seth,Cain,Geneva,FL,44.0,M,0 days 09:42:17,Seth Cain,582.283333


In [16]:
# Round the minute totals to zero decimal places
df["Total_Minutes"] = df["Total_Minutes"].round(decimals=0)

In [19]:
# Preview the first five rows to confirm the changes
df.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Division,Time,fullname,Total_Minutes
0,1.0,Daniel,Wilson,Tulsa,OK,35.0,M,0 days 08:23:01,Daniel Wilson,503.0
1,2.0,Eric,Davis,Greenwood,IN,38.0,M,0 days 08:57:54,Eric Davis,538.0
2,3.0,Stewart,Edwards,New Smyrna Beach,FL,43.0,M,0 days 09:24:35,Stewart Edwards,565.0
3,4.0,Ron,Hammett,Montverde,FL,53.0,M,0 days 09:24:36,Ron Hammett,565.0
4,5.0,Seth,Cain,Geneva,FL,44.0,M,0 days 09:42:17,Seth Cain,582.0


In [20]:
# Relabel the "Division" column as "Gender" for clarity
df = df.rename(columns={"Division": "Gender"})

In [23]:
# Getting latitude and longitude based on city and state

# Successful default-geocoder lookups keyed by address, so a repeated
# city/state pair costs a single network request.
_GEOCODE_CACHE = {}
# Rate-limited Nominatim geocode callable, built lazily and shared by every lookup.
_DEFAULT_GEOCODE = None


def _is_blank(value):
    """Return True for None, a float NaN, or text that is empty once stripped."""
    if value is None:
        return True
    if isinstance(value, float) and value != value:  # NaN never equals itself
        return True
    return not str(value).strip()


def _default_geocode():
    """Build the shared, rate-limited Nominatim geocode callable on first use."""
    global _DEFAULT_GEOCODE
    if _DEFAULT_GEOCODE is None:
        geolocator = Nominatim(user_agent="running", timeout=100)
        # RateLimiter spaces requests at least one second apart and retries
        # service errors; swallow_exceptions=False lets the final failure reach
        # the caller so the timeout handling in get_lat_long still applies.
        _DEFAULT_GEOCODE = RateLimiter(
            geolocator.geocode,
            min_delay_seconds=1,
            swallow_exceptions=False,
        )
    return _DEFAULT_GEOCODE


def get_lat_long(city, state, geocode=None):
    """Look up the coordinates of "<city>, <state>".

    Parameters
    ----------
    city, state :
        Place components. If either one is None, NaN or blank, no lookup is
        made (previously such rows were geocoded as the text "nan, nan").
    geocode : callable, optional
        Function taking the address string and returning an object with
        ``latitude`` / ``longitude`` attributes, or a falsy value when nothing
        is found. Defaults to a shared, rate-limited Nominatim geocoder;
        results are cached only when that default is used.

    Returns
    -------
    tuple
        ``(latitude, longitude)``, or ``(None, None)`` when the input is
        missing, the address is not found, or the request timed out.
    """
    if _is_blank(city) or _is_blank(state):
        return None, None

    address = f"{str(city).strip()}, {str(state).strip()}"

    use_default = geocode is None
    if use_default:
        if address in _GEOCODE_CACHE:
            return _GEOCODE_CACHE[address]
        geocode = _default_geocode()

    try:
        location = geocode(address)
    except GeocoderTimedOut:
        return None, None

    if not location:
        return None, None

    coordinates = (location.latitude, location.longitude)
    if use_default:
        # Only successes are cached, so a failed lookup is retried next time.
        _GEOCODE_CACHE[address] = coordinates
    return coordinates

In [24]:
# Geocode each row's City/State, then unzip the (latitude, longitude) pairs into two columns
df["latitude"], df["longitude"] = zip(
    *df.apply(lambda row: get_lat_long(row["City"], row["State"]), axis=1)
)

In [25]:
# Preview the first five rows, now including the latitude and longitude columns
df.head(n=5)

Unnamed: 0,Place,First,Last,City,State,Age,Gender,Time,fullname,Total_Minutes,latitude,longitude
0,1.0,Daniel,Wilson,Tulsa,OK,35.0,M,0 days 08:23:01,Daniel Wilson,503.0,36.156312,-95.992752
1,2.0,Eric,Davis,Greenwood,IN,38.0,M,0 days 08:57:54,Eric Davis,538.0,39.613699,-86.109543
2,3.0,Stewart,Edwards,New Smyrna Beach,FL,43.0,M,0 days 09:24:35,Stewart Edwards,565.0,29.025813,-80.927127
3,4.0,Ron,Hammett,Montverde,FL,53.0,M,0 days 09:24:36,Ron Hammett,565.0,28.600277,-81.673964
4,5.0,Seth,Cain,Geneva,FL,44.0,M,0 days 09:42:17,Seth Cain,582.0,28.739716,-81.115062


In [26]:
# Combine the coordinates into a single "latitude, longitude" text column.
# Rows without coordinates (failed lookups) stay missing instead of becoming
# the literal text "nan, nan" / "None, None".
has_coordinates = df["latitude"].notna() & df["longitude"].notna()
latlong_text = df["latitude"].astype(str) + ", " + df["longitude"].astype(str)
df["latlong"] = latlong_text.where(has_coordinates)

df.head()

Unnamed: 0,Place,First,Last,City,State,Age,Gender,Time,fullname,Total_Minutes,latitude,longitude,latlong
0,1.0,Daniel,Wilson,Tulsa,OK,35.0,M,0 days 08:23:01,Daniel Wilson,503.0,36.156312,-95.992752,"36.1563122, -95.9927516"
1,2.0,Eric,Davis,Greenwood,IN,38.0,M,0 days 08:57:54,Eric Davis,538.0,39.613699,-86.109543,"39.6136987, -86.1095429"
2,3.0,Stewart,Edwards,New Smyrna Beach,FL,43.0,M,0 days 09:24:35,Stewart Edwards,565.0,29.025813,-80.927127,"29.0258132, -80.9271271"
3,4.0,Ron,Hammett,Montverde,FL,53.0,M,0 days 09:24:36,Ron Hammett,565.0,28.600277,-81.673964,"28.6002769, -81.673964"
4,5.0,Seth,Cain,Geneva,FL,44.0,M,0 days 09:42:17,Seth Cain,582.0,28.739716,-81.115062,"28.7397163, -81.1150616"


In [27]:
# Exporting to csv
# The original non-raw literal contained a backslash followed by "c", which is
# an invalid escape sequence; "/" avoids it and also works outside Windows.
# NOTE(review): to_csv also writes the row index as an unnamed first column by
# default; pass index=False if downstream tools do not need it.
df.to_csv("Downloads/cleanedupdata.csv")