In [1]:
### Notebook for plotting graphics from polygons
import math
from math import sin, cos, sqrt, atan2, radians
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon, LineString, MultiLineString
from shapely.geometry import MultiPolygon
from bs4 import BeautifulSoup

#### Load data from kml file exported by Google Earth

file_path = ('../data/kml/ballparks.kml')

with open(file_path) as file:

    xml_data = file.read()



# Initialize soup variables for parsing file
soup = BeautifulSoup(xml_data, 'xml')

folders = soup.Document.Folder
list = soup.Document.Folder.find_all('Folder')
# layers = soup.Document.Folder.Folder
# polygons = soup.Document.Folder.Placemark.Polygon

## Create a dataframe to hold the data parsed from xml
df = pd.DataFrame(columns=['field', 'foul', 'fop'])


## Loop through the folders and extract the data
i = 0   

for i in range(len(list)):

    folders = list[i]
    field_name = folders.find('name').text
    foul = folders.find_all('coordinates')[0].text
    fop = folders.find_all('coordinates')[1].text

    row = {
        'field': field_name,
        'foul': foul,
        'fop': fop
    }

    i+=1

    df = df.append(row, ignore_index=True)

## Cleaning
# remove new line and and space characters from coordinates
df = df.replace(r'\n','', regex=True) 
df = df.replace(r'\t','', regex=True) 

## Drop any duplicate rows
df = df.drop_duplicates(subset=['field'], keep='first')

## Drop any rows with empty fields
df = df[(df != 0).all(1)]

# ## remove any numberic characters and . from field names
# # There shouldn't be any anymore because of the validation I did before exporting kml
# df['field'] = df['field'].str.replace(r'\d+', '')

## Parse field names to get level column using regex
import re
re_mlb = re.compile(r'mlb', re.IGNORECASE)
re_pro = re.compile(r'pro', re.IGNORECASE)
re_college = re.compile(r'college', re.IGNORECASE)
re_youth = re.compile(r'youth', re.IGNORECASE)
re_muni = re.compile(r'muni', re.IGNORECASE)

df['level'] = df['field'].apply(lambda x: 'mlb' 
        if re_mlb.search(x) else 'pro' 
        if re_pro.search(x) else 'college' 
        if re_college.search(x) else 'youth' 
        if re_youth.search(x) else 'muni' 
        if re_muni.search(x) else 'high_school')

# clean up the field names
# remove the level from the field name
df['field'] = df['field'].str.replace(r'MLB', '')
df['field'] = df['field'].str.replace(r'pro', '')
df['field'] = df['field'].str.replace(r'college', '')
df['field'] = df['field'].str.replace(r'High School', 'HS')
# remove - from end of field name
df['field'] = df['field'].str.replace(r'- $', '')

## Output test csv
# df.to_csv('TEMP/level2_tost.csv', index=False)

df['foul_poly'] = df['foul'].apply(lambda x: {'type': 'Polygon', 'coordinates': [[tuple(map(float, coord.split(','))) for coord in x.split()]]})
df['fop_poly'] = df['fop'].apply(lambda x: {'type': 'Polygon', 'coordinates': [[tuple(map(float, coord.split(','))) for coord in x.split()]]})

## get homeplate coordinates

df['fop_hp'] = df['fop'].apply(lambda x: tuple(map(float, x.split(',')[0:2])))

#### Merge the foul_poly and fop_poly columns into a single polygon column

import shapely.geometry as geom
import shapely.ops as ops
from shapely import affinity

# Convert the fop_hp column to a shapely point
df['fop_hp'] = df['fop_hp'].apply(lambda x: Point(x))

df['poly'] = df.apply(lambda x: ops.unary_union([geom.shape(x['foul_poly']), geom.shape(x['fop_poly'])]), axis=1)

### Drop unnecessary columns

df = df.drop(columns=['foul', 'fop', 'foul_poly', 'fop_poly'])

## Rename fop_hp column to homeplate
df = df.rename(columns={'fop_hp': 'homeplate'}) 

  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append(row, ignore_index=True)
  df = df.append

In [2]:
# # Upgrade bitches This code was generated by gpt4

# # The original code was measuring distances every degree for every field but it had run for nearly 30 minutes without finishing.

# ## In this code I changed the step to every five degrees to reduce compute

# # Additional imports
# from shapely.ops import nearest_points
# from math import radians, sin, cos

# # Function to calculate distances from home plate to the exterior boundary
# def calculate_distances(home_plate, field_boundary, step_angle=5):
#     distances = []
#     for angle in range(0, 360, step_angle):
#         angle_rad = radians(angle)
#         search_radius = 0
#         search_step = 10
#         search_point = None

#         while True:
#             search_radius += search_step
#             search_x = home_plate.x + search_radius * cos(angle_rad)
#             search_y = home_plate.y + search_radius * sin(angle_rad)
#             search_point_candidate = Point(search_x, search_y)
#             if field_boundary.contains(search_point_candidate):
#                 search_point = search_point_candidate
#             else:
#                 if search_point is not None:
#                     break

#         if search_point is not None:
#             nearest_boundary_point = nearest_points(search_point, field_boundary)[1]
#             distance = home_plate.distance(nearest_boundary_point) * 3.28084  # Convert to feet
#             distances.append((angle, distance))

#     return distances

# # Apply the function to each row in the DataFrame with step_angle=5
# df['distances'] = df.apply(lambda x: calculate_distances(x['homeplate'], x['poly'], step_angle=5), axis=1)



In [3]:
# Function to get the appropriate UTM EPSG code based on latitude
def get_utm_epsg(lat):
    utm_band = str((math.floor((lat + 180) / 6) % 60) + 1)
    if len(utm_band) == 1:
        utm_band = '0' + utm_band
    if lat >= 0:
        epsg_code = '326' + utm_band
    else:
        epsg_code = '327' + utm_band
    return epsg_code

# Function to calculate distances from home plate to the exterior boundary
def calculate_distances(home_plate, field_boundary, step_angle=1):
    distances = []
    
    # Get the appropriate UTM CRS for the home_plate point
    utm_crs = get_utm_epsg(home_plate.y)
    
    # Create GeoDataFrames for home_plate and field_boundary
    home_plate_gdf = gpd.GeoDataFrame({'geometry': [home_plate]}, crs='EPSG:4326')
    field_boundary_gdf = gpd.GeoDataFrame({'geometry': [field_boundary]}, crs='EPSG:4326')
    
    # Convert the home_plate and field_boundary to UTM CRS
    home_plate_utm = home_plate_gdf.to_crs(utm_crs).iloc[0].geometry
    field_boundary_utm = field_boundary_gdf.to_crs(utm_crs).iloc[0].geometry

    for angle in range(0, 360, step_angle):
        angle_rad = radians(angle)
        
        # Create a line from the home plate to a faraway point in the direction of the angle
        search_x = home_plate_utm.x + 10000 * cos(angle_rad)
        search_y = home_plate_utm.y + 10000 * sin(angle_rad)
        search_line = LineString([home_plate_utm, Point(search_x, search_y)])
        
        # Calculate the intersection between the line and the field boundary
        intersection = field_boundary_utm.intersection(search_line)
        
        # Handle cases where the intersection returns multiple LineStrings
        if isinstance(intersection, MultiLineString):
            intersection = linemerge(intersection)
        
        # Calculate the distance from the home plate to the intersection point
        if intersection.is_empty:
            distance = 0
        else:
            distance = home_plate_utm.distance(intersection) * 3.28084  # Convert to feet

        distances.append((angle, distance))

    return distances

# Apply the function to each row in the DataFrame with step_angle=5
df['distances'] = df.apply(lambda x: calculate_distances(x['homeplate'], x['poly'], step_angle=5), axis=1)


NameError: name 'MultiLineString' is not defined

In [None]:
df.head()

In [None]:
import numpy as np
import pandas as pd
from shapely.geometry import Point, LineString

# Define a function to measure distance from a point to a line segment
def dist_to_line_segment(p, l):
    # Get the distance from the point to the line
    dist = p.distance(l)
    # Get the intersection point between the line and the perpendicular line that goes through the point
    intersection = l.interpolate(l.project(p))
    # Check if the intersection point is within the line segment
    if intersection.distance(LineString([l.coords[0], p])) + intersection.distance(LineString([l.coords[1], p])) == l.length:
        # If it is, return the distance to the intersection point
        return dist
    else:
        # If it's not, return the minimum distance to either end of the line segment
        return min(p.distance(l.coords[0]), p.distance(l.coords[1]))

# Define a function to measure distance from homeplate to the edge of the baseball field
def dist_to_edge(homeplate, poly):
    # Create a list of points on the edge of the polygon
    edge_points = []
    for i in range(360):
        angle = i * np.pi / 180
        point = Point(homeplate.x + np.cos(angle), homeplate.y + np.sin(angle))
        line = LineString([homeplate, point])
        intersection = poly.intersection(line)
        if type(intersection) == Point:
            edge_points.append(intersection)
    # Measure the distance from homeplate to each point on the edge
    distances = [homeplate.distance(p) for p in edge_points]
    return distances

# Add a new column to the dataframe to store the distances
df['edge_distances'] = df.apply(lambda row: dist_to_edge(row['homeplate'], row['poly']), axis=1)


In [None]:
print(df['edge_distances'])