In [3]:
# imports
import pandas as pd
import numpy as np
import folium as fm

## Load Data

In [33]:
# Location of the campus CSV (OnRampsFinal.csv or an equivalent file).
# Upload the file first, then edit `path` so it points at your own copy.
path = '/content/OnRampsFinal.csv'
# Read the whole file into a DataFrame; the cells below use its Campus,
# District, Address, X and Y columns.
df = pd.read_csv(filepath_or_buffer=path)

## Visualization

#### Create the map of Texas

In [34]:
# Base map of Texas: a folium map centred on the [lat, lon] pair below,
# opened at zoom level 6.
texas_center = [31.9686, -99.9018]
texas_map = fm.Map(location=texas_center, zoom_start=6)
# Leaving the map as the cell's last expression displays it inline.
texas_map

#### Create icons for the schools

In [35]:
# Empty folium FeatureGroup intended to hold the school markers.
# NOTE(review): none of the later cells shown here add anything to it.
schools = fm.FeatureGroup()

In [36]:
# Pair every campus with its district, row by row. Each entry is a
# (campus, district) tuple rather than a single concatenated string.
campus_names = df['Campus'].tolist()
district_names = df['District'].tolist()
labels = [(campus, district) for campus, district in zip(campus_names, district_names)]

In [102]:
import geopandas as gpd
from shapely.geometry import Point
# One shapely Point per row, built from the X column (x coordinate) and the
# Y column (y coordinate) in that order.
geometry = [Point(x, y) for x, y in zip(df['X'], df['Y'])]
# Wrap the table in a GeoDataFrame that carries those points as its geometry
# and is tagged with the EPSG:4326 coordinate reference system.
gdf = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=geometry)

In [103]:
# Rebuild the base map, then draw every campus in `gdf` as one GeoJson layer.
texas_map = fm.Map(location=[31.9686, -99.9018], zoom_start=6)

# All campuses share the same orange star marker.
campus_marker = fm.Marker(icon=fm.Icon(icon='star', color='orange'))
# Hovering shows campus and district; clicking also shows the address.
hover_info = fm.GeoJsonTooltip(fields=["Campus", "District"])
click_info = fm.GeoJsonPopup(fields=["Campus", "District", "Address"])

campus_layer = fm.GeoJson(
    gdf,
    name="OnRamps High School Map",
    zoom_on_click=True,
    marker=campus_marker,
    tooltip=hover_info,
    popup=click_info,
)
campus_layer.add_to(texas_map)

# Display the finished map under the cell.
texas_map

In [147]:
# Export the campus map as a standalone HTML page; the relative file name
# means it is written to the current working directory.
full_map_html = 'OnRamps_Full_Map.html'
texas_map.save(full_map_html)

## **Extra:** Clustering by Distance

In [39]:
# Other imports
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import haversine_distances
from math import radians
import numpy as np

In [104]:
# Great-circle (haversine) distance between two points, in kilometres
def haversine(point1, point2):
    """Return the great-circle distance between two points in kilometres.

    The haversine formula is evaluated directly instead of building a 2x2
    distance matrix through scikit-learn for every pair. This function is
    used as a DBSCAN metric, so it is called once per pair of samples and
    the per-call overhead matters.

    Parameters
    ----------
    point1, point2 : sequence of two numbers
        Coordinates in decimal degrees, ordered ``(latitude, longitude)``.
        The order matters: swapping the two values gives a wrong distance,
        so callers holding (x, y) = (longitude, latitude) data must reorder.

    Returns
    -------
    float
        Distance along the sphere in kilometres, using a mean Earth radius
        of 6371 km.

    Raises
    ------
    ValueError
        If either point does not contain exactly two coordinates.
    """
    from math import asin, cos, radians, sin, sqrt

    earth_radius_km = 6371.0  # same value as the former 6371000 m / 1000

    # Convert latitude and longitude from degrees to radians
    lat1, lon1 = (radians(coord) for coord in point1)
    lat2, lon2 = (radians(coord) for coord in point2)

    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2)
    half_chord_sq = (
        sin((lat2 - lat1) / 2.0) ** 2
        + cos(lat1) * cos(lat2) * sin((lon2 - lon1) / 2.0) ** 2
    )
    # Rounding (or out-of-range latitudes) can push the term just outside
    # [0, 1]; clamp so sqrt/asin never raise a domain error.
    half_chord_sq = min(1.0, max(0.0, half_chord_sq))

    # Central angle in radians, scaled by the Earth radius to get kilometres
    central_angle = 2.0 * asin(sqrt(half_chord_sq))
    distance_km = central_angle * earth_radius_km

    return distance_km

In [143]:
# Density-based clustering driven by great-circle distance, so the Earth's
# curvature is taken into account when deciding which points are neighbours.

epsilon = 10  # neighbourhood radius: max distance between two samples (kilometers)
min_samples = 2  # fewest samples needed for a group to count as a cluster
dbscan = DBSCAN(
    metric=haversine,  # custom pairwise distance that returns kilometres
    eps=epsilon,
    min_samples=min_samples,
)

In [144]:
# cluster our data and add result to dataframe
# The haversine metric treats the first value of each sample as latitude and
# the second as longitude. In this data Y is the latitude and X the longitude
# (the markers are placed at [Y, X]), so the columns must be passed as Y, X;
# passing X, Y would feed longitudes in as latitudes and distort every distance.
cluster = dbscan.fit_predict(gdf[['Y', 'X']].to_numpy())
# One label per row; DBSCAN marks points that belong to no cluster with -1.
gdf['Cluster'] = cluster

In [145]:
# Palette of marker colours for the clusters. 'gray' is deliberately left out
# because it is reserved below for the points that were not clustered.
colors = ['lightblue', 'cadetblue', 'darkred', 'pink', 'beige',  'darkgreen',
          'lightred', 'lightgreen', 'darkpurple', 'purple', 'blue', 'red',
          'darkblue', 'orange', 'green', 'white', 'black', 'lightgray']
# Distinct cluster labels in order of first appearance, without the noise
# label -1 so that unclustered points always get the same colour.
clust = [label for label in gdf['Cluster'].drop_duplicates().tolist() if label != -1]
# Map each cluster to a colour, wrapping around the palette when there are
# more clusters than colours. A plain zip() would stop at the end of the
# palette and silently leave the remaining clusters without any entry.
value_color_dict = {
    label: colors[position % len(colors)]
    for position, label in enumerate(clust)
}
# add back -1 and assign it to be gray
value_color_dict[-1] = 'gray'

In [146]:
# Fresh base map for the clustered view.
texas_map = fm.Map(location=[31.0000, -100.0000], zoom_start=6)

# Add markers for each point in the GeoDataFrame with color based on 'Cluster'
for _, row in gdf.iterrows():
    # Fall back to the unclustered colour when a label has no entry. It must be
    # spelled 'gray' (as in value_color_dict): 'grey' is not one of the colour
    # names folium's Icon accepts.
    c = value_color_dict.get(row['Cluster'], 'gray')
    fm.Marker(
        # folium expects [latitude, longitude], i.e. Y first and then X
        location=[row['Y'], row['X']],
        icon=fm.Icon(color=c),
        popup=f'<b>Campus:</b> {row["Campus"]}<br><b>District:</b> {row["District"]}<br><b>Address:</b> {row["Address"]}',
        tooltip=f'<b>Campus:</b> {row["Campus"]}<br><b>District:</b> {row["District"]}'
    ).add_to(texas_map)
# Display the finished map under the cell.
texas_map

In [148]:
# Export the cluster-coloured map as a standalone HTML page in the current
# working directory.
clustered_map_html = 'OnRamps_full_clustered_map.html'
texas_map.save(clustered_map_html)