In [1]:
import pandas as pd

# Read every raw source CSV into its own DataFrame. The order of the paths
# below must match the order of the names on the left-hand side.
public_schools, colleges_and_unis, hospitals, t_stops, bus_stops = [
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/Public_Schools.csv',
        '../data/Colleges_and_Universities.csv',
        '../data/Hospitals.csv',
        '../data/MBTA_Systemwide_GTFS_T_Stops_Map.csv',
        '../data/MBTA_Systemwide_GTFS_Bus_Stops_Map.csv',
    )
]

In [2]:
# Show the column index of each raw frame so the name/coordinate columns
# that need standardizing can be identified.
for label, frame in (
    ("Public Schools columns:", public_schools),
    ("Colleges and Universities columns:", colleges_and_unis),
    ("Hospitals columns:", hospitals),
    ("T-Stops columns:", t_stops),
    ("Bus Stops columns:", bus_stops),
):
    print(label, frame.columns)

Public Schools columns: Index(['X', 'Y', 'BLDG_ID', 'BLDG_NAME', 'ADDRESS', 'CITY', 'ZIPCODE',
       'CSP_SCH_ID', 'SCH_ID', 'SCH_NAME', 'SCH_LABEL', 'SCH_TYPE', 'SHARED',
       'COMPLEX', 'OBJECTID'],
      dtype='object')
Colleges and Universities columns: Index(['OBJECTID', 'Match_type', 'Ref_ID', 'ID1', 'Id', 'SchoolId', 'Name',
       'Address', 'City', 'Zipcode', 'Contact', 'PhoneNumbe', 'YearBuilt',
       'NumStories', 'Cost', 'NumStudent', 'BackupPowe', 'ShelterCap',
       'Latitude', 'Longitude', 'Comment', 'X', 'Y', 'NumStudent12',
       'CampusHous', 'NumStudents13', 'URL', 'Address2013'],
      dtype='object')
Hospitals columns: Index(['X', 'Y', 'OBJECTID', 'Census_Tra', 'Name', 'Address', 'City',
       'Zipcode', 'Statea', 'Contact', 'PhoneNumbe', 'Latitude', 'Longitude',
       'Comment_', 'Primary_Alt', 'Alternative_Name', 'DailyAvg'],
      dtype='object')
T-Stops columns: Index(['X', 'Y', 'OBJECTID', 'stop_id', 'stop_code', 'stop_name', 'stop_desc',
       'platf

In [5]:
# Standardize the column names so every source exposes the same
# 'name' / 'latitude' / 'longitude' columns, and tag each row with its source type.

# Public Schools has no Latitude/Longitude columns, only X / Y. By GIS convention
# X is the horizontal axis (longitude) and Y is the vertical axis (latitude),
# so X -> 'longitude' and Y -> 'latitude'.
# NOTE(review): assumes X / Y hold WGS84 degrees rather than projected
# coordinates -- confirm against the CSV values.
public_schools = public_schools.rename(columns={'SCH_NAME': 'name', 'X': 'longitude', 'Y': 'latitude'})
public_schools['type'] = 'Public School'

colleges_and_unis = colleges_and_unis.rename(columns={'Name': 'name', 'Latitude': 'latitude', 'Longitude': 'longitude'})
colleges_and_unis['type'] = 'College/University'

hospitals = hospitals.rename(columns={'Name': 'name', 'Latitude': 'latitude', 'Longitude': 'longitude'})
hospitals['type'] = 'Hospital'

t_stops = t_stops.rename(columns={'stop_name': 'name', 'stop_lat': 'latitude', 'stop_lon': 'longitude'})
t_stops['type'] = 'T-Stop'

bus_stops = bus_stops.rename(columns={'stop_name': 'name', 'stop_lat': 'latitude', 'stop_lon': 'longitude'})
bus_stops['type'] = 'Bus Stop'

# Combine the datasets: stack all rows; columns that exist in only some
# sources are filled with NaN and discarded by the selection below.
combined_dataset = pd.concat([public_schools, colleges_and_unis, hospitals, t_stops, bus_stops], ignore_index=True, sort=False)

# Keep only the desired columns
combined_dataset = combined_dataset[['name', 'type', 'latitude', 'longitude']]

# Save the combined dataset to a CSV file
combined_dataset.to_csv('../data/combined_areas_of_dataset.csv', index=False)


In [12]:
# Flag every row where at least one of the two coordinates is missing,
# then report the total row count followed by the flagged rows.
missing_coords_mask = combined_dataset[['latitude', 'longitude']].isna().any(axis=1)
rows_missing_coords = combined_dataset[missing_coords_mask]

print("Number of rows in the combined dataset:", len(combined_dataset))
print("Rows with missing latitude or longitude:")
print(rows_missing_coords)

Number of rows in the combined dataset: 7267
Rows with missing latitude or longitude:
Empty DataFrame
Columns: [name, type, latitude, longitude]
Index: []


In [19]:
import folium
# Build the base map that the markers are added to.
# The centre is given as [latitude, longitude]; presumably central Boston,
# since the transit layers come from the MBTA feeds.
map_center = [42.3601, -71.0589]
m = folium.Map(location=map_center, zoom_start=12)

# Marker colour for each known location type.
_TYPE_COLORS = {
    "Public School": "blue",
    "College/University": "green",
    "Hospital": "red",
    "T-Stop": "purple",
    "Bus Stop": "orange",
}

# Colour used when a row's type is not in the table above, so that an
# unexpected or missing type never reaches the marker as None.
_DEFAULT_COLOR = "gray"


# Function to determine marker colors based on type
def get_color(type):
    """Return the marker colour name for a location type.

    Parameters
    ----------
    type : str
        Value of the dataset's 'type' column, e.g. "Hospital". The parameter
        name shadows the builtin ``type``; it is kept so existing keyword
        callers continue to work.

    Returns
    -------
    str
        The colour mapped to ``type``, or "gray" when the type is not one of
        the five known categories.
    """
    return _TYPE_COLORS.get(type, _DEFAULT_COLOR)

# Place one circle marker per row of the combined dataset; the popup shows
# the location's name followed by its type in parentheses.
for place in combined_dataset.itertuples(index=False):
    marker = folium.CircleMarker(
        location=[place.latitude, place.longitude],
        radius=5,
        color=get_color(place.type),
        fill=True,
        fill_opacity=0.7,
        popup=f"{place.name} ({place.type})",
    )
    marker.add_to(m)

# Write the map to an HTML file, then leave `m` as the cell's last
# expression so the notebook renders it inline.
m.save('../maps/areas_of_importance.html')
m