**School Distance**
-----
This notebook finds the distance from each rental property to its closest high school and closest primary school, and adds those distances to a file that already contains distances to other points of interest.

In [None]:
import googlemaps
import pandas as pd
import numpy as np
from geopy.distance import geodesic
import geopandas as gpd
import zipfile
import os

In [None]:

# Read the Google Maps API key from the environment instead of hard-coding it:
# a key committed to source control is exposed to everyone who can read the
# repository.
# NOTE(review): the key that used to be embedded here should be revoked/rotated.
api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this notebook."
    )
gmaps = googlemaps.Client(key=api_key)

# File paths
rental_data_path = '../data/curated/rental_with_distances.csv'
school_data_path = '../data/landing/dv346-schoollocations2023.csv'

# Step 5: Load the rental data CSV (with CBD distances already calculated)
# and the school locations CSV.
rental_df = pd.read_csv(rental_data_path, encoding="ISO-8859-1")
school_df = pd.read_csv(school_data_path, encoding="ISO-8859-1")

# Combined 'Pri/Sec' schools are included in both the high school and the
# primary school candidate sets.
high_school_df = school_df[school_df['School_Type'].isin(['Secondary', 'Pri/Sec'])]
primary_school_df = school_df[school_df['School_Type'].isin(['Primary', 'Pri/Sec'])]

# Filter out rows where latitude or longitude is NaN for rental data
rental_df_clean = rental_df.dropna(subset=['latitude', 'longitude']).copy()

# Initialize the new columns that will hold the closest schools and the
# distances to them. The order below is the column order of the saved CSV.
school_result_columns = [
    'closest_high_school',
    'closest_primary_school',
    'closest_high_school_type',
    'closest_primary_school_type',
    'straight_line_distance_km_high_school',
    'straight_line_distance_km_primary_school',
    'route_distance_to_closest_high_school_km',
    'route_distance_to_closest_primary_school_km',
]
for column_name in school_result_columns:
    rental_df_clean[column_name] = None

# Step 6: Find the closest school by straight-line distance
# (computed with geopy's `geodesic`, not a Haversine formula)

def calculate_closest_school(property_coords, school_df):
    """Find the school in *school_df* nearest to *property_coords*.

    Args:
        property_coords: Point handed to ``geodesic`` as the first argument;
            the caller supplies a (latitude, longitude) tuple.
        school_df: DataFrame with 'School_Name', 'Education_Sector', 'Y' and
            'X' columns. 'Y' and 'X' are used, in that order, as the school's
            coordinate pair.

    Returns:
        A ``(school_name, education_sector, distance_km)`` tuple for the
        nearest school. When no school yields a distance the result is
        ``(None, None, float('inf'))``.
    """
    # Running best candidate: (name, sector, distance in km).
    best = (None, None, float('inf'))

    # Schools without both coordinates cannot be measured, so drop them first.
    located = school_df.dropna(subset=['Y', 'X'])

    for school in located.itertuples(index=False):
        try:
            km = geodesic(property_coords, (school.Y, school.X)).kilometers
        except ValueError as e:
            # Report the offending school and carry on with the rest.
            print(f"Error calculating distance for school {school.School_Name}: {e}")
            continue

        # Strict comparison: on a tie the earlier school is kept.
        if km < best[2]:
            best = (school.School_Name, school.Education_Sector, km)

    return best

# Step 7: Calculate the driving route distance using Google Maps API
def calculate_route_distance(property_coords, school_coords, gmaps_client):
    """Return the driving distance in kilometres from a property to a school.

    Args:
        property_coords: Origin of the route, sent as the single entry of
            ``origins``.
        school_coords: Destination of the route, sent as the single entry of
            ``destinations``.
        gmaps_client: Client object exposing
            ``distance_matrix(origins=..., destinations=..., mode=...)``.

    Returns:
        The route distance in kilometres, or ``None`` when the request fails,
        the response is malformed, or the element status is not ``'OK'``.
        Failures are printed rather than raised so one bad lookup does not
        abort a long batch run.
    """
    try:
        # Request the driving distance between the property and the school
        result = gmaps_client.distance_matrix(
            origins=[property_coords],
            destinations=[school_coords],
            mode="driving",
        )
    except Exception as e:
        # Deliberately broad: any request failure is reported and skipped.
        print(f"Error calculating route distance for {property_coords}: {e}")
        return None

    try:
        # One origin and one destination, so the answer is the first element.
        element = result['rows'][0]['elements'][0]
        status = element['status']
        if status != 'OK':
            print(f"No valid route distance found for {property_coords} to school: {status}")
            return None
        # The API reports the distance in meters; convert to kilometers.
        return element['distance']['value'] / 1000
    except (KeyError, IndexError, TypeError) as e:
        print(f"Error calculating route distance for {property_coords}: unexpected response ({e!r})")
        return None

# Step 8: Process each property to find its closest high school and primary
# school, plus the driving distance to each.

# Checkpoints and the final result share one output file, so the input CSV is
# never overwritten with the filtered, partially processed frame.
output_path = '../data/curated/rental_distances_cbd_trains_schools.csv'
checkpoint_every = 100


def _nearest_named_school_coords(school_df, school_name, property_coords):
    """Return the (Y, X) pair of the school called *school_name* nearest to the property.

    A plain lookup by name could return a different school (or a row without
    coordinates) if the name occurs more than once, so the nearest match is
    chosen. NOTE(review): assumes names may repeat in the school file - confirm.
    Returns None when no match with usable coordinates exists.
    """
    matches = school_df[school_df['School_Name'] == school_name].dropna(subset=['Y', 'X'])
    best_coords = None
    best_km = float('inf')
    for lat, lon in zip(matches['Y'], matches['X']):
        try:
            km = geodesic(property_coords, (lat, lon)).kilometers
        except ValueError:
            continue
        if km < best_km:
            best_km = km
            best_coords = (lat, lon)
    return best_coords


def _fill_school_columns(idx, coords, kind, kind_df):
    """Fill the closest-school columns for one property and one school kind; return the school name."""
    name, sector, straight_km = calculate_closest_school(coords, kind_df)
    rental_df_clean.loc[idx, f'closest_{kind}'] = name
    rental_df_clean.loc[idx, f'closest_{kind}_type'] = sector
    rental_df_clean.loc[idx, f'straight_line_distance_km_{kind}'] = straight_km

    # Without a located school there is nothing to route to; leave the route
    # distance empty instead of failing on the lookup.
    school_coords = _nearest_named_school_coords(kind_df, name, coords)
    route_km = None
    if school_coords is not None:
        route_km = calculate_route_distance(coords, school_coords, gmaps)
    rental_df_clean.loc[idx, f'route_distance_to_closest_{kind}_km'] = route_km
    return name


rental_df_clean['coordinates'] = list(zip(rental_df_clean['latitude'], rental_df_clean['longitude']))

# Count processed rows explicitly: index labels have gaps once rows with
# missing coordinates were dropped, so they cannot drive the checkpoint cadence.
for processed, (idx, row) in enumerate(rental_df_clean.iterrows(), start=1):
    # Only rows without a high school route distance are (re)computed.
    if pd.isnull(row['route_distance_to_closest_high_school_km']):
        coords = row['coordinates']

        # finding closest high school
        closest_high_school = _fill_school_columns(idx, coords, 'high_school', high_school_df)
        print(closest_high_school)

        # finding closest primary school
        _fill_school_columns(idx, coords, 'primary_school', primary_school_df)

    # Save progress every `checkpoint_every` rows
    if processed % checkpoint_every == 0:
        print(f"Processed {processed} rows, saving progress...")
        rental_df_clean.to_csv(output_path, index=False)

# Final save after processing all data
rental_df_clean.to_csv(output_path, index=False)
print("Processing completed.")


In [None]:
# Show the processed rental DataFrame as this cell's output.
rental_df_clean