In [None]:
import pandas as pd

# Split the geocoded address table into `num_parts` consecutive row ranges and
# write each range to its own numbered CSV next to the source file.
file_path = '../data/curated/property/property3_addresses_to_lat_lng.csv'
df = pd.read_csv(file_path)

num_parts = 5
rows_per_part = len(df) // num_parts

for i in range(num_parts):
    start_row = i * rows_per_part
    # An open-ended slice on the final part picks up the remainder rows left
    # over by the integer division above.
    stop_row = None if i == num_parts - 1 else start_row + rows_per_part
    part_df = df.iloc[start_row:stop_row]

    part_df.to_csv(f'../data/curated/property/property3_addresses_to_lat_lng_{i+1}.csv', index=False)

print("The data has been successfully divided into five equal parts and saved.")


# 1

In [None]:
import pandas as pd
import geojson
from geojson import Feature, Point, FeatureCollection

# Load part 1 of the property table and the school table, keeping only rows
# that carry both coordinates.
coordinate_columns = ['longitude', 'latitude']
houses_df = pd.read_csv(
    "../data/curated/property/property3_addresses_to_lat_lng_1.csv"
).dropna(subset=coordinate_columns)
stations_df = pd.read_csv(
    "../data/curated/property/VIC_School_Location.csv", encoding='ISO-8859-1'
).dropna(subset=coordinate_columns)


def create_geojson_feature(row, feature_type='house'):
    """Turn one table row into a GeoJSON point feature.

    Args:
        row: record supporting ``row['longitude']``, ``row['latitude']`` and
            ``row.get('address', ...)``.
        feature_type: label stored under the 'type' property.

    Returns:
        A ``Feature`` whose geometry is ``Point((longitude, latitude))`` and
        whose properties hold 'address' ('unknown' when the row has no such
        field) and 'type'.
    """
    return Feature(
        geometry=Point((row['longitude'], row['latitude'])),
        properties={
            'address': row.get('address', 'unknown'),
            'type': feature_type,
        },
    )

# Convert every remaining row into a GeoJSON point feature, tagged by kind.
house_features = houses_df.apply(create_geojson_feature, axis=1, args=('house',)).tolist()
station_features = stations_df.apply(create_geojson_feature, axis=1, args=('station',)).tolist()

house_feature_collection = FeatureCollection(house_features)
station_feature_collection = FeatureCollection(station_features)

houses_geojson_output_path = '../data/raw/external/API/3/property_3_school1_geojson.geojson'
stations_geojson_output_path = '../data/raw/external/API/3/vic_school1_geojson.geojson'

# Each collection goes to its own file; houses are written first.
for geojson_path, collection in (
    (houses_geojson_output_path, house_feature_collection),
    (stations_geojson_output_path, station_feature_collection),
):
    with open(geojson_path, 'w') as f:
        geojson.dump(collection, f)

print("GeoJSON files for houses and schools have been generated.")


In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

import os

# The openrouteservice key is a credential and must not live in the notebook.
# Read it from the environment: ORS_API_KEY_1 is specific to this part, with
# ORS_API_KEY as a shared fallback. Any key previously committed here should be
# revoked.
api_key = os.environ.get('ORS_API_KEY_1') or os.environ.get('ORS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the ORS_API_KEY_1 (or ORS_API_KEY) environment variable to an "
        "openrouteservice API key before running this cell."
    )
client = openrouteservice.Client(key=api_key)

# Reload the GeoJSON files produced for part 1 by the previous cell.
with open('../data/raw/external/API/3/property_3_school1_geojson.geojson', 'r') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/3/vic_school1_geojson.geojson', 'r') as f:
    stations_geojson = geojson.load(f)

def extract_coordinates(features):
    """Flatten a GeoJSON feature collection into (lat, lon, address) tuples.

    Args:
        features: mapping with a 'features' list; each entry provides
            ``['geometry']['coordinates']`` as a two-item (lon, lat) sequence
            and a ``['properties']`` mapping.

    Returns:
        A list with one ``(lat, lon, address)`` tuple per feature, in input
        order. ``address`` is 'unknown' when the properties have no 'address'.
    """
    geometry_and_properties = (
        (feature['geometry']['coordinates'], feature['properties'])
        for feature in features['features']
    )
    # GeoJSON stores (lon, lat); the returned tuples put latitude first.
    return [
        (lat, lon, props.get('address', 'unknown'))
        for (lon, lat), props in geometry_and_properties
    ]

# (lat, lon, address) triples recovered from each GeoJSON file.
rent_coordinates = extract_coordinates(rent_geojson)
station_coordinates = extract_coordinates(stations_geojson)

# Coordinate-only views in (lon, lat) order, used as the request locations.
house_coords = [(lng, lt) for lt, lng, _label in rent_coordinates]
station_coords = [(lng, lt) for lt, lng, _label in station_coordinates]

def calculate_distance_matrix_in_batches(house_coords, station_coords, batch_size=1,
                                         ors_client=None, house_addresses=None,
                                         station_names=None, pause_seconds=1):
    """Find the closest station, by driving distance, for every house.

    Houses are sent to the matrix endpoint ``batch_size`` at a time as
    sources, with every station as a destination.

    Args:
        house_coords: list of (lon, lat) pairs, one per house.
        station_coords: list of (lon, lat) pairs, one per station.
        batch_size: number of houses per request.
        ors_client: object exposing ``distance_matrix(...)``; defaults to the
            notebook-level ``client``.
        house_addresses: labels parallel to ``house_coords``; defaults to the
            third field of each notebook-level ``rent_coordinates`` entry.
        station_names: labels parallel to ``station_coords``; defaults to the
            third field of each notebook-level ``station_coordinates`` entry.
        pause_seconds: delay inserted between consecutive requests.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_station' and
        'min_driving_distance_km'. Houses whose request failed, or for which
        every distance came back as None, are omitted.
    """
    all_shortest_distances = []
    if not house_coords:
        return all_shortest_distances

    # Fall back to the notebook globals so existing three-argument calls keep
    # working unchanged.
    if ors_client is None:
        ors_client = client
    if house_addresses is None:
        house_addresses = [entry[2] for entry in rent_coordinates]
    if station_names is None:
        station_names = [entry[2] for entry in station_coordinates]

    for start in range(0, len(house_coords), batch_size):
        # Throttle between requests only; no pause is needed after the last.
        if start and pause_seconds > 0:
            time.sleep(pause_seconds)

        batch_houses = house_coords[start:start + batch_size]
        n_sources = len(batch_houses)
        try:
            matrix = ors_client.distance_matrix(
                locations=batch_houses + station_coords,
                profile='driving-car',
                sources=list(range(n_sources)),
                destinations=list(range(n_sources, n_sources + len(station_coords))),
                metrics=['distance'],
                units='km'
            )

            for offset, house_distances in enumerate(matrix['distances'][:n_sources]):
                # Keep the column index with each distance so the winning
                # station is found without a second scan; ties resolve to the
                # lowest index.
                reachable = [
                    (distance, idx)
                    for idx, distance in enumerate(house_distances)
                    if distance is not None
                ]
                if not reachable:
                    continue
                min_distance, closest_station_idx = min(reachable)
                all_shortest_distances.append({
                    'rent_address': house_addresses[start + offset],
                    'closest_station': station_names[closest_station_idx],
                    'min_driving_distance_km': min_distance
                })
        except Exception as e:
            # Best effort: report which houses were dropped and carry on.
            print(
                f"Error calculating distance matrix for houses "
                f"{start}-{start + n_sources - 1}: {e}"
            )

    return all_shortest_distances

output_path = '../data/raw/external/API/3/closest_school1_distance_final_3.csv'

# One house per request; the result rows are written out as a CSV.
shortest_distances = calculate_distance_matrix_in_batches(
    house_coords,
    station_coords,
    batch_size=1,
)
shortest_distances_df = pd.DataFrame(shortest_distances)
shortest_distances_df.to_csv(output_path, index=False)

print(f"{output_path}")


# 2

In [None]:
import pandas as pd
import geojson
from geojson import Feature, Point, FeatureCollection

# Part 2 of the split is written to ../data/curated/property/ by the splitting
# cell at the top of this notebook, so read it from there.
houses_df = pd.read_csv("../data/curated/property/property3_addresses_to_lat_lng_2.csv")
# NOTE(review): part 1 reads the school list from ../data/curated/property/
# instead — confirm which copy is canonical.
stations_df = pd.read_csv("../data/raw/external/API/3/VIC_School_Location.csv", encoding='ISO-8859-1')

# Rows without both coordinates cannot become GeoJSON points.
houses_df = houses_df.dropna(subset=['longitude', 'latitude'])
stations_df = stations_df.dropna(subset=['longitude', 'latitude'])

def create_geojson_feature(row, feature_type='house'):
    """Turn one table row into a GeoJSON point feature.

    Args:
        row: record supporting ``row['longitude']``, ``row['latitude']`` and
            ``row.get('address', ...)``.
        feature_type: label stored under the 'type' property.

    Returns:
        A ``Feature`` whose geometry is ``Point((longitude, latitude))`` and
        whose properties hold 'address' ('unknown' when the row has no such
        field) and 'type'.
    """
    return Feature(
        geometry=Point((row['longitude'], row['latitude'])),
        properties={
            'address': row.get('address', 'unknown'),
            'type': feature_type,
        },
    )

# Convert every remaining row into a GeoJSON point feature, tagged by kind.
house_features = houses_df.apply(create_geojson_feature, axis=1, args=('house',)).tolist()
station_features = stations_df.apply(create_geojson_feature, axis=1, args=('station',)).tolist()

house_feature_collection = FeatureCollection(house_features)
station_feature_collection = FeatureCollection(station_features)

houses_geojson_output_path = '../data/raw/external/API/3/property_3_school2_geojson.geojson'
stations_geojson_output_path = '../data/raw/external/API/3/vic_school2_geojson.geojson'

# Each collection goes to its own file; houses are written first.
for geojson_path, collection in (
    (houses_geojson_output_path, house_feature_collection),
    (stations_geojson_output_path, station_feature_collection),
):
    with open(geojson_path, 'w') as f:
        geojson.dump(collection, f)

print("GeoJSON files for houses and schools have been generated.")


In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

import os

# The openrouteservice key is a credential and must not live in the notebook.
# Read it from the environment: ORS_API_KEY_2 is specific to this part, with
# ORS_API_KEY as a shared fallback. Any key previously committed here should be
# revoked.
api_key = os.environ.get('ORS_API_KEY_2') or os.environ.get('ORS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the ORS_API_KEY_2 (or ORS_API_KEY) environment variable to an "
        "openrouteservice API key before running this cell."
    )
client = openrouteservice.Client(key=api_key)

# Reload the GeoJSON files produced for part 2 by the previous cell.
with open('../data/raw/external/API/3/property_3_school2_geojson.geojson', 'r') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/3/vic_school2_geojson.geojson', 'r') as f:
    stations_geojson = geojson.load(f)

def extract_coordinates(features):
    """Flatten a GeoJSON feature collection into (lat, lon, address) tuples.

    Args:
        features: mapping with a 'features' list; each entry provides
            ``['geometry']['coordinates']`` as a two-item (lon, lat) sequence
            and a ``['properties']`` mapping.

    Returns:
        A list with one ``(lat, lon, address)`` tuple per feature, in input
        order. ``address`` is 'unknown' when the properties have no 'address'.
    """
    geometry_and_properties = (
        (feature['geometry']['coordinates'], feature['properties'])
        for feature in features['features']
    )
    # GeoJSON stores (lon, lat); the returned tuples put latitude first.
    return [
        (lat, lon, props.get('address', 'unknown'))
        for (lon, lat), props in geometry_and_properties
    ]

# (lat, lon, address) triples recovered from each GeoJSON file.
rent_coordinates = extract_coordinates(rent_geojson)
station_coordinates = extract_coordinates(stations_geojson)

# Coordinate-only views in (lon, lat) order, used as the request locations.
house_coords = [(lng, lt) for lt, lng, _label in rent_coordinates]
station_coords = [(lng, lt) for lt, lng, _label in station_coordinates]

def calculate_distance_matrix_in_batches(house_coords, station_coords, batch_size=1,
                                         ors_client=None, house_addresses=None,
                                         station_names=None, pause_seconds=1):
    """Find the closest station, by driving distance, for every house.

    Houses are sent to the matrix endpoint ``batch_size`` at a time as
    sources, with every station as a destination.

    Args:
        house_coords: list of (lon, lat) pairs, one per house.
        station_coords: list of (lon, lat) pairs, one per station.
        batch_size: number of houses per request.
        ors_client: object exposing ``distance_matrix(...)``; defaults to the
            notebook-level ``client``.
        house_addresses: labels parallel to ``house_coords``; defaults to the
            third field of each notebook-level ``rent_coordinates`` entry.
        station_names: labels parallel to ``station_coords``; defaults to the
            third field of each notebook-level ``station_coordinates`` entry.
        pause_seconds: delay inserted between consecutive requests.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_station' and
        'min_driving_distance_km'. Houses whose request failed, or for which
        every distance came back as None, are omitted.
    """
    all_shortest_distances = []
    if not house_coords:
        return all_shortest_distances

    # Fall back to the notebook globals so existing three-argument calls keep
    # working unchanged.
    if ors_client is None:
        ors_client = client
    if house_addresses is None:
        house_addresses = [entry[2] for entry in rent_coordinates]
    if station_names is None:
        station_names = [entry[2] for entry in station_coordinates]

    for start in range(0, len(house_coords), batch_size):
        # Throttle between requests only; no pause is needed after the last.
        if start and pause_seconds > 0:
            time.sleep(pause_seconds)

        batch_houses = house_coords[start:start + batch_size]
        n_sources = len(batch_houses)
        try:
            matrix = ors_client.distance_matrix(
                locations=batch_houses + station_coords,
                profile='driving-car',
                sources=list(range(n_sources)),
                destinations=list(range(n_sources, n_sources + len(station_coords))),
                metrics=['distance'],
                units='km'
            )

            for offset, house_distances in enumerate(matrix['distances'][:n_sources]):
                # Keep the column index with each distance so the winning
                # station is found without a second scan; ties resolve to the
                # lowest index.
                reachable = [
                    (distance, idx)
                    for idx, distance in enumerate(house_distances)
                    if distance is not None
                ]
                if not reachable:
                    continue
                min_distance, closest_station_idx = min(reachable)
                all_shortest_distances.append({
                    'rent_address': house_addresses[start + offset],
                    'closest_station': station_names[closest_station_idx],
                    'min_driving_distance_km': min_distance
                })
        except Exception as e:
            # Best effort: report which houses were dropped and carry on.
            print(
                f"Error calculating distance matrix for houses "
                f"{start}-{start + n_sources - 1}: {e}"
            )

    return all_shortest_distances


output_path = '../data/raw/external/API/3/closest_school2_distance_final_3.csv'

# One house per request; the result rows are written out as a CSV.
shortest_distances = calculate_distance_matrix_in_batches(
    house_coords,
    station_coords,
    batch_size=1,
)
shortest_distances_df = pd.DataFrame(shortest_distances)
shortest_distances_df.to_csv(output_path, index=False)

print(f"{output_path}")


# 3

In [None]:
import pandas as pd
import geojson
from geojson import Feature, Point, FeatureCollection

# Part 3 of the split is written to ../data/curated/property/ by the splitting
# cell at the top of this notebook, so read it from there.
houses_df = pd.read_csv("../data/curated/property/property3_addresses_to_lat_lng_3.csv")
# NOTE(review): part 1 reads the school list from ../data/curated/property/
# instead — confirm which copy is canonical.
stations_df = pd.read_csv("../data/raw/external/API/3/VIC_School_Location.csv", encoding='ISO-8859-1')

# Rows without both coordinates cannot become GeoJSON points.
houses_df = houses_df.dropna(subset=['longitude', 'latitude'])
stations_df = stations_df.dropna(subset=['longitude', 'latitude'])

def create_geojson_feature(row, feature_type='house'):
    """Turn one table row into a GeoJSON point feature.

    Args:
        row: record supporting ``row['longitude']``, ``row['latitude']`` and
            ``row.get('address', ...)``.
        feature_type: label stored under the 'type' property.

    Returns:
        A ``Feature`` whose geometry is ``Point((longitude, latitude))`` and
        whose properties hold 'address' ('unknown' when the row has no such
        field) and 'type'.
    """
    return Feature(
        geometry=Point((row['longitude'], row['latitude'])),
        properties={
            'address': row.get('address', 'unknown'),
            'type': feature_type,
        },
    )

# Convert every remaining row into a GeoJSON point feature, tagged by kind.
house_features = houses_df.apply(create_geojson_feature, axis=1, args=('house',)).tolist()
station_features = stations_df.apply(create_geojson_feature, axis=1, args=('station',)).tolist()

house_feature_collection = FeatureCollection(house_features)
station_feature_collection = FeatureCollection(station_features)

houses_geojson_output_path = '../data/raw/external/API/3/property_3_school3_geojson.geojson'
stations_geojson_output_path = '../data/raw/external/API/3/vic_school3_geojson.geojson'

# Each collection goes to its own file; houses are written first.
for geojson_path, collection in (
    (houses_geojson_output_path, house_feature_collection),
    (stations_geojson_output_path, station_feature_collection),
):
    with open(geojson_path, 'w') as f:
        geojson.dump(collection, f)

print("GeoJSON files for houses and schools have been generated.")


In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

import os

# The openrouteservice key is a credential and must not live in the notebook.
# Read it from the environment: ORS_API_KEY_3 is specific to this part, with
# ORS_API_KEY as a shared fallback. Any key previously committed here should be
# revoked.
api_key = os.environ.get('ORS_API_KEY_3') or os.environ.get('ORS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the ORS_API_KEY_3 (or ORS_API_KEY) environment variable to an "
        "openrouteservice API key before running this cell."
    )
client = openrouteservice.Client(key=api_key)

# Reload the GeoJSON files produced for part 3 by the previous cell.
with open('../data/raw/external/API/3/property_3_school3_geojson.geojson', 'r') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/3/vic_school3_geojson.geojson', 'r') as f:
    stations_geojson = geojson.load(f)

def extract_coordinates(features):
    """Flatten a GeoJSON feature collection into (lat, lon, address) tuples.

    Args:
        features: mapping with a 'features' list; each entry provides
            ``['geometry']['coordinates']`` as a two-item (lon, lat) sequence
            and a ``['properties']`` mapping.

    Returns:
        A list with one ``(lat, lon, address)`` tuple per feature, in input
        order. ``address`` is 'unknown' when the properties have no 'address'.
    """
    geometry_and_properties = (
        (feature['geometry']['coordinates'], feature['properties'])
        for feature in features['features']
    )
    # GeoJSON stores (lon, lat); the returned tuples put latitude first.
    return [
        (lat, lon, props.get('address', 'unknown'))
        for (lon, lat), props in geometry_and_properties
    ]

# (lat, lon, address) triples recovered from each GeoJSON file.
rent_coordinates = extract_coordinates(rent_geojson)
station_coordinates = extract_coordinates(stations_geojson)

# Coordinate-only views in (lon, lat) order, used as the request locations.
house_coords = [(lng, lt) for lt, lng, _label in rent_coordinates]
station_coords = [(lng, lt) for lt, lng, _label in station_coordinates]

def calculate_distance_matrix_in_batches(house_coords, station_coords, batch_size=1,
                                         ors_client=None, house_addresses=None,
                                         station_names=None, pause_seconds=1):
    """Find the closest station, by driving distance, for every house.

    Houses are sent to the matrix endpoint ``batch_size`` at a time as
    sources, with every station as a destination.

    Args:
        house_coords: list of (lon, lat) pairs, one per house.
        station_coords: list of (lon, lat) pairs, one per station.
        batch_size: number of houses per request.
        ors_client: object exposing ``distance_matrix(...)``; defaults to the
            notebook-level ``client``.
        house_addresses: labels parallel to ``house_coords``; defaults to the
            third field of each notebook-level ``rent_coordinates`` entry.
        station_names: labels parallel to ``station_coords``; defaults to the
            third field of each notebook-level ``station_coordinates`` entry.
        pause_seconds: delay inserted between consecutive requests.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_station' and
        'min_driving_distance_km'. Houses whose request failed, or for which
        every distance came back as None, are omitted.
    """
    all_shortest_distances = []
    if not house_coords:
        return all_shortest_distances

    # Fall back to the notebook globals so existing three-argument calls keep
    # working unchanged.
    if ors_client is None:
        ors_client = client
    if house_addresses is None:
        house_addresses = [entry[2] for entry in rent_coordinates]
    if station_names is None:
        station_names = [entry[2] for entry in station_coordinates]

    for start in range(0, len(house_coords), batch_size):
        # Throttle between requests only; no pause is needed after the last.
        if start and pause_seconds > 0:
            time.sleep(pause_seconds)

        batch_houses = house_coords[start:start + batch_size]
        n_sources = len(batch_houses)
        try:
            matrix = ors_client.distance_matrix(
                locations=batch_houses + station_coords,
                profile='driving-car',
                sources=list(range(n_sources)),
                destinations=list(range(n_sources, n_sources + len(station_coords))),
                metrics=['distance'],
                units='km'
            )

            for offset, house_distances in enumerate(matrix['distances'][:n_sources]):
                # Keep the column index with each distance so the winning
                # station is found without a second scan; ties resolve to the
                # lowest index.
                reachable = [
                    (distance, idx)
                    for idx, distance in enumerate(house_distances)
                    if distance is not None
                ]
                if not reachable:
                    continue
                min_distance, closest_station_idx = min(reachable)
                all_shortest_distances.append({
                    'rent_address': house_addresses[start + offset],
                    'closest_station': station_names[closest_station_idx],
                    'min_driving_distance_km': min_distance
                })
        except Exception as e:
            # Best effort: report which houses were dropped and carry on.
            print(
                f"Error calculating distance matrix for houses "
                f"{start}-{start + n_sources - 1}: {e}"
            )

    return all_shortest_distances

output_path = '../data/raw/external/API/3/closest_school3_distance_final_3.csv'

# One house per request; the result rows are written out as a CSV.
shortest_distances = calculate_distance_matrix_in_batches(
    house_coords,
    station_coords,
    batch_size=1,
)
shortest_distances_df = pd.DataFrame(shortest_distances)
shortest_distances_df.to_csv(output_path, index=False)

print(f"{output_path}")


# 4

In [None]:
import pandas as pd
import geojson
from geojson import Feature, Point, FeatureCollection

# Part 4 of the split is written to ../data/curated/property/ by the splitting
# cell at the top of this notebook, so read it from there.
houses_df = pd.read_csv("../data/curated/property/property3_addresses_to_lat_lng_4.csv")
# NOTE(review): part 1 reads the school list from ../data/curated/property/
# instead — confirm which copy is canonical.
stations_df = pd.read_csv("../data/raw/external/API/3/VIC_School_Location.csv", encoding='ISO-8859-1')

# Rows without both coordinates cannot become GeoJSON points.
houses_df = houses_df.dropna(subset=['longitude', 'latitude'])
stations_df = stations_df.dropna(subset=['longitude', 'latitude'])

def create_geojson_feature(row, feature_type='house'):
    """Turn one table row into a GeoJSON point feature.

    Args:
        row: record supporting ``row['longitude']``, ``row['latitude']`` and
            ``row.get('address', ...)``.
        feature_type: label stored under the 'type' property.

    Returns:
        A ``Feature`` whose geometry is ``Point((longitude, latitude))`` and
        whose properties hold 'address' ('unknown' when the row has no such
        field) and 'type'.
    """
    return Feature(
        geometry=Point((row['longitude'], row['latitude'])),
        properties={
            'address': row.get('address', 'unknown'),
            'type': feature_type,
        },
    )

# Convert every remaining row into a GeoJSON point feature, tagged by kind.
house_features = houses_df.apply(create_geojson_feature, axis=1, args=('house',)).tolist()
station_features = stations_df.apply(create_geojson_feature, axis=1, args=('station',)).tolist()

house_feature_collection = FeatureCollection(house_features)
station_feature_collection = FeatureCollection(station_features)

houses_geojson_output_path = '../data/raw/external/API/3/property_3_school4_geojson.geojson'
stations_geojson_output_path = '../data/raw/external/API/3/vic_school4_geojson.geojson'

# Each collection goes to its own file; houses are written first.
for geojson_path, collection in (
    (houses_geojson_output_path, house_feature_collection),
    (stations_geojson_output_path, station_feature_collection),
):
    with open(geojson_path, 'w') as f:
        geojson.dump(collection, f)

print("GeoJSON files for houses and schools have been generated.")


In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

import os

# The openrouteservice key is a credential and must not live in the notebook.
# Read it from the environment: ORS_API_KEY_4 is specific to this part, with
# ORS_API_KEY as a shared fallback. Any key previously committed here should be
# revoked.
api_key = os.environ.get('ORS_API_KEY_4') or os.environ.get('ORS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the ORS_API_KEY_4 (or ORS_API_KEY) environment variable to an "
        "openrouteservice API key before running this cell."
    )
client = openrouteservice.Client(key=api_key)

# Reload the GeoJSON files produced for part 4 by the previous cell.
with open('../data/raw/external/API/3/property_3_school4_geojson.geojson', 'r') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/3/vic_school4_geojson.geojson', 'r') as f:
    stations_geojson = geojson.load(f)

def extract_coordinates(features):
    """Flatten a GeoJSON feature collection into (lat, lon, address) tuples.

    Args:
        features: mapping with a 'features' list; each entry provides
            ``['geometry']['coordinates']`` as a two-item (lon, lat) sequence
            and a ``['properties']`` mapping.

    Returns:
        A list with one ``(lat, lon, address)`` tuple per feature, in input
        order. ``address`` is 'unknown' when the properties have no 'address'.
    """
    geometry_and_properties = (
        (feature['geometry']['coordinates'], feature['properties'])
        for feature in features['features']
    )
    # GeoJSON stores (lon, lat); the returned tuples put latitude first.
    return [
        (lat, lon, props.get('address', 'unknown'))
        for (lon, lat), props in geometry_and_properties
    ]

# (lat, lon, address) triples recovered from each GeoJSON file.
rent_coordinates = extract_coordinates(rent_geojson)
station_coordinates = extract_coordinates(stations_geojson)

# Coordinate-only views in (lon, lat) order, used as the request locations.
house_coords = [(lng, lt) for lt, lng, _label in rent_coordinates]
station_coords = [(lng, lt) for lt, lng, _label in station_coordinates]

def calculate_distance_matrix_in_batches(house_coords, station_coords, batch_size=1,
                                         ors_client=None, house_addresses=None,
                                         station_names=None, pause_seconds=1):
    """Find the closest station, by driving distance, for every house.

    Houses are sent to the matrix endpoint ``batch_size`` at a time as
    sources, with every station as a destination.

    Args:
        house_coords: list of (lon, lat) pairs, one per house.
        station_coords: list of (lon, lat) pairs, one per station.
        batch_size: number of houses per request.
        ors_client: object exposing ``distance_matrix(...)``; defaults to the
            notebook-level ``client``.
        house_addresses: labels parallel to ``house_coords``; defaults to the
            third field of each notebook-level ``rent_coordinates`` entry.
        station_names: labels parallel to ``station_coords``; defaults to the
            third field of each notebook-level ``station_coordinates`` entry.
        pause_seconds: delay inserted between consecutive requests.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_station' and
        'min_driving_distance_km'. Houses whose request failed, or for which
        every distance came back as None, are omitted.
    """
    all_shortest_distances = []
    if not house_coords:
        return all_shortest_distances

    # Fall back to the notebook globals so existing three-argument calls keep
    # working unchanged.
    if ors_client is None:
        ors_client = client
    if house_addresses is None:
        house_addresses = [entry[2] for entry in rent_coordinates]
    if station_names is None:
        station_names = [entry[2] for entry in station_coordinates]

    for start in range(0, len(house_coords), batch_size):
        # Throttle between requests only; no pause is needed after the last.
        if start and pause_seconds > 0:
            time.sleep(pause_seconds)

        batch_houses = house_coords[start:start + batch_size]
        n_sources = len(batch_houses)
        try:
            matrix = ors_client.distance_matrix(
                locations=batch_houses + station_coords,
                profile='driving-car',
                sources=list(range(n_sources)),
                destinations=list(range(n_sources, n_sources + len(station_coords))),
                metrics=['distance'],
                units='km'
            )

            for offset, house_distances in enumerate(matrix['distances'][:n_sources]):
                # Keep the column index with each distance so the winning
                # station is found without a second scan; ties resolve to the
                # lowest index.
                reachable = [
                    (distance, idx)
                    for idx, distance in enumerate(house_distances)
                    if distance is not None
                ]
                if not reachable:
                    continue
                min_distance, closest_station_idx = min(reachable)
                all_shortest_distances.append({
                    'rent_address': house_addresses[start + offset],
                    'closest_station': station_names[closest_station_idx],
                    'min_driving_distance_km': min_distance
                })
        except Exception as e:
            # Best effort: report which houses were dropped and carry on.
            print(
                f"Error calculating distance matrix for houses "
                f"{start}-{start + n_sources - 1}: {e}"
            )

    return all_shortest_distances

output_path = '../data/raw/external/API/3/closest_school4_distance_final_3.csv'

# One house per request; the result rows are written out as a CSV.
shortest_distances = calculate_distance_matrix_in_batches(
    house_coords,
    station_coords,
    batch_size=1,
)
shortest_distances_df = pd.DataFrame(shortest_distances)
shortest_distances_df.to_csv(output_path, index=False)

print(f"{output_path}")


# 5

In [None]:
import pandas as pd
import geojson
from geojson import Feature, Point, FeatureCollection

# Part 5 of the split is written to ../data/curated/property/ by the splitting
# cell at the top of this notebook, so read it from there.
houses_df = pd.read_csv("../data/curated/property/property3_addresses_to_lat_lng_5.csv")
# NOTE(review): part 1 reads the school list from ../data/curated/property/
# instead — confirm which copy is canonical.
stations_df = pd.read_csv("../data/raw/external/API/3/VIC_School_Location.csv", encoding='ISO-8859-1')

# Rows without both coordinates cannot become GeoJSON points.
houses_df = houses_df.dropna(subset=['longitude', 'latitude'])
stations_df = stations_df.dropna(subset=['longitude', 'latitude'])

def create_geojson_feature(row, feature_type='house'):
    """Turn one table row into a GeoJSON point feature.

    Args:
        row: record supporting ``row['longitude']``, ``row['latitude']`` and
            ``row.get('address', ...)``.
        feature_type: label stored under the 'type' property.

    Returns:
        A ``Feature`` whose geometry is ``Point((longitude, latitude))`` and
        whose properties hold 'address' ('unknown' when the row has no such
        field) and 'type'.
    """
    return Feature(
        geometry=Point((row['longitude'], row['latitude'])),
        properties={
            'address': row.get('address', 'unknown'),
            'type': feature_type,
        },
    )

# Convert every remaining row into a GeoJSON point feature, tagged by kind.
house_features = houses_df.apply(create_geojson_feature, axis=1, args=('house',)).tolist()
station_features = stations_df.apply(create_geojson_feature, axis=1, args=('station',)).tolist()

house_feature_collection = FeatureCollection(house_features)
station_feature_collection = FeatureCollection(station_features)

houses_geojson_output_path = '../data/raw/external/API/3/property_3_school5_geojson.geojson'
stations_geojson_output_path = '../data/raw/external/API/3/vic_school5_geojson.geojson'

# Each collection goes to its own file; houses are written first.
for geojson_path, collection in (
    (houses_geojson_output_path, house_feature_collection),
    (stations_geojson_output_path, station_feature_collection),
):
    with open(geojson_path, 'w') as f:
        geojson.dump(collection, f)

print("GeoJSON files for houses and schools have been generated.")


In [None]:
import geojson
import openrouteservice
import pandas as pd
import time

import os

# The openrouteservice key is a credential and must not live in the notebook.
# Read it from the environment: ORS_API_KEY_5 is specific to this part, with
# ORS_API_KEY as a shared fallback. Any key previously committed here should be
# revoked.
api_key = os.environ.get('ORS_API_KEY_5') or os.environ.get('ORS_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the ORS_API_KEY_5 (or ORS_API_KEY) environment variable to an "
        "openrouteservice API key before running this cell."
    )
client = openrouteservice.Client(key=api_key)

# Reload the GeoJSON files produced for part 5 by the previous cell.
with open('../data/raw/external/API/3/property_3_school5_geojson.geojson', 'r') as f:
    rent_geojson = geojson.load(f)

with open('../data/raw/external/API/3/vic_school5_geojson.geojson', 'r') as f:
    stations_geojson = geojson.load(f)

def extract_coordinates(features):
    """Flatten a GeoJSON feature collection into (lat, lon, address) tuples.

    Args:
        features: mapping with a 'features' list; each entry provides
            ``['geometry']['coordinates']`` as a two-item (lon, lat) sequence
            and a ``['properties']`` mapping.

    Returns:
        A list with one ``(lat, lon, address)`` tuple per feature, in input
        order. ``address`` is 'unknown' when the properties have no 'address'.
    """
    geometry_and_properties = (
        (feature['geometry']['coordinates'], feature['properties'])
        for feature in features['features']
    )
    # GeoJSON stores (lon, lat); the returned tuples put latitude first.
    return [
        (lat, lon, props.get('address', 'unknown'))
        for (lon, lat), props in geometry_and_properties
    ]

# (lat, lon, address) triples recovered from each GeoJSON file.
rent_coordinates = extract_coordinates(rent_geojson)
station_coordinates = extract_coordinates(stations_geojson)

# Coordinate-only views in (lon, lat) order, used as the request locations.
house_coords = [(lng, lt) for lt, lng, _label in rent_coordinates]
station_coords = [(lng, lt) for lt, lng, _label in station_coordinates]

def calculate_distance_matrix_in_batches(house_coords, station_coords, batch_size=1,
                                         ors_client=None, house_addresses=None,
                                         station_names=None, pause_seconds=1):
    """Find the closest station, by driving distance, for every house.

    Houses are sent to the matrix endpoint ``batch_size`` at a time as
    sources, with every station as a destination.

    Args:
        house_coords: list of (lon, lat) pairs, one per house.
        station_coords: list of (lon, lat) pairs, one per station.
        batch_size: number of houses per request.
        ors_client: object exposing ``distance_matrix(...)``; defaults to the
            notebook-level ``client``.
        house_addresses: labels parallel to ``house_coords``; defaults to the
            third field of each notebook-level ``rent_coordinates`` entry.
        station_names: labels parallel to ``station_coords``; defaults to the
            third field of each notebook-level ``station_coordinates`` entry.
        pause_seconds: delay inserted between consecutive requests.

    Returns:
        A list of dicts with keys 'rent_address', 'closest_station' and
        'min_driving_distance_km'. Houses whose request failed, or for which
        every distance came back as None, are omitted.
    """
    all_shortest_distances = []
    if not house_coords:
        return all_shortest_distances

    # Fall back to the notebook globals so existing three-argument calls keep
    # working unchanged.
    if ors_client is None:
        ors_client = client
    if house_addresses is None:
        house_addresses = [entry[2] for entry in rent_coordinates]
    if station_names is None:
        station_names = [entry[2] for entry in station_coordinates]

    for start in range(0, len(house_coords), batch_size):
        # Throttle between requests only; no pause is needed after the last.
        if start and pause_seconds > 0:
            time.sleep(pause_seconds)

        batch_houses = house_coords[start:start + batch_size]
        n_sources = len(batch_houses)
        try:
            matrix = ors_client.distance_matrix(
                locations=batch_houses + station_coords,
                profile='driving-car',
                sources=list(range(n_sources)),
                destinations=list(range(n_sources, n_sources + len(station_coords))),
                metrics=['distance'],
                units='km'
            )

            for offset, house_distances in enumerate(matrix['distances'][:n_sources]):
                # Keep the column index with each distance so the winning
                # station is found without a second scan; ties resolve to the
                # lowest index.
                reachable = [
                    (distance, idx)
                    for idx, distance in enumerate(house_distances)
                    if distance is not None
                ]
                if not reachable:
                    continue
                min_distance, closest_station_idx = min(reachable)
                all_shortest_distances.append({
                    'rent_address': house_addresses[start + offset],
                    'closest_station': station_names[closest_station_idx],
                    'min_driving_distance_km': min_distance
                })
        except Exception as e:
            # Best effort: report which houses were dropped and carry on.
            print(
                f"Error calculating distance matrix for houses "
                f"{start}-{start + n_sources - 1}: {e}"
            )

    return all_shortest_distances

output_path = '../data/raw/external/API/3/closest_school5_distance_final_3.csv'

# One house per request; the result rows are written out as a CSV.
shortest_distances = calculate_distance_matrix_in_batches(
    house_coords,
    station_coords,
    batch_size=1,
)
shortest_distances_df = pd.DataFrame(shortest_distances)
shortest_distances_df.to_csv(output_path, index=False)

print(f"{output_path}")


In [None]:
import pandas as pd

# Per-part result files, in part order, as written by the five API cells above.
part_files = [
    "../data/raw/external/API/3/closest_school1_distance_final_3.csv",
    "../data/raw/external/API/3/closest_school2_distance_final_3.csv",
    "../data/raw/external/API/3/closest_school3_distance_final_3.csv",
    "../data/raw/external/API/3/closest_school4_distance_final_3.csv",
    "../data/raw/external/API/3/closest_school5_distance_final_3.csv",
]

# Stack the parts top to bottom and renumber the rows from zero.
final_df = pd.concat(
    [pd.read_csv(part_file) for part_file in part_files],
    ignore_index=True,
)

final_df.to_csv("../data/raw/external/API/3/closest_school_distance_final_3.csv", index=False)

print("The file merge was successful and has been saved as 'closest_school_distance_final_3.csv'")
