# In [None]:
import os
import time

import googlemaps
import pandas as pd

# Initialize the Google Maps API client.
# The key is read from the GOOGLE_MAPS_API_KEY environment variable when it is
# set to a non-empty value; otherwise the literal below is used so existing
# runs keep working.
# NOTE(review): the fallback literal is a credential committed to source. It
# should be rotated and this fallback removed once the environment variable is
# configured wherever the script runs.
gmaps = googlemaps.Client(
    key=os.environ.get('GOOGLE_MAPS_API_KEY')
    or 'AIzaSyDO3Op75ZC9rzjZN-HyFbFveFOzPglOJtc'
)

# File path for the rental data CSV (with geocoded coordinates)
rental_data_path = '/home/Daniel Bi/project two/data/landing/rental_with_coordinates.csv'

# File path for the output CSV (with distances to Melbourne CBD)
output_path = '/home/Daniel Bi/project two/data/landing/rental_with_cbd_distances.csv'

# Step 1: Load the rental data CSV
rental_df = pd.read_csv(rental_data_path)

# Drop rows without a usable coordinate pair; .copy() gives an independent
# frame so the column assignments below do not write into a view of rental_df.
rental_df_clean = rental_df.dropna(subset=['latitude', 'longitude']).copy()

# Initialize the distance column only if it is not already present, so that
# distances already stored in the input are kept and only null entries are
# filled in by the processing loop.
if 'distance_to_cbd_km' not in rental_df_clean.columns:
    rental_df_clean['distance_to_cbd_km'] = None

# Coordinates for Melbourne CBD (latitude, longitude for Google Maps)
melbourne_cbd_coords = [-37.8136, 144.9631]

# Step 2: Driving distance from a single property to Melbourne CBD via Google Maps
def calculate_distance_to_cbd(property_coords, cbd_coords, gmaps_client):
    """Return the driving distance in kilometres from a property to the CBD.

    Args:
        property_coords: Origin location, passed as the single entry of
            ``origins`` to ``gmaps_client.distance_matrix``.
        cbd_coords: Destination location, passed as the single entry of
            ``destinations``.
        gmaps_client: Object exposing a ``distance_matrix`` method that accepts
            ``origins``, ``destinations`` and ``mode`` keyword arguments.

    Returns:
        The reported distance divided by 1000 (metres to kilometres), or
        ``None`` when the first result element's status is not ``'OK'`` or when
        any exception is raised during the request or while reading the
        response. In both failure cases a message is printed.
    """
    try:
        response = gmaps_client.distance_matrix(
            origins=[property_coords],
            destinations=[cbd_coords],
            mode="driving",
        )

        # One origin and one destination were sent, so only the first element
        # of the first row is inspected.
        element = response['rows'][0]['elements'][0]
        status = element['status']

        if status != 'OK':
            print(f"No valid distance found for {property_coords} to Melbourne CBD: {status}")
            return None

        metres = element['distance']['value']  # Distance in meters
        return metres / 1000
    except Exception as e:
        # Best effort: report the failure and let the caller carry on.
        print(f"Error calculating distance for {property_coords} to Melbourne CBD: {e}")
        return None

# Step 3: Pair each property's latitude and longitude into a (lat, lng) tuple.
# zip over the two columns avoids a row-wise apply and also works when the
# frame is empty.
rental_df_clean['coordinates'] = list(
    zip(rental_df_clean['latitude'], rental_df_clean['longitude'])
)

# Step 4: Process each property and save progress incrementally.
# Progress is counted with enumerate rather than derived from the index label:
# after dropna the labels have gaps, so a label-based count would misreport the
# number of rows handled and trigger checkpoints at irregular intervals.
for processed, (idx, row) in enumerate(rental_df_clean.iterrows(), start=1):
    if pd.isnull(row['distance_to_cbd_km']):  # Only process rows with null distances
        distance = calculate_distance_to_cbd(row['coordinates'], melbourne_cbd_coords, gmaps)
        # A failed lookup returns None, leaving the entry null.
        rental_df_clean.loc[idx, 'distance_to_cbd_km'] = distance

    # Save progress every 100 rows
    if processed % 100 == 0:
        print(f"Processed {processed} rows, saving progress...")
        rental_df_clean.to_csv(output_path, index=False)

# Final save after processing all data
rental_df_clean.to_csv(output_path, index=False)
print("Processing completed.")