In [6]:
import pandas as pd
import geopandas as gpd

# Input locations: the merged rental listings (CSV) and the metro train station layer (shapefile)
rental_data_path = '/home/Daniel Bi/project two/data/landing/rental_merged.csv'
train_station_shapefile_path = '/home/Daniel Bi/project two/data/landing/PTV_train_station/PTV/PTV_METRO_TRAIN_STATION.shp'

# Steps 1-2: read both inputs into memory.
# The rental table becomes a pandas DataFrame; the station layer is read with geopandas.
rental_df = pd.read_csv(filepath_or_buffer=rental_data_path)
train_station_gdf = gpd.read_file(train_station_shapefile_path)

# Step 3: Clean the train station names by removing "Railway Station" or "Station"
def clean_station_name(station_name) -> str:
    """Return *station_name* with the station suffix words removed.

    Every occurrence of "Railway Station" and then of "Station" is deleted
    (case-sensitive, anywhere in the name, not only at the end), and leading
    and trailing whitespace is stripped.

    Args:
        station_name: The raw station name. Non-string values (for example
            the None/NaN that pandas uses for missing cells) are accepted.

    Returns:
        The cleaned name, or an empty string when *station_name* is not a
        string.
    """
    # A missing cell reaches this function as None or a float NaN when it is
    # used with Series.apply; calling .replace on it would raise
    # AttributeError, so treat it as "no name".
    if not isinstance(station_name, str):
        return ""

    # Remove the longer phrase first so that "Railway" is not left behind.
    without_suffix = station_name.replace("Railway Station", "").replace("Station", "")
    return without_suffix.strip()

# Step 4: Derive a lowercase, suffix-free station name for matching.
# The result goes into a new column, so the original STOP_NAME column is left untouched.
train_station_gdf['Cleaned_Station_Name'] = (
    train_station_gdf['STOP_NAME']
    .apply(clean_station_name)
    .str.lower()
)

# Step 5: Create a function to check if any train station belongs to the suburb
def has_train_station(suburb, train_station_names) -> bool:
    """Return True when *suburb* appears inside any of the station names.

    The suburb is stripped and lower-cased before comparison; the station
    names are compared as given, so the caller is expected to pass them
    already lower-cased.

    NOTE: this is a substring test, so a suburb also matches longer station
    names that merely contain it (e.g. "richmond" matches "east richmond").

    Args:
        suburb: Suburb name to look up. Non-string values (None/NaN for a
            missing cell) are treated as "no station".
        train_station_names: Iterable of station names. Non-string entries
            are skipped.

    Returns:
        True if at least one station name contains the suburb, else False.
    """
    # Missing suburbs come through pandas as None/NaN; they cannot match.
    if not isinstance(suburb, str):
        return False

    # Lower-case a local copy only; the caller's value is not modified.
    suburb_lower = suburb.strip().lower()

    # An empty string is a substring of every string, so without this guard
    # a blank suburb would be reported as having a station.
    if not suburb_lower:
        return False

    return any(
        isinstance(station, str) and suburb_lower in station
        for station in train_station_names
    )

# Gather the cleaned station names once so every rental row is checked against the same list
train_station_names = train_station_gdf['Cleaned_Station_Name'].to_list()

# Flag each rental row: 1 when its suburb matches a station name, otherwise 0
rental_df['has_train_station'] = rental_df['Suburb'].map(
    lambda suburb: 1 if has_train_station(suburb, train_station_names) else 0
)

# Step 6: Write the augmented table back to 'rental_merged.csv' (no index column)
output_path = '/home/Daniel Bi/project two/data/landing/rental_merged.csv'
rental_df.to_csv(output_path, index=False)

# Preview the suburb names next to the new flag to confirm the column was added
print(rental_df[['Suburb', 'has_train_station']].head())




       Suburb  has_train_station
0  Mordialloc                  1
1   Doncaster                  0
2  Noble Park                  1
3      Albion                  1
4    Sunshine                  1
