In [1]:
import pandas as pd

In [4]:
# Load the Bluebikes trip-data CSV into a DataFrame.
# NOTE(review): `df` is not referenced by the later cells visible here;
# extract_unique_stations re-reads its own input file — confirm whether this
# load is still needed.
df = pd.read_csv('../data/BBData/202506-bluebikes-tripdata.csv')

In [11]:
import pandas as pd

def extract_unique_stations(input_file, output_file):
    """
    Extract unique start stations from bike share CSV data.

    Each station id appears exactly once in the result. When the same id
    occurs with several name/coordinate variants, the most frequent variant
    supplies Name/Latitude/Longitude, and Count is the total number of trips
    across all of that station's variants.

    Rows missing the station id, name, latitude or longitude are ignored.

    Parameters:
    input_file (str): Path to input CSV file. Must contain the columns
        start_station_id, start_station_name, start_lat and start_lng.
    output_file (str): Path to output CSV file (written without the index).

    Returns:
    pandas.DataFrame: Columns Number, Name, Latitude, Longitude, Count,
        sorted by Number, with a fresh 0..n-1 index.

    Raises:
    KeyError: If any of the required columns is missing from the input.
    """
    station_cols = ['start_station_id', 'start_station_name', 'start_lat', 'start_lng']

    # Read the input CSV
    trips = pd.read_csv(input_file)

    # Drop incomplete rows up front. groupby() silently discards NaN keys
    # anyway, so filtering afterwards would be a no-op; doing it here keeps the
    # same rows while making the rule explicit.
    complete_trips = trips.dropna(subset=station_cols)

    # One row per distinct (id, name, lat, lng) variant with its trip count
    variants = (
        complete_trips.groupby(station_cols)
        .size()
        .reset_index(name='VariantTrips')
    )

    # Station total = sum over all of its variants. Using the top variant's
    # own count would under-report stations whose name or coordinates changed.
    variants['Count'] = (
        variants.groupby('start_station_id')['VariantTrips'].transform('sum')
    )

    # Sort so the most frequent variant comes first within each station, keep
    # that one, then rename/reorder to the output schema. The result stays
    # ordered by station id, so no further sort is needed.
    unique_stations = (
        variants
        .sort_values(['start_station_id', 'VariantTrips'], ascending=[True, False])
        .drop_duplicates(subset=['start_station_id'], keep='first')
        .rename(columns={
            'start_station_id': 'Number',
            'start_station_name': 'Name',
            'start_lat': 'Latitude',
            'start_lng': 'Longitude',
        })
        .loc[:, ['Number', 'Name', 'Latitude', 'Longitude', 'Count']]
        .reset_index(drop=True)
    )

    # Save to output file
    unique_stations.to_csv(output_file, index=False)

    print(f"Extracted {len(unique_stations)} unique stations to {output_file}")

    return unique_stations

In [12]:

# Source trip export and destination for the de-duplicated station list.
# Adjust both paths to your own layout before running.
# NOTE(review): the output saved with this cell reports a different output
# path than the one set here — re-run the cell to refresh it.
input_file = "../data/BBData/202506-bluebikes-tripdata.csv"
output_file = "../data/current_stations.csv"

# Build the station table; the function also writes it to output_file.
stations_df = extract_unique_stations(input_file=input_file, output_file=output_file)

# Preview the top of the result; the leading newline separates it from the
# function's own status message.
print("\nFirst 5 unique stations:", stations_df.head(5), sep="\n")

Extracted 554 unique stations to ../data/BBData/current_stations.csv

First 5 unique stations:
   Number                                         Name   Latitude  Longitude  \
0  A32000                                     Fan Pier  42.353391 -71.044571   
1  A32001  Union Square - Brighton Ave at Cambridge St  42.353334 -71.137313   
2  A32002              Commonwealth Ave at Agganis Way  42.351692 -71.119035   
3  A32003                B.U. Central - 725 Comm. Ave.  42.350406 -71.108279   
4  A32004                    Longwood Ave at Binney St  42.338629 -71.106500   

   Count  
0    972  
1   1486  
2   2647  
3   1249  
4   2451  
