Potential data sources: Citi Bike NYC trip history data
https://s3.amazonaws.com/tripdata/index.html
Index(['tripduration', 'starttime', 'stoptime', 'start station id',
       'start station name', 'start station latitude',
       'start station longitude', 'end station id', 'end station name',
       'end station latitude', 'end station longitude', 'bikeid', 'usertype',
       'birth year', 'gender'],
      dtype='object')

Below is the lime data from this paper.
https://ddot.dc.gov/page/dockless-api

In [18]:
import requests
import pandas as pd

def fetch_all_api_data(api_url):
    """
    Fetches all data from the specified API URL and returns a DataFrame.
    
    Parameters:
        api_url (str): The URL of the API.
    
    Returns:
        pandas.DataFrame: DataFrame containing all the API data.
    """
    # Initialize an empty list to store data
    all_data = []

    # Fetch initial page of data
    response = requests.get(api_url)
    data = response.json()
    all_data.extend(data['data']['bikes'])

    # Check if there are more pages
    while 'next_page' in data['data']:
        # Fetch next page of data
        next_page_url = data['data']['next_page']
        response = requests.get(next_page_url)
        data = response.json()
        all_data.extend(data['data']['bikes'])

    # Convert to DataFrame
    df = pd.DataFrame(all_data)
    
    return df

# Our usage
api_url = "https://data.lime.bike/api/partners/v1/gbfs/washington_dc/free_bike_status.json"
df = fetch_all_api_data(api_url)
print(df)
df.head(6)

                                   bike_id      lat      lon  is_reserved  \
0     0d27560d-593b-4eb4-ac0c-3080e4f292ea  38.8974 -76.9261            0   
1     d4ba3ed3-f05e-4715-b741-9b2077160509  38.8808 -76.9281            0   
2     e33154d1-337e-47fb-97e9-498dcb1ddff9  38.8830 -76.9292            0   
3     6a2f1d3b-5da5-468f-b794-2a5204dae23b  38.8845 -76.9298            0   
4     6839e27d-5e79-45ae-9100-9c2ba5651e2c  38.9014 -76.9298            0   
...                                    ...      ...      ...          ...   
6760  49e9f041-4f7b-49b1-a20a-7621fb0ce4aa  38.8144 -77.0182            0   
6761  8011dd1c-8fd0-4734-927c-8830fe64d7df  38.8625 -77.0253            0   
6762  db238a5e-03aa-4ebe-ba1a-023bc8660e73  38.8670 -77.0281            0   
6763  a376349d-1cde-42dc-a79e-7de0d51d3a10  38.8640 -77.0403            0   
6764  02e582a3-8160-4ffc-8540-f24ed6f11cc3  38.8640 -77.0403            0   

      is_disabled vehicle_type  \
0               0         bike   
1      

Unnamed: 0,bike_id,lat,lon,is_reserved,is_disabled,vehicle_type,rental_uris
0,0d27560d-593b-4eb4-ac0c-3080e4f292ea,38.8974,-76.9261,0,0,bike,{'android': 'limebike://map?selected_vehicle_i...
1,d4ba3ed3-f05e-4715-b741-9b2077160509,38.8808,-76.9281,0,0,scooter,{'android': 'limebike://map?selected_vehicle_i...
2,e33154d1-337e-47fb-97e9-498dcb1ddff9,38.883,-76.9292,0,0,scooter,{'android': 'limebike://map?selected_vehicle_i...
3,6a2f1d3b-5da5-468f-b794-2a5204dae23b,38.8845,-76.9298,0,0,bike,{'android': 'limebike://map?selected_vehicle_i...
4,6839e27d-5e79-45ae-9100-9c2ba5651e2c,38.9014,-76.9298,0,0,scooter,{'android': 'limebike://map?selected_vehicle_i...
5,09f0d748-4375-4679-9935-362935285a74,38.8875,-76.9307,0,0,scooter,{'android': 'limebike://map?selected_vehicle_i...


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6756 entries, 0 to 6755
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   bike_id       6756 non-null   object 
 1   lat           6756 non-null   float64
 2   lon           6756 non-null   float64
 3   is_reserved   6756 non-null   int64  
 4   is_disabled   6756 non-null   int64  
 5   vehicle_type  6756 non-null   object 
 6   rental_uris   6756 non-null   object 
dtypes: float64(2), int64(2), object(3)
memory usage: 369.6+ KB


In [17]:
import pandas as pd

def print_unique_values(df, columns):
    """
    Prints the unique values for each column in the DataFrame.
    
    Parameters:
        df (pandas.DataFrame): The DataFrame.
        columns (list): List of column names for which unique values need to be printed.
    """
    for column in columns:
        unique_values = df[column].unique()
        print("Unique values for column '{}':".format(column))
        print(unique_values)
        print()

# Our usage
columns_to_check = ['is_reserved', 'vehicle_type']
print_unique_values(df, columns_to_check)


Unique values for column 'is_reserved':
[0 1]

Unique values for column 'vehicle_type':
['bike' 'scooter']

