In [6]:
import requests
import json
import pandas as pd
import numpy as np

from datetime import datetime,timedelta

In [7]:
import dotenv
import os

# Load the API key from the local secret file into the process environment
dotenv.load_dotenv('.env')

headers = {
  "Authorization":f"apikey {os.environ.get('apikey')}"
}
url_carparks = 'https://api.transport.nsw.gov.au/v1/carpark'
# url_carpark_history = 'https://api.transport.nsw.gov.au/v1/carpark/history'

carparks_json_path = './data/carparks.json'

# Reuse the cached file when it holds data. An empty file (for example one left
# behind by a write that failed part-way) is treated as missing, so json.load
# is never run on it.
if os.path.exists(carparks_json_path) and os.path.getsize(carparks_json_path) > 0:
  with open(carparks_json_path) as f:
    list_of_carparks = json.load(f)
else:
  response = requests.get(url_carparks, headers=headers)
  # Fail loudly on an HTTP error instead of caching an error payload
  response.raise_for_status()
  list_of_carparks = response.json()

  # open(..., 'w') does not create missing parent directories
  os.makedirs(os.path.dirname(carparks_json_path), exist_ok=True)
  with open(carparks_json_path, 'w') as f:
    # json.dump writes to the file object; json.dumps only returns a string
    # and does not accept a file argument
    json.dump(list_of_carparks, f)

# The payload is a dictionary, so it is loaded with orient='index' below
# (one row per dictionary key)
data = list_of_carparks

# Convert the dictionary to a dataframe
df_carparks = pd.DataFrame.from_dict(data, orient='index')
# Resetting the index to label the columns afterwards
df_carparks = df_carparks.reset_index()
df_carparks.columns = ['API_ID', 'CarParkName']

df_carparks

Unnamed: 0,API_ID,CarParkName
0,1,Tallawong Station Car Park (historical only)
1,10,Warriewood Car Park
2,11,Narrabeen Car Park
3,12,Mona Vale Car Park
4,13,Dee Why Car Park
5,14,West Ryde Car Park
6,15,Sutherland East Parade Car Park
7,16,Leppington Car Park
8,17,Edmondson Park South Car Park
9,18,St Marys Car Park


Create a `date_getter` function that returns an array of dates, one entry per day formatted as `YYYY-MM-DD`, starting a given time delta before 31 December 2023 and ending on 31 December 2023 (inclusive).

In [19]:
def date_getter(td, cutoff_date=datetime(2023, 12, 31)):
  """
  Return every date from `cutoff_date - td` up to `cutoff_date`, one per day.

  Args:
  td (timedelta): How far before the cutoff date the range starts.
  cutoff_date (datetime): The last date to be searched for. Defaults to
    31 December 2023.

  Returns:
  list: Dates formatted as "YYYY-MM-DD" strings in ascending order. The list
    is empty when `td` is negative, because the start would then fall after
    the cutoff date.
  """
  start_date = cutoff_date - td

  # `.days` floors towards negative infinity, so this is the number of whole
  # one-day steps that fit between the start and the cutoff (-1 or lower when
  # the start is after the cutoff, which yields an empty range below).
  whole_days = (cutoff_date - start_date).days

  # Ensure that records of each day are obtained
  return [
    (start_date + timedelta(days=offset)).strftime("%Y-%m-%d")
    for offset in range(whole_days + 1)
  ]

In [36]:
import os
from concurrent.futures import ThreadPoolExecutor
import requests
import pandas as pd
import json

def fetch_carpark_data(url, headers, session=None, timeout=30):
    """
    Fetch carpark data from the given URL with the provided headers.

    Args:
    url (str): The URL to fetch the data from.
    headers (dict): The headers to be included in the request.
    session (optional): An object exposing a requests-compatible
        ``get(url, headers=..., timeout=...)`` method, e.g. a
        ``requests.Session``. When omitted, ``requests.get`` is used.
    timeout (float): Seconds to wait for the server before giving up.
        Without a timeout a stalled connection would block indefinitely.

    Returns:
    The decoded JSON body of the response.

    Raises:
    Whatever ``raise_for_status()`` raises for a 4xx/5xx response
    (``requests.HTTPError`` when using requests), so that an error payload is
    never returned as if it were carpark data.
    """
    # Only fall back to the module-level requests API when no session is given
    client = requests if session is None else session
    response = client.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()

def carpark_history_period(facility, dates, json_file_path):
    """
    Fetch carpark history data for a specific facility and date range, and save it to a JSON file.

    One request is issued per date; the requests run concurrently on a thread
    pool and their results are concatenated in the order of `dates`.

    Args:
    facility (str or int): The carpark facility identifier, interpolated into
        the `facility` query parameter.
    dates (list): Dates for the history period, each interpolated into the
        `eventdate` query parameter.
    json_file_path (str): The file path to save the JSON data.

    Returns:
    pandas.DataFrame: The carpark history data as a pandas DataFrame.

    Raises:
    ValueError: If the response for a date is not a JSON list of records.
    """
    headers = {
        "Authorization": f"apikey {os.environ.get('apikey')}"
    }

    # Materialise once so the same sequence drives both the requests and the
    # per-date error reporting below, even if `dates` is a one-shot iterable.
    dates = list(dates)
    urls = [
        f'https://api.transport.nsw.gov.au/v1/carpark/history?facility={facility}&eventdate={date}'
        for date in dates
    ]

    data_array = []
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(fetch_carpark_data, url, headers) for url in urls]
        # Iterate in submission order so records stay sorted by requested date
        for date, future in zip(dates, futures):
            records = future.result()
            # `list += dict` would silently append the dict's keys, so reject
            # anything that is not a list of records (e.g. an error object).
            if not isinstance(records, list):
                raise ValueError(
                    f"Unexpected response for facility {facility} on {date}: {records!r}"
                )
            data_array.extend(records)

    # open(..., 'w') does not create missing parent directories
    parent_dir = os.path.dirname(json_file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(json_file_path, 'w') as f:
        json.dump(data_array, f)

    return pd.DataFrame(data_array)

In [38]:
# Fetch the history of facility 14 for 29-31 December 2023 (the cutoff date and
# the two days before it). The date list must be passed as `dates`, which is the
# parameter name `carpark_history_period` declares.
df = carpark_history_period(facility=14, dates=date_getter(td=timedelta(days=2)), json_file_path='./data/carpark_history_facility_14.json')

print('LENGTH:',len(df))
print(df)

LENGTH: 945
        tsn       time spots  \
0    211420  757083975   151   
1    211420  757084211   151   
2    211420  757084439   151   
3    211420  757085039   151   
4    211420  757085641   151   
..      ...        ...   ...   
940  211420  757340865   151   
941  211420  757341062   151   
942  211420  757341168   151   
943  211420  757341769   151   
944  211420  757342370   151   

                                                 zones ParkID  \
0    [{'spots': '151', 'zone_id': '1', 'occupancy':...      1   
1    [{'spots': '151', 'zone_id': '1', 'occupancy':...      1   
2    [{'spots': '151', 'zone_id': '1', 'occupancy':...      1   
3    [{'spots': '151', 'zone_id': '1', 'occupancy':...      1   
4    [{'spots': '151', 'zone_id': '1', 'occupancy':...      1   
..                                                 ...    ...   
940  [{'spots': '151', 'zone_id': '1', 'occupancy':...      1   
941  [{'spots': '151', 'zone_id': '1', 'occupancy':...      1   
942  [{'spots': '1

In [34]:
# Summarise the columns of the fetched history frame: non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 945 entries, 0 to 944
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   tsn                945 non-null    object
 1   time               945 non-null    object
 2   spots              945 non-null    object
 3   zones              945 non-null    object
 4   ParkID             945 non-null    object
 5   occupancy          945 non-null    object
 6   MessageDate        945 non-null    object
 7   facility_id        945 non-null    object
 8   facility_name      945 non-null    object
 9   tfnsw_facility_id  945 non-null    object
dtypes: object(10)
memory usage: 74.0+ KB
