In [1]:
import requests
import json
import pandas as pd
import numpy as np

from datetime import datetime,timedelta

In [2]:
import dotenv
import os

# Load secret file
dotenv.load_dotenv('.env')

headers = {
  "Authorization":f"apikey {os.environ.get('apikey')}"
}
url_carparks = 'https://api.transport.nsw.gov.au/v1/carpark'
# url_carpark_history = 'https://api.transport.nsw.gov.au/v1/carpark/history'

list_of_carparks = requests.get(url_carparks, headers=headers).json()

# Create a json file and write the data if it doesn't exist
if not os.path.exists('./data/carparks.json'):
  with open('./data/carparks.json', 'w') as f:
    json.dumps(list_of_carparks, f)

# The format of the file itself is a dictionary, thus requiring some creativity to read
with open('./data/carparks.json') as f:
  data = json.load(f)

# Convert the dictionary to a dataframe
df_carparks = pd.DataFrame.from_dict(data, orient='index')
# Resetting the index to label the columns afterwards
df_carparks = df_carparks.reset_index()
df_carparks.columns = ['facility_id', 'CarParkName']

df_carparks

carpark_details_array=[]
for index, row in df_carparks.iterrows():
  facility = row['API_ID']
  url = f'https://api.transport.nsw.gov.au/v1/carpark?facility={facility}'


  # Make request
  response = requests.get(url, headers=headers).json()

  # Add to array
  carpark_details_array.append(response)

carpark_details_array

KeyError: 'API_ID'

In [None]:
# Saving carpark details to file
with open('./data/carpark_details.json', 'w') as f:
  json.dump(carpark_details_array, f)

# Reading as a dataframe
df_carpark_details = pd.read_json('./data/carpark_details.json')
df_carpark_details

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,2155384,742877319,1004,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...",1,"{'loop': None, 'total': '981', 'monthlies': No...",2023-07-17T12:48:39,1,Tallawong Station Car Park,2155384CCP001
1,2101131,760433800,244,"[{'spots': '244', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '75', 'monthlies': Non...",2024-02-05T18:36:40,10,Warriewood Car Park,2101131TPR001
2,2101130,760434123,46,"[{'spots': '46', 'zone_id': '1', 'occupancy': ...",1,"{'loop': '63843', 'total': '14', 'monthlies': ...",2024-02-05T18:42:03,11,Narrabeen Car Park,2101130TPR001
3,2103108,760434091,68,"[{'spots': '68', 'zone_id': '1', 'occupancy': ...",1,"{'loop': None, 'total': '44', 'monthlies': Non...",2024-02-05T18:41:31,12,Mona Vale Car Park,2103108TPR001
4,2099207,760433747,117,"[{'spots': '117', 'zone_id': '1', 'occupancy':...",1,"{'loop': '53269', 'total': '52', 'monthlies': ...",2024-02-05T18:35:47,13,Dee Why Car Park,2099207TPR001
5,211420,760433971,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '228018', 'total': '31', 'monthlies':...",2024-02-05T18:39:31,14,West Ryde Car Park,211420TPR001
6,223210,760434193,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '87', 'monthlies': Non...",2024-02-05T18:43:13,15,Sutherland East Parade Car Park,223210TPR001
7,217933,760434198,1884,"[{'spots': '1884', 'zone_id': '1', 'occupancy'...",1,"{'loop': '809193', 'total': '434', 'monthlies'...",2024-02-05T18:43:18,16,Leppington Car Park,217933TPR001
8,217426,760434201,1429,"[{'spots': '1429', 'zone_id': '1', 'occupancy'...",1,"{'loop': '740938', 'total': '136', 'monthlies'...",2024-02-05T18:43:21,17,Edmondson Park South Car Park,217426TPR001
9,276010,760434199,682,"[{'spots': '682', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '142', 'monthlies': No...",2024-02-05T18:43:19,18,St Marys Car Park,276010TPR001


Deleting facility ids 486-490 as they will not be used. The source of the info indicates they contain inaccurate data for the columns that will be needed.

In [None]:
# Dropping rows 28,29,30,31,32
df_carpark_details.drop(index=[28,29,30,31,32],inplace=True)

# Resetting the index
df_carpark_details.reset_index(drop=True,inplace=True)

# Sorting by facility_id
df_carpark_details.sort_values(by='facility_id',inplace=True)
df_carpark_details

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,2155384,742877319,1004,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...",1,"{'loop': None, 'total': '981', 'monthlies': No...",2023-07-17T12:48:39,1,Tallawong Station Car Park,2155384CCP001
11,2155382,742877319,1374,"[{'spots': '368', 'zone_id': 'CPS-KVE1', 'occu...",1,"{'loop': None, 'total': '1363', 'monthlies': N...",2023-07-17T12:48:39,2,Kellyville Station Car Park,2155382CCP001
22,2153478,742877319,800,"[{'spots': '800', 'zone_id': 'CPS-BLV', 'occup...",1,"{'loop': None, 'total': '314', 'monthlies': No...",2023-07-17T12:48:39,3,Bella Vista Station Car Park,2153478CCP001
27,2154392,742877319,600,"[{'spots': '600', 'zone_id': 'CPS-SHW', 'occup...",1,"{'loop': None, 'total': '532', 'monthlies': No...",2023-07-17T12:48:39,4,Hills Showground Station Car Park,2154392CCP001
28,2126158,742877319,400,"[{'spots': '400', 'zone_id': 'CPS-CHE', 'occup...",1,"{'loop': None, 'total': '400', 'monthlies': No...",2023-07-17T12:48:39,5,Cherrybrook Station Car Park,2126158CCP001
29,207210,760434253,213,"[{'spots': '213', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '56', 'monthlies': Non...",2024-02-05T18:44:13,6,Gordon Henry St North Car Park,207210TPR001
30,253330,760434246,42,"[{'spots': '42', 'zone_id': '1', 'occupancy': ...",1,"{'loop': None, 'total': '0', 'monthlies': None...",2024-02-05T18:44:06,7,Kiama Car Park,253330TPR001
31,225040,760434254,1057,"[{'spots': '1057', 'zone_id': '1', 'occupancy'...",1,"{'loop': None, 'total': '368', 'monthlies': No...",2024-02-05T18:44:14,8,Gosford Car Park,225040TPR001
32,221210,760434255,934,"[{'spots': '934', 'zone_id': '1', 'occupancy':...",1,"{'loop': '321462', 'total': '168', 'monthlies'...",2024-02-05T18:44:15,9,Revesby Car Park,221210TPR001
1,2101131,760433800,244,"[{'spots': '244', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '75', 'monthlies': Non...",2024-02-05T18:36:40,10,Warriewood Car Park,2101131TPR001


Attempting to create a date_time function that gives the dates of all days in an array from 31st December 2023 to a target date that is separated by a time delta

In [None]:
def date_getter(td):
  # Array that stores the dates to be searched for
  date_period_array = []
  
  # The last date to be searched for
  cutoff_date = datetime(2023,12,31)
  target_date = cutoff_date - td
  
  # Ensure that records of each day are obtained
  delta = timedelta(days=1)
  
  while target_date <= cutoff_date:
    date_period_array.append(target_date.strftime("%Y-%m-%d"))
    target_date += delta

  return date_period_array

In [None]:
import os

def carpark_history_period(facility, dates_array):
    data_array = []
    json_file_path = f"./data/carpark history/facility_{facility}.json"
    
    # Request header
    headers = {
        "Authorization":f"apikey {os.environ.get('apikey')}"
    }
    
    # Deleting file if it exists
    if os.path.exists(json_file_path):
        os.remove(json_file_path)
    
    # Make a request for each date
    for date in dates_array:
        url = f'https://api.transport.nsw.gov.au/v1/carpark/history?facility={facility}&eventdate={date}'
    
        response = requests.get(url, headers=headers).json()
    
        if data_array == []:
            data_array = response
        else:
            data_array = data_array + response
    
    # Saving data to json file
    with open(json_file_path, 'w') as f:
        json.dump(data_array, f)

    # Reading json file
    with open(json_file_path) as f:
        data = json.load(f)
    
    # Converting read data into a pandas dataframe
    return pd.DataFrame(data)

In [None]:
for key,row in df_carparks.iterrows():
  # Get the facility id
  facility = row['API_ID']
  
  # Save file with carpark history for a month
  carpark_history_period(facility=facility, dates_array=date_getter(td=timedelta(days=30)))
  
  # Feedback once done
  print(f"Created file for facility {facility}")

# df.head()

Created file for facility 1
Created file for facility 10
Created file for facility 11
Created file for facility 12
Created file for facility 13
Created file for facility 14
Created file for facility 15
Created file for facility 16
Created file for facility 17
Created file for facility 18
Created file for facility 19
Created file for facility 2
Created file for facility 20
Created file for facility 21
Created file for facility 22
Created file for facility 23
Created file for facility 24
Created file for facility 25
Created file for facility 26
Created file for facility 27
Created file for facility 28
Created file for facility 29
Created file for facility 3
Created file for facility 30
Created file for facility 31
Created file for facility 32
Created file for facility 33
Created file for facility 4
Created file for facility 486
Created file for facility 487
Created file for facility 488
Created file for facility 489
Created file for facility 490
Created file for facility 5
Created file f

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37114 entries, 0 to 37113
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   tsn                37114 non-null  object
 1   time               37114 non-null  object
 2   spots              37114 non-null  object
 3   zones              37114 non-null  object
 4   ParkID             37114 non-null  object
 5   occupancy          37114 non-null  object
 6   MessageDate        37114 non-null  object
 7   facility_id        37114 non-null  object
 8   facility_name      37114 non-null  object
 9   tfnsw_facility_id  37114 non-null  object
dtypes: object(10)
memory usage: 2.8+ MB


In [None]:
df_carparks

Unnamed: 0,API_ID,CarParkName
0,1,Tallawong Station Car Park (historical only)
1,10,Warriewood Car Park
2,11,Narrabeen Car Park
3,12,Mona Vale Car Park
4,13,Dee Why Car Park
5,14,West Ryde Car Park
6,15,Sutherland East Parade Car Park
7,16,Leppington Car Park
8,17,Edmondson Park South Car Park
9,18,St Marys Car Park
