In [9]:
import requests
import json
import pandas as pd
import numpy as np

from datetime import datetime,timedelta

In [10]:
import dotenv
import os

# Load secret file
dotenv.load_dotenv('.env')

headers = {
  "Authorization":f"apikey {os.environ.get('apikey')}"
}
url_carparks = 'https://api.transport.nsw.gov.au/v1/carpark'
# url_carpark_history = 'https://api.transport.nsw.gov.au/v1/carpark/history'

list_of_carparks = requests.get(url_carparks, headers=headers).json()

# Create a json file and write the data if it doesn't exist
if not os.path.exists('./data/carparks.json'):
  with open('./data/carparks.json', 'w') as f:
    json.dumps(list_of_carparks, f)

# The format of the file itself is a dictionary, thus requiring some creativity to read
with open('./data/carparks.json') as f:
  data = json.load(f)

# Convert the dictionary to a dataframe
df_carparks = pd.DataFrame.from_dict(data, orient='index')
# Resetting the index to label the columns afterwards
df_carparks = df_carparks.reset_index()
df_carparks.columns = ['facility_id', 'CarParkName']

df_carparks

carpark_details_array=[]
for index, row in df_carparks.iterrows():
  facility = row['facility_id']
  url = f'https://api.transport.nsw.gov.au/v1/carpark?facility={facility}'


  # Make request
  response = requests.get(url, headers=headers).json()

  # Add to array
  carpark_details_array.append(response)

carpark_details_array

[{'tsn': '2155384',
  'time': '742877319',
  'spots': '1004',
  'zones': [{'spots': '152',
    'zone_id': 'CPS-CUD1',
    'occupancy': {'loop': None,
     'total': '150',
     'monthlies': None,
     'open_gate': None,
     'transients': None},
    'zone_name': 'Tallawong Station At-Grade A Car Park',
    'parent_zone_id': '0'},
   {'spots': '455',
    'zone_id': 'CPS-CUD2',
    'occupancy': {'loop': None,
     'total': '455',
     'monthlies': None,
     'open_gate': None,
     'transients': None},
    'zone_name': 'Tallawong Station At-Grade B Car Park',
    'parent_zone_id': '0'},
   {'spots': '397',
    'zone_id': 'CPS-CUD3',
    'occupancy': {'loop': None,
     'total': '376',
     'monthlies': None,
     'open_gate': None,
     'transients': None},
    'zone_name': 'Tallawong Station At-Grade D Car Park',
    'parent_zone_id': '0'}],
  'ParkID': '1',
  'occupancy': {'loop': None,
   'total': '981',
   'monthlies': None,
   'open_gate': None,
   'transients': None},
  'MessageDate

In [11]:
# Saving carpark details to file
with open('./data/carpark_details.json', 'w') as f:
  json.dump(carpark_details_array, f)

# Reading as a dataframe
df_carpark_details = pd.read_json('./data/carpark_details.json')
df_carpark_details

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,2155384,742877319,1004,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...",1,"{'loop': None, 'total': '981', 'monthlies': No...",2023-07-17T12:48:39,1,Tallawong Station Car Park,2155384CCP001
1,2101131,760455665,244,"[{'spots': '244', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '17', 'monthlies': Non...",2024-02-06T00:41:05,10,Warriewood Car Park,2101131TPR001
2,2101130,760467741,46,"[{'spots': '46', 'zone_id': '1', 'occupancy': ...",1,"{'loop': '63855', 'total': '2', 'monthlies': N...",2024-02-06T04:02:21,11,Narrabeen Car Park,2101130TPR001
3,2103108,760467287,68,"[{'spots': '68', 'zone_id': '1', 'occupancy': ...",1,"{'loop': None, 'total': '2', 'monthlies': None...",2024-02-06T03:54:47,12,Mona Vale Car Park,2103108TPR001
4,2099207,760467785,117,"[{'spots': '117', 'zone_id': '1', 'occupancy':...",1,"{'loop': '53297', 'total': '28', 'monthlies': ...",2024-02-06T04:03:05,13,Dee Why Car Park,2099207TPR001
5,211420,760468097,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '228066', 'total': '1', 'monthlies': ...",2024-02-06T04:08:17,14,West Ryde Car Park,211420TPR001
6,223210,760467948,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '18', 'monthlies': Non...",2024-02-06T04:05:48,15,Sutherland East Parade Car Park,223210TPR001
7,217933,760468094,1884,"[{'spots': '1884', 'zone_id': '1', 'occupancy'...",1,"{'loop': '809498', 'total': '177', 'monthlies'...",2024-02-06T04:08:14,16,Leppington Car Park,217933TPR001
8,217426,760467619,1429,"[{'spots': '1429', 'zone_id': '1', 'occupancy'...",1,"{'loop': '741151', 'total': '6', 'monthlies': ...",2024-02-06T04:00:19,17,Edmondson Park South Car Park,217426TPR001
9,276010,760467979,682,"[{'spots': '682', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '47', 'monthlies': Non...",2024-02-06T04:06:19,18,St Marys Car Park,276010TPR001


Deleting facility ids 486-490 as they will not be used. The source of the info indicates they contain inaccurate data for the columns that will be needed.

In [12]:
# Dropping rows 28,29,30,31,32
df_carpark_details.drop(index=[28,29,30,31,32],inplace=True)

# Resetting the index
df_carpark_details.reset_index(drop=True,inplace=True)

# Sorting by facility_id
df_carpark_details.sort_values(by='facility_id',inplace=True)
df_carpark_details

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,2155384,742877319,1004,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...",1,"{'loop': None, 'total': '981', 'monthlies': No...",2023-07-17T12:48:39,1,Tallawong Station Car Park,2155384CCP001
11,2155382,742877319,1374,"[{'spots': '368', 'zone_id': 'CPS-KVE1', 'occu...",1,"{'loop': None, 'total': '1363', 'monthlies': N...",2023-07-17T12:48:39,2,Kellyville Station Car Park,2155382CCP001
22,2153478,742877319,800,"[{'spots': '800', 'zone_id': 'CPS-BLV', 'occup...",1,"{'loop': None, 'total': '314', 'monthlies': No...",2023-07-17T12:48:39,3,Bella Vista Station Car Park,2153478CCP001
27,2154392,742877319,600,"[{'spots': '600', 'zone_id': 'CPS-SHW', 'occup...",1,"{'loop': None, 'total': '532', 'monthlies': No...",2023-07-17T12:48:39,4,Hills Showground Station Car Park,2154392CCP001
28,2126158,742877319,400,"[{'spots': '400', 'zone_id': 'CPS-CHE', 'occup...",1,"{'loop': None, 'total': '400', 'monthlies': No...",2023-07-17T12:48:39,5,Cherrybrook Station Car Park,2126158CCP001
29,207210,760456446,213,"[{'spots': '213', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '0', 'monthlies': None...",2024-02-06T00:54:06,6,Gordon Henry St North Car Park,207210TPR001
30,253330,760468195,42,"[{'spots': '42', 'zone_id': '1', 'occupancy': ...",1,"{'loop': None, 'total': '-2', 'monthlies': Non...",2024-02-06T04:09:55,7,Kiama Car Park,253330TPR001
31,225040,760467850,1057,"[{'spots': '1057', 'zone_id': '1', 'occupancy'...",1,"{'loop': None, 'total': '135', 'monthlies': No...",2024-02-06T04:04:10,8,Gosford Car Park,225040TPR001
32,221210,760468040,934,"[{'spots': '934', 'zone_id': '1', 'occupancy':...",1,"{'loop': '321576', 'total': '62', 'monthlies':...",2024-02-06T04:07:20,9,Revesby Car Park,221210TPR001
1,2101131,760455665,244,"[{'spots': '244', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '17', 'monthlies': Non...",2024-02-06T00:41:05,10,Warriewood Car Park,2101131TPR001


Attempting to create a date_time function that gives the dates of all days in an array from 31st December 2023 to a target date that is separated by a time delta

In [13]:
def date_getter(td):
  # Array that stores the dates to be searched for
  date_period_array = []
  
  # The last date to be searched for
  cutoff_date = datetime(2023,12,31)
  target_date = cutoff_date - td
  
  # Ensure that records of each day are obtained
  delta = timedelta(days=1)
  
  while target_date <= cutoff_date:
    date_period_array.append(target_date.strftime("%Y-%m-%d"))
    target_date += delta

  return date_period_array

In [14]:
import os

def carpark_history_period(facility, dates_array):
    data_array = []
    json_file_path = f"./data/carpark history/facility_{facility}.json"
    
    # Request header
    headers = {
        "Authorization":f"apikey {os.environ.get('apikey')}"
    }
    
    # Deleting file if it exists
    if os.path.exists(json_file_path):
        os.remove(json_file_path)
    
    # Make a request for each date
    for date in dates_array:
        url = f'https://api.transport.nsw.gov.au/v1/carpark/history?facility={facility}&eventdate={date}'
    
        response = requests.get(url, headers=headers).json()
    
        if data_array == []:
            data_array = response
        else:
            data_array = data_array + response
    
    # Saving data to json file
    with open(json_file_path, 'w') as f:
        json.dump(data_array, f)

    # Reading json file
    with open(json_file_path) as f:
        data = json.load(f)
    
    # Converting read data into a pandas dataframe
    return pd.DataFrame(data)

In [19]:
for key,row in df_carparks.iterrows():
  # Get the facility id
  facility = row['facility_id']
  
  # Save file with carpark history for a month
  carpark_history_period(facility=facility, dates_array=date_getter(td=timedelta(days=30)))
  
  # Feedback once done
  print(f"Created file for facility {facility}")

# df.head()

FileNotFoundError: [Errno 2] No such file or directory: './data/carpark history/facility_1.json'

In [16]:
# df.info()

In [20]:
# df_carparks

In [22]:
park_details = pd.read_json("data/carpark_details.json")
park_details.head(5)

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,2155384,742877319,1004,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...",1,"{'loop': None, 'total': '981', 'monthlies': No...",2023-07-17T12:48:39,1,Tallawong Station Car Park,2155384CCP001
1,2101131,760455665,244,"[{'spots': '244', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '17', 'monthlies': Non...",2024-02-06T00:41:05,10,Warriewood Car Park,2101131TPR001
2,2101130,760467741,46,"[{'spots': '46', 'zone_id': '1', 'occupancy': ...",1,"{'loop': '63855', 'total': '2', 'monthlies': N...",2024-02-06T04:02:21,11,Narrabeen Car Park,2101130TPR001
3,2103108,760467287,68,"[{'spots': '68', 'zone_id': '1', 'occupancy': ...",1,"{'loop': None, 'total': '2', 'monthlies': None...",2024-02-06T03:54:47,12,Mona Vale Car Park,2103108TPR001
4,2099207,760467785,117,"[{'spots': '117', 'zone_id': '1', 'occupancy':...",1,"{'loop': '53297', 'total': '28', 'monthlies': ...",2024-02-06T04:03:05,13,Dee Why Car Park,2099207TPR001


In [25]:
park_history = pd.read_json("data/carpark_history_facility_14.json")
park_history.head()

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,211420,757083975,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215031', 'total': '0', 'monthlies': ...",2023-12-29T00:06:15,14,West Ryde Car Park,211420TPR001
1,211420,757084211,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215032', 'total': '1', 'monthlies': ...",2023-12-29T00:10:11,14,West Ryde Car Park,211420TPR001
2,211420,757084439,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215033', 'total': '0', 'monthlies': ...",2023-12-29T00:13:59,14,West Ryde Car Park,211420TPR001
3,211420,757085039,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215033', 'total': '0', 'monthlies': ...",2023-12-29T00:23:59,14,West Ryde Car Park,211420TPR001
4,211420,757085641,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215033', 'total': '0', 'monthlies': ...",2023-12-29T00:34:01,14,West Ryde Car Park,211420TPR001


In [26]:
carparks = pd.read_json("data/carparks.json")
carparks

ValueError: If using all scalar values, you must pass an index