In [2]:
import requests
import json
import pandas as pd
import numpy as np

from datetime import datetime,timedelta

In [3]:
import dotenv
import os

# Load secret file (the API key is read from the `apikey` environment variable)
dotenv.load_dotenv('.env')

headers = {
  "Authorization":f"apikey {os.environ.get('apikey')}"
}
url_carparks = 'https://api.transport.nsw.gov.au/v1/carpark'
# url_carpark_history = 'https://api.transport.nsw.gov.au/v1/carpark/history'

# Fetch the list of car parks. raise_for_status() surfaces HTTP errors here
# instead of letting an error payload be cached as if it were data; the
# timeout keeps a stalled connection from hanging the notebook.
carparks_response = requests.get(url_carparks, headers=headers, timeout=30)
carparks_response.raise_for_status()
list_of_carparks = carparks_response.json()

# Create a json file and write the data if it doesn't exist. An empty file is
# rewritten too, since json.load below cannot parse it.
carparks_path = './data/carparks.json'
os.makedirs('./data', exist_ok=True)
if not os.path.exists(carparks_path) or os.path.getsize(carparks_path) == 0:
  with open(carparks_path, 'w') as f:
    # json.dump writes to the file object; json.dumps only returns a string
    # and raises TypeError when handed a file as a second positional argument
    json.dump(list_of_carparks, f)

# The file holds a single dictionary, so it is loaded with json and converted below
with open(carparks_path) as f:
  data = json.load(f)

# Convert the dictionary to a dataframe: one row per key
df_carparks = pd.DataFrame.from_dict(data, orient='index')
# Resetting the index turns the keys into a regular column so both columns can be labelled
df_carparks = df_carparks.reset_index()
df_carparks.columns = ['facility_id', 'CarParkName']

# Request the details of every facility listed above
carpark_details_array=[]
for facility in df_carparks['facility_id']:
  details_response = requests.get(
    url_carparks,
    headers=headers,
    params={'facility': facility},
    timeout=30,
  )
  details_response.raise_for_status()

  # Add the parsed record to the array
  carpark_details_array.append(details_response.json())

carpark_details_array

[{'tsn': '2155384',
  'time': '742877319',
  'spots': '1004',
  'zones': [{'spots': '152',
    'zone_id': 'CPS-CUD1',
    'occupancy': {'loop': None,
     'total': '150',
     'monthlies': None,
     'open_gate': None,
     'transients': None},
    'zone_name': 'Tallawong Station At-Grade A Car Park',
    'parent_zone_id': '0'},
   {'spots': '455',
    'zone_id': 'CPS-CUD2',
    'occupancy': {'loop': None,
     'total': '455',
     'monthlies': None,
     'open_gate': None,
     'transients': None},
    'zone_name': 'Tallawong Station At-Grade B Car Park',
    'parent_zone_id': '0'},
   {'spots': '397',
    'zone_id': 'CPS-CUD3',
    'occupancy': {'loop': None,
     'total': '376',
     'monthlies': None,
     'open_gate': None,
     'transients': None},
    'zone_name': 'Tallawong Station At-Grade D Car Park',
    'parent_zone_id': '0'}],
  'ParkID': '1',
  'occupancy': {'loop': None,
   'total': '981',
   'monthlies': None,
   'open_gate': None,
   'transients': None},
  'MessageDate

In [4]:
# Persist the fetched car park details as JSON
details_path = './data/carpark_details.json'
with open(details_path, 'w') as details_file:
  json.dump(carpark_details_array, details_file)

# Load the saved file back as a dataframe and display it
df_carpark_details = pd.read_json(details_path)
df_carpark_details

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,2155384,742877319,1004,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...",1,"{'loop': None, 'total': '981', 'monthlies': No...",2023-07-17T12:48:39,1,Tallawong Station Car Park,2155384CCP001
1,2101131,760522347,244,"[{'spots': '244', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '74', 'monthlies': Non...",2024-02-06T19:12:27,10,Warriewood Car Park,2101131TPR001
2,2101130,760522396,46,"[{'spots': '46', 'zone_id': '1', 'occupancy': ...",1,"{'loop': '63979', 'total': '8', 'monthlies': N...",2024-02-06T19:13:16,11,Narrabeen Car Park,2101130TPR001
3,2103108,760522542,68,"[{'spots': '68', 'zone_id': '1', 'occupancy': ...",1,"{'loop': None, 'total': '42', 'monthlies': Non...",2024-02-06T19:15:42,12,Mona Vale Car Park,2103108TPR001
4,2099207,760522507,117,"[{'spots': '117', 'zone_id': '1', 'occupancy':...",1,"{'loop': '53466', 'total': '43', 'monthlies': ...",2024-02-06T19:15:07,13,Dee Why Car Park,2099207TPR001
5,211420,760522288,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '228450', 'total': '27', 'monthlies':...",2024-02-06T19:11:28,14,West Ryde Car Park,211420TPR001
6,223210,760522519,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '42', 'monthlies': Non...",2024-02-06T19:15:19,15,Sutherland East Parade Car Park,223210TPR001
7,217933,760522565,1884,"[{'spots': '1884', 'zone_id': '1', 'occupancy'...",1,"{'loop': '811930', 'total': '365', 'monthlies'...",2024-02-06T19:16:05,16,Leppington Car Park,217933TPR001
8,217426,760522560,1429,"[{'spots': '1429', 'zone_id': '1', 'occupancy'...",1,"{'loop': '742943', 'total': '148', 'monthlies'...",2024-02-06T19:16:00,17,Edmondson Park South Car Park,217426TPR001
9,276010,760522575,682,"[{'spots': '682', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '116', 'monthlies': No...",2024-02-06T19:16:15,18,St Marys Car Park,276010TPR001


Dropping facility IDs 486-490, as they will not be used: the data source indicates that they contain inaccurate data in the columns needed for this analysis.

In [5]:
# Drop facility ids 486-490 by value rather than by row position. The row
# labels 28-32 only point at those facilities in the freshly loaded frame;
# after the reset/sort below they point at other car parks, so re-running a
# positional, in-place drop would silently remove the wrong rows.
excluded_facility_ids = [486, 487, 488, 489, 490]
is_excluded = df_carpark_details['facility_id'].isin(excluded_facility_ids)

# Same order of operations as before: filter, reset the index, then sort by
# facility_id (so the displayed index keeps the pre-sort row numbers)
df_carpark_details = (
  df_carpark_details
  .loc[~is_excluded]
  .reset_index(drop=True)
  .sort_values(by='facility_id')
)
df_carpark_details

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,2155384,742877319,1004,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...",1,"{'loop': None, 'total': '981', 'monthlies': No...",2023-07-17T12:48:39,1,Tallawong Station Car Park,2155384CCP001
11,2155382,742877319,1374,"[{'spots': '368', 'zone_id': 'CPS-KVE1', 'occu...",1,"{'loop': None, 'total': '1363', 'monthlies': N...",2023-07-17T12:48:39,2,Kellyville Station Car Park,2155382CCP001
22,2153478,742877319,800,"[{'spots': '800', 'zone_id': 'CPS-BLV', 'occup...",1,"{'loop': None, 'total': '314', 'monthlies': No...",2023-07-17T12:48:39,3,Bella Vista Station Car Park,2153478CCP001
27,2154392,742877319,600,"[{'spots': '600', 'zone_id': 'CPS-SHW', 'occup...",1,"{'loop': None, 'total': '532', 'monthlies': No...",2023-07-17T12:48:39,4,Hills Showground Station Car Park,2154392CCP001
28,2126158,742877319,400,"[{'spots': '400', 'zone_id': 'CPS-CHE', 'occup...",1,"{'loop': None, 'total': '400', 'monthlies': No...",2023-07-17T12:48:39,5,Cherrybrook Station Car Park,2126158CCP001
29,207210,760522575,213,"[{'spots': '213', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '38', 'monthlies': Non...",2024-02-06T19:16:15,6,Gordon Henry St North Car Park,207210TPR001
30,253330,760522629,42,"[{'spots': '42', 'zone_id': '1', 'occupancy': ...",1,"{'loop': None, 'total': '-1', 'monthlies': Non...",2024-02-06T19:17:09,7,Kiama Car Park,253330TPR001
31,225040,760522416,1057,"[{'spots': '1057', 'zone_id': '1', 'occupancy'...",1,"{'loop': None, 'total': '264', 'monthlies': No...",2024-02-06T19:13:36,8,Gosford Car Park,225040TPR001
32,221210,760522566,934,"[{'spots': '934', 'zone_id': '1', 'occupancy':...",1,"{'loop': '322663', 'total': '116', 'monthlies'...",2024-02-06T19:16:06,9,Revesby Car Park,221210TPR001
1,2101131,760522347,244,"[{'spots': '244', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '74', 'monthlies': Non...",2024-02-06T19:12:27,10,Warriewood Car Park,2101131TPR001


Defining a `date_getter` function that returns an array with the date of every day from a target date (31 December 2023 minus a given time delta) up to and including 31 December 2023.

In [6]:
def date_getter(td, cutoff_date=datetime(2023,12,31)):
  """Return the dates from ``cutoff_date - td`` up to ``cutoff_date`` as strings.

  Args:
    td: ``timedelta`` to look back from ``cutoff_date``.
    cutoff_date: last ``datetime`` of the period. Defaults to 31 December
      2023, the value that was previously hard-coded.

  Returns:
    A list of "YYYY-MM-DD" strings, oldest first. Dates step in whole days
    starting at ``cutoff_date - td``, so the list has ``td.days + 1`` entries
    and is empty when ``td`` is negative.
  """
  # The first date to be searched for
  start_date = cutoff_date - td

  # One entry per whole day that fits between start_date and cutoff_date;
  # range() is empty for a negative td because td.days is then <= -1
  return [
    (start_date + timedelta(days=offset)).strftime("%Y-%m-%d")
    for offset in range(td.days + 1)
  ]

In [7]:
import os

def carpark_history_period(facility, dates_array):
    """Download a facility's car park history for several dates into one DataFrame.

    One request is sent to the car park history endpoint per entry in
    ``dates_array``. The combined records are written to
    ``./data/carpark_history/facility_{facility}.json``, replacing any earlier
    file, and the saved content is returned as a DataFrame.

    Args:
        facility: facility id, used in the request and in the output file name.
        dates_array: iterable of event dates as strings, e.g. the output of
            ``date_getter``.

    Returns:
        pandas.DataFrame built from the records of all requested dates.

    Raises:
        requests.HTTPError: if any request comes back with an error status.
        ValueError: if a response body is not a JSON list of records.
    """
    json_file_path = f"./data/carpark_history/facility_{facility}.json"
    url = 'https://api.transport.nsw.gov.au/v1/carpark/history'

    # Request header
    headers = {
        "Authorization":f"apikey {os.environ.get('apikey')}"
    }

    # Make a request for each date and collect all records in one list
    data_array = []
    for date in dates_array:
        response = requests.get(
            url,
            headers=headers,
            params={'facility': facility, 'eventdate': date},
            timeout=30,
        )
        # Fail on HTTP errors instead of merging an error payload into the data
        response.raise_for_status()

        records = response.json()
        if not isinstance(records, list):
            raise ValueError(
                f"Unexpected response for facility {facility} on {date}: "
                f"expected a list of records, got {type(records).__name__}"
            )
        data_array.extend(records)

    # Saving data to json file. Opening with 'w' replaces any previous file, and
    # doing so only after every request succeeded keeps the old file intact if
    # a download fails part-way.
    os.makedirs(os.path.dirname(json_file_path), exist_ok=True)
    with open(json_file_path, 'w') as f:
        json.dump(data_array, f)

    # Reading json file back so the returned frame reflects what was saved
    with open(json_file_path) as f:
        data = json.load(f)

    # Converting read data into a pandas dataframe
    return pd.DataFrame(data)

In [8]:
# for key,row in df_carparks.iterrows():
#   # Get the facility id
#   facility = row['facility_id']
  
#   # Save file with carpark history for a month
#   carpark_history_period(facility=facility, dates_array=date_getter(td=timedelta(days=30)))
  
#   # Feedback once done
#   print(f"Created file for facility {facility}")

# df.head()

Displaying the facilities and their IDs

In [9]:
df_carparks

Unnamed: 0,facility_id,CarParkName
0,1,Tallawong Station Car Park (historical only)
1,10,Warriewood Car Park
2,11,Narrabeen Car Park
3,12,Mona Vale Car Park
4,13,Dee Why Car Park
5,14,West Ryde Car Park
6,15,Sutherland East Parade Car Park
7,16,Leppington Car Park
8,17,Edmondson Park South Car Park
9,18,St Marys Car Park


Inspecting the `carpark_details.json` data

In [10]:
# Re-read the saved car park details from disk into a separate dataframe.
# This does not reuse df_carpark_details, so rows dropped from that frame
# are still present here.
park_details = pd.read_json("data/carpark_details.json")
park_details.head()

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,2155384,742877319,1004,"[{'spots': '152', 'zone_id': 'CPS-CUD1', 'occu...",1,"{'loop': None, 'total': '981', 'monthlies': No...",2023-07-17T12:48:39,1,Tallawong Station Car Park,2155384CCP001
1,2101131,760522347,244,"[{'spots': '244', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '74', 'monthlies': Non...",2024-02-06T19:12:27,10,Warriewood Car Park,2101131TPR001
2,2101130,760522396,46,"[{'spots': '46', 'zone_id': '1', 'occupancy': ...",1,"{'loop': '63979', 'total': '8', 'monthlies': N...",2024-02-06T19:13:16,11,Narrabeen Car Park,2101130TPR001
3,2103108,760522542,68,"[{'spots': '68', 'zone_id': '1', 'occupancy': ...",1,"{'loop': None, 'total': '42', 'monthlies': Non...",2024-02-06T19:15:42,12,Mona Vale Car Park,2103108TPR001
4,2099207,760522507,117,"[{'spots': '117', 'zone_id': '1', 'occupancy':...",1,"{'loop': '53466', 'total': '43', 'monthlies': ...",2024-02-06T19:15:07,13,Dee Why Car Park,2099207TPR001


In [11]:
park_details.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38 entries, 0 to 37
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   tsn                38 non-null     int64 
 1   time               38 non-null     int64 
 2   spots              38 non-null     int64 
 3   zones              38 non-null     object
 4   ParkID             38 non-null     int64 
 5   occupancy          38 non-null     object
 6   MessageDate        38 non-null     object
 7   facility_id        38 non-null     int64 
 8   facility_name      38 non-null     object
 9   tfnsw_facility_id  38 non-null     object
dtypes: int64(5), object(5)
memory usage: 3.1+ KB


Inspecting the `carpark_history_facility_14.json` file

In [12]:
# Load a stored history file for facility 14 and preview its first rows.
# NOTE(review): this file sits directly under data/, whereas
# carpark_history_period writes to data/carpark_history/facility_<id>.json —
# confirm which file is intended.
park_history = pd.read_json("data/carpark_history_facility_14.json")
park_history.head()

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,211420,757083975,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215031', 'total': '0', 'monthlies': ...",2023-12-29T00:06:15,14,West Ryde Car Park,211420TPR001
1,211420,757084211,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215032', 'total': '1', 'monthlies': ...",2023-12-29T00:10:11,14,West Ryde Car Park,211420TPR001
2,211420,757084439,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215033', 'total': '0', 'monthlies': ...",2023-12-29T00:13:59,14,West Ryde Car Park,211420TPR001
3,211420,757085039,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215033', 'total': '0', 'monthlies': ...",2023-12-29T00:23:59,14,West Ryde Car Park,211420TPR001
4,211420,757085641,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '215033', 'total': '0', 'monthlies': ...",2023-12-29T00:34:01,14,West Ryde Car Park,211420TPR001


In [13]:
park_history.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 945 entries, 0 to 944
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   tsn                945 non-null    int64 
 1   time               945 non-null    int64 
 2   spots              945 non-null    int64 
 3   zones              945 non-null    object
 4   ParkID             945 non-null    int64 
 5   occupancy          945 non-null    object
 6   MessageDate        945 non-null    object
 7   facility_id        945 non-null    int64 
 8   facility_name      945 non-null    object
 9   tfnsw_facility_id  945 non-null    object
dtypes: int64(5), object(5)
memory usage: 74.0+ KB


Inspecting the `carparks.json` dataset

In [14]:
# orient="index" turns each top-level key of the JSON object into a row label,
# so the frame has the keys as its index and a single value column named 0
carparks = pd.read_json("data/carparks.json", orient= "index")
carparks.head()


Unnamed: 0,0
1,Tallawong Station Car Park (historical only)
10,Warriewood Car Park
11,Narrabeen Car Park
12,Mona Vale Car Park
13,Dee Why Car Park


In [15]:
carparks.info()

<class 'pandas.core.frame.DataFrame'>
Index: 38 entries, 1 to 9
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       38 non-null     object
dtypes: object(1)
memory usage: 608.0+ bytes


Inspecting the `NSW response - 2022-03-13.json` file

In [16]:
# Load a separately stored response file (dated 2022-03-13 according to its
# file name) and preview its first rows
nsw_response = pd.read_json("data/NSW response - 2022-03-13.json")
nsw_response.head()

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,211420,700405713,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:08:33,14,West Ryde Car Park,211420TPR001
1,211420,700406314,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:18:34,14,West Ryde Car Park,211420TPR001
2,211420,700406916,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:28:36,14,West Ryde Car Park,211420TPR001
3,211420,700407517,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:38:37,14,West Ryde Car Park,211420TPR001
4,211420,700408119,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:48:39,14,West Ryde Car Park,211420TPR001


In [17]:
nsw_response.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191 entries, 0 to 190
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   tsn                191 non-null    int64 
 1   time               191 non-null    int64 
 2   spots              191 non-null    int64 
 3   zones              191 non-null    object
 4   ParkID             191 non-null    int64 
 5   occupancy          191 non-null    object
 6   MessageDate        191 non-null    object
 7   facility_id        191 non-null    int64 
 8   facility_name      191 non-null    object
 9   tfnsw_facility_id  191 non-null    object
dtypes: int64(5), object(5)
memory usage: 15.0+ KB


In [18]:
# Load the saved history for facility 15 (path layout matches the files
# written by carpark_history_period).
# NOTE(review): the bare `_park` renders the whole frame, which pandas
# truncates in the middle; `.head()` would match the other inspection cells.
_park = pd.read_json("data/carpark_history/facility_15.json")
_park

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,223210,754664452,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '16', 'monthlies': Non...",2023-12-01T00:00:52,15,Sutherland East Parade Car Park,223210TPR001
1,223210,754665189,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '15', 'monthlies': Non...",2023-12-01T00:13:09,15,Sutherland East Parade Car Park,223210TPR001
2,223210,754665355,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '14', 'monthlies': Non...",2023-12-01T00:15:55,15,Sutherland East Parade Car Park,223210TPR001
3,223210,754665706,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '13', 'monthlies': Non...",2023-12-01T00:21:46,15,Sutherland East Parade Car Park,223210TPR001
4,223210,754665740,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '12', 'monthlies': Non...",2023-12-01T00:22:20,15,Sutherland East Parade Car Park,223210TPR001
...,...,...,...,...,...,...,...,...,...,...
18470,223210,757341099,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '98', 'monthlies': Non...",2023-12-31T23:31:39,15,Sutherland East Parade Car Park,223210TPR001
18471,223210,757341147,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '97', 'monthlies': Non...",2023-12-31T23:32:27,15,Sutherland East Parade Car Park,223210TPR001
18472,223210,757341211,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '96', 'monthlies': Non...",2023-12-31T23:33:31,15,Sutherland East Parade Car Park,223210TPR001
18473,223210,757341343,373,"[{'spots': '373', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '95', 'monthlies': Non...",2023-12-31T23:35:43,15,Sutherland East Parade Car Park,223210TPR001


In [19]:
_park.describe()

Unnamed: 0,tsn,time,spots,ParkID,facility_id
count,18475.0,18475.0,18475.0,18475.0,18475.0
mean,223210.0,755828400.0,373.0,1.0,15.0
std,0.0,704040.4,0.0,0.0,0.0
min,223210.0,754664500.0,373.0,1.0,15.0
25%,223210.0,755224800.0,373.0,1.0,15.0
50%,223210.0,755767100.0,373.0,1.0,15.0
75%,223210.0,756341000.0,373.0,1.0,15.0
max,223210.0,757342600.0,373.0,1.0,15.0


In [20]:
# Load the saved history for facility 6 and preview its first rows
data_one = pd.read_json("data/carpark_history/facility_6.json")
data_one.head()

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,207210,754664994,213,"[{'spots': '213', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '9', 'monthlies': None...",2023-12-01T00:09:54,6,Gordon Henry St North Car Park,207210TPR001
1,207210,754665748,213,"[{'spots': '213', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '8', 'monthlies': None...",2023-12-01T00:22:28,6,Gordon Henry St North Car Park,207210TPR001
2,207210,754666073,213,"[{'spots': '213', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '7', 'monthlies': None...",2023-12-01T00:27:53,6,Gordon Henry St North Car Park,207210TPR001
3,207210,754666646,213,"[{'spots': '213', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '6', 'monthlies': None...",2023-12-01T00:37:26,6,Gordon Henry St North Car Park,207210TPR001
4,207210,754667165,213,"[{'spots': '213', 'zone_id': '1', 'occupancy':...",1,"{'loop': None, 'total': '5', 'monthlies': None...",2023-12-01T00:46:05,6,Gordon Henry St North Car Park,207210TPR001


In [21]:
# Load the saved history for facilities 15 through 19, one dataframe each,
# read in ascending facility order
data_15, data_16, data_17, data_18, data_19 = (
    pd.read_json(f"data/carpark_history/facility_{facility_id}.json")
    for facility_id in range(15, 20)
)