Start by importing all relevant modules

In [2]:
# Importing necessary libraries
# Basics
import pandas as pd
import numpy as np
import itertools
from io import StringIO
from datetime import datetime, timedelta

# Visualization libraries
import matplotlib.pyplot as plt
%matplotlib inline 
import plotly.express as px
import seaborn as sns
import matplotlib.patches as mpatches
from matplotlib.pylab import rcParams
import time

# Modeling libraries
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA        
from sklearn.metrics import mean_squared_error, r2_score
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import acf, pacf, adfuller
from sklearn.linear_model import LassoLarsCV
from sklearn.model_selection import TimeSeriesSplit 
from pmdarima import auto_arima      

from prophet import Prophet 

#Model deployment libraries
import joblib    


# Warnings
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

# Silence statsmodels' ConvergenceWarning first, then every remaining warning.
# NOTE(review): the blanket filter on the last line already matches
# ConvergenceWarning as well, so the targeted filter is redundant while both
# are present.
warnings.simplefilter(action='ignore', category=ConvergenceWarning)
warnings.filterwarnings(action='ignore')

# Display settings: no cap on the number of printed rows, and up to 100
# columns before pandas starts eliding them.
pd.options.display.max_rows = None
pd.options.display.max_columns = 100

  from .autonotebook import tqdm as notebook_tqdm


Reading sample data from TfNSW:

In [3]:
# Load the TfNSW sample response (a JSON file) into a DataFrame and preview
# its first five rows.
df = pd.read_json(path_or_buf="data/NSW response - 2022-03-13.json")
df.head(n=5)

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,211420,700405713,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:08:33,14,West Ryde Car Park,211420TPR001
1,211420,700406314,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:18:34,14,West Ryde Car Park,211420TPR001
2,211420,700406916,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:28:36,14,West Ryde Car Park,211420TPR001
3,211420,700407517,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:38:37,14,West Ryde Car Park,211420TPR001
4,211420,700408119,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:48:39,14,West Ryde Car Park,211420TPR001


Looking at dataframe columns:

In [4]:
# List the column labels of the loaded frame.
df.columns

Index(['tsn', 'time', 'spots', 'zones', 'ParkID', 'occupancy', 'MessageDate',
       'facility_id', 'facility_name', 'tfnsw_facility_id'],
      dtype='object')

And the dataframe info (column dtypes and non-null counts):

In [5]:
# Print per-column dtypes and non-null counts, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 191 entries, 0 to 190
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   tsn                191 non-null    int64 
 1   time               191 non-null    int64 
 2   spots              191 non-null    int64 
 3   zones              191 non-null    object
 4   ParkID             191 non-null    int64 
 5   occupancy          191 non-null    object
 6   MessageDate        191 non-null    object
 7   facility_id        191 non-null    int64 
 8   facility_name      191 non-null    object
 9   tfnsw_facility_id  191 non-null    object
dtypes: int64(5), object(5)
memory usage: 15.0+ KB


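The values in the `time` column are seconds elapsed since 1 January 2000. Converting them into a date and a time of day that are easier to interpret (note: in the output below the converted values are 11 hours behind `MessageDate`, so they appear to be in UTC rather than Sydney local time):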

In [6]:
def convert_seconds_to_datetime(seconds_since_2000, utc_offset_hours=0):
    """Split a "seconds since 2000-01-01" counter into a date and a time of day.

    Parameters
    ----------
    seconds_since_2000 : int or float
        Seconds elapsed since 2000-01-01 00:00:00. NumPy scalars such as
        ``numpy.int64`` are accepted too.
    utc_offset_hours : int or float, optional
        Hours added to the result before it is split. The default of 0
        reproduces the unshifted value.
        NOTE(review): for the sample feed the unshifted result is 11 hours
        behind the ``MessageDate`` column (13:08:33 on 2022-03-12 versus
        2022-03-13T00:08:33), so the counter is presumably UTC-based; pass
        the local offset if local dates / weekdays are wanted -- confirm
        against the feed documentation.

    Returns
    -------
    tuple of (datetime.date, datetime.time)
        The calendar date and the time of day of the resulting moment.
    """
    # datetime.timedelta does not accept NumPy integer scalars, so unwrap any
    # NumPy scalar into the equivalent built-in number first.
    if hasattr(seconds_since_2000, "item"):
        seconds_since_2000 = seconds_since_2000.item()

    epoch = datetime(2000, 1, 1)
    moment = epoch + timedelta(seconds=seconds_since_2000, hours=utc_offset_hours)
    return moment.date(), moment.time()

In [7]:
# Convert every raw counter once; each element of `converted` is a
# (date, time) tuple, which is then unpacked into a two-column frame that
# shares df's index.
converted = df['time'].map(convert_seconds_to_datetime)
date_time_parts = pd.DataFrame(
    converted.tolist(), index=df.index, columns=['date', 'time']
)

# Work on a copy so the frame loaded from JSON stays untouched.
df_copy = df.copy()
df_copy['date'] = date_time_parts['date']  # new column, appended at the end
df_copy['time'] = date_time_parts['time']  # replaces the raw seconds counter
# NOTE(review): in the displayed output these values are 11 h behind
# MessageDate, so they are presumably UTC rather than local time -- confirm.
df_copy.head()

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id,date
0,211420,13:08:33,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:08:33,14,West Ryde Car Park,211420TPR001,2022-03-12
1,211420,13:18:34,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:18:34,14,West Ryde Car Park,211420TPR001,2022-03-12
2,211420,13:28:36,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:28:36,14,West Ryde Car Park,211420TPR001,2022-03-12
3,211420,13:38:37,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:38:37,14,West Ryde Car Park,211420TPR001,2022-03-12
4,211420,13:48:39,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:48:39,14,West Ryde Car Park,211420TPR001,2022-03-12


Creating a column with the day of the week:

In [8]:
# Derive the weekday name with the vectorised .dt accessor rather than a
# per-row Python lambda; .dt.strftime also leaves missing dates as missing
# instead of raising on them.
# NOTE(review): 'date' is derived from the raw seconds counter, which in the
# displayed output is 11 h behind MessageDate (2022-03-12 vs 2022-03-13), so
# the weekday may differ from the local one -- confirm which is intended.
df_copy['day_of_week'] = pd.to_datetime(df_copy['date']).dt.strftime('%A')
df_copy.head()

Unnamed: 0,tsn,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id,date,day_of_week
0,211420,13:08:33,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:08:33,14,West Ryde Car Park,211420TPR001,2022-03-12,Saturday
1,211420,13:18:34,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:18:34,14,West Ryde Car Park,211420TPR001,2022-03-12,Saturday
2,211420,13:28:36,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:28:36,14,West Ryde Car Park,211420TPR001,2022-03-12,Saturday
3,211420,13:38:37,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:38:37,14,West Ryde Car Park,211420TPR001,2022-03-12,Saturday
4,211420,13:48:39,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:48:39,14,West Ryde Car Park,211420TPR001,2022-03-12,Saturday


Reordering columns:

In [9]:
# Target left-to-right layout: tsn and the derived weekday/date/time first,
# followed by the remaining feed columns in their existing relative order.
column_order = [
    'tsn', 'day_of_week', 'date', 'time',
    'spots', 'zones', 'ParkID', 'occupancy',
    'MessageDate', 'facility_id', 'facility_name', 'tfnsw_facility_id',
]
df_copy = df_copy.loc[:, column_order]
df_copy.head()

Unnamed: 0,tsn,day_of_week,date,time,spots,zones,ParkID,occupancy,MessageDate,facility_id,facility_name,tfnsw_facility_id
0,211420,Saturday,2022-03-12,13:08:33,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:08:33,14,West Ryde Car Park,211420TPR001
1,211420,Saturday,2022-03-12,13:18:34,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:18:34,14,West Ryde Car Park,211420TPR001
2,211420,Saturday,2022-03-12,13:28:36,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:28:36,14,West Ryde Car Park,211420TPR001
3,211420,Saturday,2022-03-12,13:38:37,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:38:37,14,West Ryde Car Park,211420TPR001
4,211420,Saturday,2022-03-12,13:48:39,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...",1,"{'loop': '28302', 'total': '2', 'monthlies': N...",2022-03-13T00:48:39,14,West Ryde Car Park,211420TPR001


Dropping the columns that are not needed for now:

In [10]:
# Columns the notebook marks as not needed at this stage.
unused_columns = ['tfnsw_facility_id', 'ParkID', 'MessageDate', 'facility_id']

# Reassign instead of mutating with inplace=True, and tolerate columns that
# are already gone, so that re-running this cell does not raise KeyError.
df_copy = df_copy.drop(columns=unused_columns, errors='ignore')
df_copy.head()

Unnamed: 0,tsn,day_of_week,date,time,spots,zones,occupancy,facility_name
0,211420,Saturday,2022-03-12,13:08:33,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...","{'loop': '28302', 'total': '2', 'monthlies': N...",West Ryde Car Park
1,211420,Saturday,2022-03-12,13:18:34,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...","{'loop': '28302', 'total': '2', 'monthlies': N...",West Ryde Car Park
2,211420,Saturday,2022-03-12,13:28:36,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...","{'loop': '28302', 'total': '2', 'monthlies': N...",West Ryde Car Park
3,211420,Saturday,2022-03-12,13:38:37,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...","{'loop': '28302', 'total': '2', 'monthlies': N...",West Ryde Car Park
4,211420,Saturday,2022-03-12,13:48:39,151,"[{'spots': '151', 'zone_id': '1', 'occupancy':...","{'loop': '28302', 'total': '2', 'monthlies': N...",West Ryde Car Park


Converting the zones column to its own dataframe:

In [11]:
# sample = pd.DataFrame(columns=['spots','zone_id','zone_name','parent_zone_id','occupancy'])

# for key,value in df['zones'].items():
#     val = pd.json_normalize(value)
#     sample = sample.append(val)

# sample.head()