In [47]:
# Imports: regex utilities, pandas, and pathlib for file paths
import re
import pandas as pd
from pathlib import Path



In [48]:
# Source Excel file path for private sector vacancies
file_path = Path('../data/Actual/raw/private_sector.xlsx')

In [49]:
# Load the raw worksheet into a DataFrame
#   - sheet_name: source tab
#   - header: first row contains column names
df = pd.read_excel(file_path, sheet_name="Data1", header=0)

In [50]:
# Quick peek at the first 15 rows to inspect header/data lines
df.head(15)

Unnamed: 0.1,Unnamed: 0,Job Vacancies ; Private ; New South Wales ;,Job Vacancies ; Private ; Victoria ;,Job Vacancies ; Private ; Queensland ;,Job Vacancies ; Private ; South Australia ;,Job Vacancies ; Private ; Western Australia ;,Job Vacancies ; Private ; Tasmania ;,Job Vacancies ; Private ; Northern Territory ;,Job Vacancies ; Private ; Australian Capital Territory ;,Job Vacancies ; Private ; Australia ;,...,Job Vacancies ; Private ; Australia ;.2,Standard Error of Job Vacancies ; Private ; New South Wales ;,Standard Error of Job Vacancies ; Private ; Victoria ;,Standard Error of Job Vacancies ; Private ; Queensland ;,Standard Error of Job Vacancies ; Private ; South Australia ;,Standard Error of Job Vacancies ; Private ; Western Australia ;,Standard Error of Job Vacancies ; Private ; Tasmania ;,Standard Error of Job Vacancies ; Private ; Northern Territory ;,Standard Error of Job Vacancies ; Private ; Australian Capital Territory ;,Standard Error of Job Vacancies ; Private ; Australia ;
0,Unit,000,000,000,000,000,000,000,000,000,...,000,000,000,000,000,000,000,000,000,000
1,Series Type,Original,Original,Original,Original,Original,Original,Original,Original,Original,...,Trend,Original,Original,Original,Original,Original,Original,Original,Original,Original
2,Data Type,STOCK,STOCK,STOCK,STOCK,STOCK,STOCK,STOCK,STOCK,STOCK,...,STOCK,RATIO,RATIO,RATIO,RATIO,RATIO,RATIO,RATIO,RATIO,RATIO
3,Frequency,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter,...,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter,Quarter
4,Collection Month,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
5,Series Start,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1979-05-15 00:00:00,...,1979-05-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1983-11-15 00:00:00,1979-05-15 00:00:00
6,Series End,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,...,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00,2025-05-15 00:00:00
7,No. Obs,167,167,167,167,167,167,167,167,185,...,185,167,167,167,167,167,167,167,167,185
8,Series ID,A590702K,A590720R,A590738L,A590756T,A590774W,A590792A,A590810V,A590828T,A590684T,...,A590688A,A590703L,A590721T,A590739R,A590757V,A590775X,A590793C,A590811W,A590829V,A590685V
9,1979-05-15 00:00:00,,,,,,,,,28.6,...,31.2,,,,,,,,,


In [51]:
# Drop the first 9 rows (metadata in the sheet) and reset the index
df = df.iloc[9:].reset_index(drop=True)

In [52]:
# Inspect column names, dtypes, and non-null counts after trimming header rows
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185 entries, 0 to 184
Data columns (total 21 columns):
 #   Column                                                                        Non-Null Count  Dtype 
---  ------                                                                        --------------  ----- 
 0   Unnamed: 0                                                                    185 non-null    object
 1   Job Vacancies ;  Private ;  New South Wales ;                                 162 non-null    object
 2   Job Vacancies ;  Private ;  Victoria ;                                        162 non-null    object
 3   Job Vacancies ;  Private ;  Queensland ;                                      162 non-null    object
 4   Job Vacancies ;  Private ;  South Australia ;                                 162 non-null    object
 5   Job Vacancies ;  Private ;  Western Australia ;                               161 non-null    object
 6   Job Vacancies ;  Private ;  Tasmania ;    

In [53]:
# Rename the first column to a canonical "Date" and parse as datetime
time_col = df.columns[0]
df = df.rename(columns={time_col: "Date"})
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

In [54]:
# Validate that "Date" is datetime and review missingness after parsing
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185 entries, 0 to 184
Data columns (total 21 columns):
 #   Column                                                                        Non-Null Count  Dtype         
---  ------                                                                        --------------  -----         
 0   Date                                                                          185 non-null    datetime64[ns]
 1   Job Vacancies ;  Private ;  New South Wales ;                                 162 non-null    object        
 2   Job Vacancies ;  Private ;  Victoria ;                                        162 non-null    object        
 3   Job Vacancies ;  Private ;  Queensland ;                                      162 non-null    object        
 4   Job Vacancies ;  Private ;  South Australia ;                                 162 non-null    object        
 5   Job Vacancies ;  Private ;  Western Australia ;                               161 non-null  

In [55]:
# Clean column names into consistent, concise identifiers
#  - Standard errors -> prefix with "SE_" and suffix "_Private"
#  - Job Vacancies -> "<State>_Private"; handle Trend/Seasonal special cases
#  - Otherwise keep name unchanged
# This simplifies downstream selection and plotting.
def clean_col(name: str):
    name = str(name).strip()
    if name.startswith("Standard Error of Job Vacancies"):
        m = re.search(r"Standard Error of Job Vacancies\s*;\s*Private\s*;\s*(.*?)\s*;", name)
        state = m.group(1) if m else name
        return f"SE_{state}_Private"
    if name.startswith("Job Vacancies"):
        if "Seasonally Adjusted" in name:
            return "Australia_Private_Seasonal"
        if "Trend" in name:
            return "Australia_Private_Trend"
        m = re.search(r"Job Vacancies\s*;\s*Private\s*;\s*(.*?)\s*;", name)
        state = m.group(1) if m else name
        return f"{state}_Private"
    return name

# Apply the cleaner and inspect the new schema
df = df.rename(columns={c: clean_col(c) for c in df.columns})
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185 entries, 0 to 184
Data columns (total 21 columns):
 #   Column                                   Non-Null Count  Dtype         
---  ------                                   --------------  -----         
 0   Date                                     185 non-null    datetime64[ns]
 1   New South Wales_Private                  162 non-null    object        
 2   Victoria_Private                         162 non-null    object        
 3   Queensland_Private                       162 non-null    object        
 4   South Australia_Private                  162 non-null    object        
 5   Western Australia_Private                161 non-null    object        
 6   Tasmania_Private                         155 non-null    object        
 7   Northern Territory_Private               156 non-null    object        
 8   Australian Capital Territory_Private     151 non-null    object        
 9   Australia_Private                        18

In [56]:
# Remove any duplicated columns that resulted from the raw sheet structure
# Keep the first occurrence and copy to avoid chained-assignment warnings
df = df.loc[:, ~df.columns.duplicated()].copy()
# Verify final columns and non-null counts
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185 entries, 0 to 184
Data columns (total 19 columns):
 #   Column                                   Non-Null Count  Dtype         
---  ------                                   --------------  -----         
 0   Date                                     185 non-null    datetime64[ns]
 1   New South Wales_Private                  162 non-null    object        
 2   Victoria_Private                         162 non-null    object        
 3   Queensland_Private                       162 non-null    object        
 4   South Australia_Private                  162 non-null    object        
 5   Western Australia_Private                161 non-null    object        
 6   Tasmania_Private                         155 non-null    object        
 7   Northern Territory_Private               156 non-null    object        
 8   Australian Capital Territory_Private     151 non-null    object        
 9   Australia_Private                        18

In [57]:
# Quick preview to confirm column order and first few rows after cleaning
df.head()

Unnamed: 0,Date,New South Wales_Private,Victoria_Private,Queensland_Private,South Australia_Private,Western Australia_Private,Tasmania_Private,Northern Territory_Private,Australian Capital Territory_Private,Australia_Private,SE_New South Wales_Private,SE_Victoria_Private,SE_Queensland_Private,SE_South Australia_Private,SE_Western Australia_Private,SE_Tasmania_Private,SE_Northern Territory_Private,SE_Australian Capital Territory_Private,SE_Australia_Private
0,1979-05-15,,,,,,,,,28.6,,,,,,,,,
1,1979-08-15,,,,,,,,,28.6,,,,,,,,,
2,1979-11-15,,,,,,,,,24.6,,,,,,,,,
3,1980-02-15,,,,,,,,,27.3,,,,,,,,,
4,1980-05-15,,,,,,,,,21.7,,,,,,,,,


In [58]:
# Filter to the analysis period starting Nov 1983 and reset the index
df = df[df["Date"] >= "1983-11-01"].reset_index(drop=True)
# Sanity-check after filtering
df.head()

Unnamed: 0,Date,New South Wales_Private,Victoria_Private,Queensland_Private,South Australia_Private,Western Australia_Private,Tasmania_Private,Northern Territory_Private,Australian Capital Territory_Private,Australia_Private,SE_New South Wales_Private,SE_Victoria_Private,SE_Queensland_Private,SE_South Australia_Private,SE_Western Australia_Private,SE_Tasmania_Private,SE_Northern Territory_Private,SE_Australian Capital Territory_Private,SE_Australia_Private
0,1983-11-15,10.0,4.9,4.0,2.0,3.5,0.5,0.3,0.5,25.8,,,,,,,,,
1,1984-02-15,20.8,9.5,4.0,3.6,0.0,0.0,0.3,0.4,38.7,,,,,,,,,
2,1984-05-15,14.6,8.4,3.7,2.3,0.0,0.0,0.5,0.7,30.2,,,,,,,,,
3,1984-08-15,16.2,7.3,3.7,2.5,2.3,0.4,0.4,0.3,33.0,,,,,,,,,
4,1984-11-15,11.6,11.3,3.9,2.2,3.5,,0.6,,33.6,2.3,2.0,0.9,0.4,0.9,,0.1,,3.3


In [59]:
# Count missing values per column to identify problematic series
df.isna().sum()

Date                                        0
New South Wales_Private                     5
Victoria_Private                            5
Queensland_Private                          5
South Australia_Private                     5
Western Australia_Private                   6
Tasmania_Private                           12
Northern Territory_Private                 11
Australian Capital Territory_Private       16
Australia_Private                           5
SE_New South Wales_Private                  9
SE_Victoria_Private                         9
SE_Queensland_Private                       9
SE_South Australia_Private                  9
SE_Western Australia_Private               10
SE_Tasmania_Private                        16
SE_Northern Territory_Private              15
SE_Australian Capital Territory_Private    20
SE_Australia_Private                        9
dtype: int64

In [60]:
# Column we require to be non-null while others are all null
keep_col = "Date"

# Build a boolean mask for rows where:
#  - every column EXCEPT `keep_col` is null
#  - and `keep_col` itself is non-null
mask = df.loc[:, df.columns != keep_col].isna().all(axis=1) & df[keep_col].notna()

# Preview the rows that match this condition (view only, no mutation)
rows = df.loc[mask]
rows

Unnamed: 0,Date,New South Wales_Private,Victoria_Private,Queensland_Private,South Australia_Private,Western Australia_Private,Tasmania_Private,Northern Territory_Private,Australian Capital Territory_Private,Australia_Private,SE_New South Wales_Private,SE_Victoria_Private,SE_Queensland_Private,SE_South Australia_Private,SE_Western Australia_Private,SE_Tasmania_Private,SE_Northern Territory_Private,SE_Australian Capital Territory_Private,SE_Australia_Private
99,2008-08-15,,,,,,,,,,,,,,,,,,
100,2008-11-15,,,,,,,,,,,,,,,,,,
101,2009-02-15,,,,,,,,,,,,,,,,,,
102,2009-05-15,,,,,,,,,,,,,,,,,,
103,2009-08-15,,,,,,,,,,,,,,,,,,


In [61]:
# Reuse the mask to identify rows to drop
mask = df.loc[:, df.columns != keep_col].isna().all(axis=1) & df[keep_col].notna()

# Count rows that satisfy the condition (for logging)
num_to_drop = int(mask.sum())

# Keep only rows that DO NOT match the mask (this drops the flagged rows)
df = df.loc[~mask].copy()

print(f"Dropped {num_to_drop} rows; new shape: {df.shape}")

Dropped 5 rows; new shape: (162, 19)


In [62]:
# Re-check missingness after dropping the fully-null rows (except Date)
df.isna().sum()

Date                                        0
New South Wales_Private                     0
Victoria_Private                            0
Queensland_Private                          0
South Australia_Private                     0
Western Australia_Private                   1
Tasmania_Private                            7
Northern Territory_Private                  6
Australian Capital Territory_Private       11
Australia_Private                           0
SE_New South Wales_Private                  4
SE_Victoria_Private                         4
SE_Queensland_Private                       4
SE_South Australia_Private                  4
SE_Western Australia_Private                5
SE_Tasmania_Private                        11
SE_Northern Territory_Private              10
SE_Australian Capital Territory_Private    15
SE_Australia_Private                        4
dtype: int64

In [63]:
# Inspect any rows that still contain at least one null value
rows_with_any_nulls = df[df.isna().any(axis=1)]
rows_with_any_nulls

Unnamed: 0,Date,New South Wales_Private,Victoria_Private,Queensland_Private,South Australia_Private,Western Australia_Private,Tasmania_Private,Northern Territory_Private,Australian Capital Territory_Private,Australia_Private,SE_New South Wales_Private,SE_Victoria_Private,SE_Queensland_Private,SE_South Australia_Private,SE_Western Australia_Private,SE_Tasmania_Private,SE_Northern Territory_Private,SE_Australian Capital Territory_Private,SE_Australia_Private
0,1983-11-15,10.0,4.9,4.0,2.0,3.5,0.5,0.3,0.5,25.8,,,,,,,,,
1,1984-02-15,20.8,9.5,4.0,3.6,0.0,0.0,0.3,0.4,38.7,,,,,,,,,
2,1984-05-15,14.6,8.4,3.7,2.3,0.0,0.0,0.5,0.7,30.2,,,,,,,,,
3,1984-08-15,16.2,7.3,3.7,2.5,2.3,0.4,0.4,0.3,33.0,,,,,,,,,
4,1984-11-15,11.6,11.3,3.9,2.2,3.5,,0.6,,33.6,2.3,2.0,0.9,0.4,0.9,,0.1,,3.3
7,1985-08-15,12.2,17.6,7.5,3.8,,0.6,0.7,,46.9,1.8,2.3,1.7,0.7,,0.1,0.2,,3.8
9,1986-02-15,19.2,17.1,8.5,3.3,3.7,1.3,0.7,,54.8,3.8,2.5,1.8,0.6,0.7,0.2,0.2,,5.0
11,1986-08-15,13.2,12.8,4.2,4.9,4.7,0.5,,,42.0,1.9,2.2,1.1,1.2,1.1,0.1,,,3.6
12,1986-11-15,17.3,12.2,5.4,3.8,4.0,,,,45.0,3.3,2.0,1.2,0.7,1.1,,,,4.2
13,1987-02-15,20.5,18.5,5.8,4.4,5.9,,,,57.6,3.3,2.8,1.3,0.8,1.3,,,,4.8


In [64]:
# Column-wise null counts again for reference
df.isna().sum()

Date                                        0
New South Wales_Private                     0
Victoria_Private                            0
Queensland_Private                          0
South Australia_Private                     0
Western Australia_Private                   1
Tasmania_Private                            7
Northern Territory_Private                  6
Australian Capital Territory_Private       11
Australia_Private                           0
SE_New South Wales_Private                  4
SE_Victoria_Private                         4
SE_Queensland_Private                       4
SE_South Australia_Private                  4
SE_Western Australia_Private                5
SE_Tasmania_Private                        11
SE_Northern Territory_Private              10
SE_Australian Capital Territory_Private    15
SE_Australia_Private                        4
dtype: int64

In [65]:
# Final preview of cleaned data before splitting into value and SE tables
df.head()

Unnamed: 0,Date,New South Wales_Private,Victoria_Private,Queensland_Private,South Australia_Private,Western Australia_Private,Tasmania_Private,Northern Territory_Private,Australian Capital Territory_Private,Australia_Private,SE_New South Wales_Private,SE_Victoria_Private,SE_Queensland_Private,SE_South Australia_Private,SE_Western Australia_Private,SE_Tasmania_Private,SE_Northern Territory_Private,SE_Australian Capital Territory_Private,SE_Australia_Private
0,1983-11-15,10.0,4.9,4.0,2.0,3.5,0.5,0.3,0.5,25.8,,,,,,,,,
1,1984-02-15,20.8,9.5,4.0,3.6,0.0,0.0,0.3,0.4,38.7,,,,,,,,,
2,1984-05-15,14.6,8.4,3.7,2.3,0.0,0.0,0.5,0.7,30.2,,,,,,,,,
3,1984-08-15,16.2,7.3,3.7,2.5,2.3,0.4,0.4,0.3,33.0,,,,,,,,,
4,1984-11-15,11.6,11.3,3.9,2.2,3.5,,0.6,,33.6,2.3,2.0,0.9,0.4,0.9,,0.1,,3.3


In [66]:
# Split into two DataFrames:
#  - vac_df: vacancy values (non-SE columns)
#  - se_df: standard error columns with Date
se_cols = [c for c in df.columns if c.startswith("SE_")]
vac_cols =  [c for c in df.columns if c not in se_cols and c != "Series ID" and c != "Unit"]
vac_df = df[vac_cols].copy()
se_df = pd.concat([df[["Date"]], df[se_cols]], axis=1)

In [67]:
# Confirm schema for vacancy values (pre-conversion)
vac_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 162 entries, 0 to 166
Data columns (total 10 columns):
 #   Column                                Non-Null Count  Dtype         
---  ------                                --------------  -----         
 0   Date                                  162 non-null    datetime64[ns]
 1   New South Wales_Private               162 non-null    object        
 2   Victoria_Private                      162 non-null    object        
 3   Queensland_Private                    162 non-null    object        
 4   South Australia_Private               162 non-null    object        
 5   Western Australia_Private             161 non-null    object        
 6   Tasmania_Private                      155 non-null    object        
 7   Northern Territory_Private            156 non-null    object        
 8   Australian Capital Territory_Private  151 non-null    object        
 9   Australia_Private                     162 non-null    object        
dtypes: date

In [68]:
# Preview vacancy values
vac_df.head()

Unnamed: 0,Date,New South Wales_Private,Victoria_Private,Queensland_Private,South Australia_Private,Western Australia_Private,Tasmania_Private,Northern Territory_Private,Australian Capital Territory_Private,Australia_Private
0,1983-11-15,10.0,4.9,4.0,2.0,3.5,0.5,0.3,0.5,25.8
1,1984-02-15,20.8,9.5,4.0,3.6,0.0,0.0,0.3,0.4,38.7
2,1984-05-15,14.6,8.4,3.7,2.3,0.0,0.0,0.5,0.7,30.2
3,1984-08-15,16.2,7.3,3.7,2.5,2.3,0.4,0.4,0.3,33.0
4,1984-11-15,11.6,11.3,3.9,2.2,3.5,,0.6,,33.6


In [69]:
# Preview standard error values
se_df.head()

Unnamed: 0,Date,SE_New South Wales_Private,SE_Victoria_Private,SE_Queensland_Private,SE_South Australia_Private,SE_Western Australia_Private,SE_Tasmania_Private,SE_Northern Territory_Private,SE_Australian Capital Territory_Private,SE_Australia_Private
0,1983-11-15,,,,,,,,,
1,1984-02-15,,,,,,,,,
2,1984-05-15,,,,,,,,,
3,1984-08-15,,,,,,,,,
4,1984-11-15,2.3,2.0,0.9,0.4,0.9,,0.1,,3.3


In [70]:
# Convert numeric-like columns from object to float for both frames
for frame in (vac_df, se_df):
    num_cols = frame.columns.drop("Date")
    for col in num_cols:
        frame[col] = pd.to_numeric(frame[col], errors="coerce")

In [71]:
# Validate dtypes are now numeric for vacancy values
vac_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 162 entries, 0 to 166
Data columns (total 10 columns):
 #   Column                                Non-Null Count  Dtype         
---  ------                                --------------  -----         
 0   Date                                  162 non-null    datetime64[ns]
 1   New South Wales_Private               162 non-null    float64       
 2   Victoria_Private                      162 non-null    float64       
 3   Queensland_Private                    162 non-null    float64       
 4   South Australia_Private               162 non-null    float64       
 5   Western Australia_Private             161 non-null    float64       
 6   Tasmania_Private                      155 non-null    float64       
 7   Northern Territory_Private            156 non-null    float64       
 8   Australian Capital Territory_Private  151 non-null    float64       
 9   Australia_Private                     162 non-null    float64       
dtypes: date

In [72]:
# Validate dtypes are now numeric for standard errors
se_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 162 entries, 0 to 166
Data columns (total 10 columns):
 #   Column                                   Non-Null Count  Dtype         
---  ------                                   --------------  -----         
 0   Date                                     162 non-null    datetime64[ns]
 1   SE_New South Wales_Private               158 non-null    float64       
 2   SE_Victoria_Private                      158 non-null    float64       
 3   SE_Queensland_Private                    158 non-null    float64       
 4   SE_South Australia_Private               158 non-null    float64       
 5   SE_Western Australia_Private             157 non-null    float64       
 6   SE_Tasmania_Private                      151 non-null    float64       
 7   SE_Northern Territory_Private            152 non-null    float64       
 8   SE_Australian Capital Territory_Private  147 non-null    float64       
 9   SE_Australia_Private                     158 non

In [73]:
# Save cleaned wide-form datasets to disk
# Save cleaned wide-form datasets to disk
vac_path = "../data/actual/preprocessed/private_vacancies_clean.csv"
se_path = "../data/actual/preprocessed/private_vacancies_standard_error.csv"
vac_df.to_csv(vac_path, index=False)
se_df.to_csv(se_path, index=False)

In [74]:
# Convert wide-form vacancies into a tidy long-form table
vac_long = vac_df.melt(id_vars="Date", var_name="Region", value_name="Vacancies_thousands")

# Persist long-form to CSV for downstream viz/analysis
vac_long_path = "../data/actual/preprocessed/private_vacancies_clean_long.csv"
vac_long.to_csv(vac_long_path, index=False)

In [75]:
# Reload the saved CSVs to verify outputs on disk
#  - data: wide-form vacancies
#  - se_data: standard errors (wide-form)
#  - data3: long-form vacancies
data = pd.read_csv(vac_path)
se_data = pd.read_csv(se_path)
data3 = pd.read_csv(vac_long_path)

In [76]:
# Baseline missingness summary (counts per column) before interpolation
data.isna().sum()

Date                                     0
New South Wales_Private                  0
Victoria_Private                         0
Queensland_Private                       0
South Australia_Private                  0
Western Australia_Private                1
Tasmania_Private                         7
Northern Territory_Private               6
Australian Capital Territory_Private    11
Australia_Private                        0
dtype: int64

In [77]:
# Attempt a simple linear interpolation over time for numeric gaps.
# Note: For object dtypes, call data.infer_objects(copy=False) before interpolating.
data.interpolate(method="linear", inplace=False)


  data.interpolate(method="linear", inplace=False)


Unnamed: 0,Date,New South Wales_Private,Victoria_Private,Queensland_Private,South Australia_Private,Western Australia_Private,Tasmania_Private,Northern Territory_Private,Australian Capital Territory_Private,Australia_Private
0,1983-11-15,10.0,4.9,4.0,2.0,3.5,0.5,0.3,0.5,25.8
1,1984-02-15,20.8,9.5,4.0,3.6,0.0,0.0,0.3,0.4,38.7
2,1984-05-15,14.6,8.4,3.7,2.3,0.0,0.0,0.5,0.7,30.2
3,1984-08-15,16.2,7.3,3.7,2.5,2.3,0.4,0.4,0.3,33.0
4,1984-11-15,11.6,11.3,3.9,2.2,3.5,0.5,0.6,0.8,33.6
...,...,...,...,...,...,...,...,...,...,...
157,2024-05-15,91.5,78.1,60.1,18.6,38.4,5.3,4.4,5.8,302.1
158,2024-08-15,90.3,75.2,61.2,16.3,38.3,5.2,4.1,5.8,296.3
159,2024-11-15,93.0,72.7,70.0,20.2,38.9,5.4,4.7,6.8,311.6
160,2025-02-15,89.6,67.1,63.1,18.1,40.6,4.3,4.4,5.1,292.5


In [78]:
# Check remaining missing values after any interpolation/transforms
data.isna().sum()

Date                                     0
New South Wales_Private                  0
Victoria_Private                         0
Queensland_Private                       0
South Australia_Private                  0
Western Australia_Private                1
Tasmania_Private                         7
Northern Territory_Private               6
Australian Capital Territory_Private    11
Australia_Private                        0
dtype: int64

In [79]:
# Persist the interpolated/cleaned wide-format vacancies back to CSV
data.to_csv(vac_path, index=False)
