# ABS - Australian Bureau of Statistics
## Lending Indicators Data

## Table 1. 
Households; Housing finance; Total dwellings; By property purpose; New loan commitments; Values

### Import Libraries

In [1]:
import pandas as pd

In [2]:


# Read the lending-indicators extract from the shared Datasets folder,
# then preview its first five rows (head() defaults to n=5).
df_total_loans = pd.read_csv(filepath_or_buffer="../Datasets/df_total_loans.csv")
df_total_loans.head()

Unnamed: 0,date,segment,buyer_type,loan_count,loan_value_million
0,2002-09-01,Investor,Total,,11009.5
1,2002-09-01,Owner-occupier,First Home Buyer,24529.0,4121.6
2,2002-09-01,Owner-occupier,Non-First Home Buyer,64722.0,12468.5
3,2002-09-01,Owner-occupier,Total,89705.0,16698.5
4,2002-09-01,Total,Total,,27708.0


In [3]:
# Print the frame's structure: row count, column names, non-null counts and dtypes.
df_total_loans.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 465 entries, 0 to 464
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   date                465 non-null    object 
 1   segment             465 non-null    object 
 2   buyer_type          465 non-null    object 
 3   loan_count          329 non-null    float64
 4   loan_value_million  465 non-null    float64
dtypes: float64(2), object(3)
memory usage: 18.3+ KB


In [4]:
# Print the row count per category for each of the two grouping columns.
for heading, column in (
    ("Segment value counts:", "segment"),
    ("\nBuyer Type value counts:", "buyer_type"),
):
    print(heading)
    print(df_total_loans[column].value_counts())


Segment value counts:
segment
Owner-occupier    279
Investor           93
Total              93
Name: count, dtype: int64

Buyer Type value counts:
buyer_type
Total                   279
First Home Buyer         93
Non-First Home Buyer     93
Name: count, dtype: int64


In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df_total_loans.describe()

Unnamed: 0,loan_count,loan_value_million
count,329.0,465.0
mean,61650.173252,28141.192043
std,31391.607683,18360.69988
min,18264.0,3845.5
25%,30609.0,15066.3
50%,60407.0,23852.3
75%,82555.0,37701.6
max,157887.0,98005.1


##### Convert date to datetime

In [6]:

# Parse the `date` strings (e.g. "2002-09-01") into pandas datetimes,
# overwriting the original object-dtype column on the same frame.
df_total_loans["date"] = pd.to_datetime(df_total_loans["date"])

In [7]:
# Re-inspect the frame to confirm `date` is now datetime64[ns] rather than object.
df_total_loans.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 465 entries, 0 to 464
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   date                465 non-null    datetime64[ns]
 1   segment             465 non-null    object        
 2   buyer_type          465 non-null    object        
 3   loan_count          329 non-null    float64       
 4   loan_value_million  465 non-null    float64       
dtypes: datetime64[ns](1), float64(2), object(2)
memory usage: 18.3+ KB


##### Handling Missing Values

In [9]:
# Count missing values in each column.
df_total_loans.isnull().sum()

date                    0
segment                 0
buyer_type              0
loan_count            136
loan_value_million      0
dtype: int64

loan_count

Non-null: 329 (~71%)

Missing values: 136 (~29%)

Reason for missing data:

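Loan counts are not reported for the overall “Total”/“Total” rows, and in the rows previewed above they are also missing for Investor rows (e.g. 2002-09-01), whereas Owner-occupier “Total” rows do carry a count. This appears to be structural missingness rather than a data quality issue.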

In [48]:
# Keep every row except the grand totals, i.e. keep a row when its segment
# or its buyer_type differs from "Total". Segment-level "Total" rows
# (e.g. Investor/Total, Owner-occupier/Total) are retained.
# NOTE(review): this does not remove every missing loan_count; the preview of
# df_clean still shows NaN for Investor rows. Confirm that is intended.
df_clean = df_total_loans.loc[
    df_total_loans["segment"].ne("Total") | df_total_loans["buyer_type"].ne("Total")
].copy()


In [49]:
# Preview the first 50 remaining rows; the Total/Total rows (original index
# labels 4, 9, ...) should no longer appear, and the index keeps its gaps.
df_clean.head(50)

Unnamed: 0,date,segment,buyer_type,loan_count,loan_value_million
0,2002-09-01,Investor,Total,,11009.5
1,2002-09-01,Owner-occupier,First Home Buyer,24529.0,4121.6
2,2002-09-01,Owner-occupier,Non-First Home Buyer,64722.0,12468.5
3,2002-09-01,Owner-occupier,Total,89705.0,16698.5
5,2002-12-01,Investor,Total,,10978.6
6,2002-12-01,Owner-occupier,First Home Buyer,23090.0,4045.5
7,2002-12-01,Owner-occupier,Non-First Home Buyer,65802.0,13125.4
8,2002-12-01,Owner-occupier,Total,88932.0,17202.2
10,2003-03-01,Investor,Total,,12179.5
11,2003-03-01,Owner-occupier,First Home Buyer,22989.0,4172.1
