# Feature Engineering
This notebook handles:
1. Separating Street Names and Types
2. Calculating Property Age
3. Calculating Average Room Size
4. Calculating Building Area to Land Area Ratio
5. Saving the Engineered Data

In [1]:
import pandas as pd

# Load the cleaned housing CSV into the working frame.
cleaned_csv = "../data/CLEANED_Melbourne_Housing_Market.csv"
df = pd.read_csv(cleaned_csv)

# read_csv does not parse dates unless asked to, so convert SaleDate explicitly.
# NOTE(review): no format is passed, so pandas infers it — confirm the date
# layout stored in the CSV matches what the previewed values suggest.
df = df.assign(SaleDate=pd.to_datetime(df["SaleDate"]))
df.head()

Unnamed: 0,SaleDate,Suburb,Address,UnitType,SaleMethod,RealEstateAgent,CouncilArea,RegionName,Rooms,DistanceToCBD,...,Bedrooms,Bathrooms,CarSpots,LandSize,BuildingArea,YearBuilt,Latitude,Longitude,NeighbouringProperties,Price
0,2016-03-12,Abbotsford,85_Turner_St,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,...,2.0,1.0,1.0,202.0,133.0,1970.0,-37.7996,144.9984,4019.0,1480000.0
1,2016-04-02,Abbotsford,25_Bloomburg_St,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,...,2.0,1.0,0.0,156.0,79.0,1900.0,-37.8079,144.9934,4019.0,1035000.0
2,2017-04-03,Abbotsford,5_Charles_St,House,Sold_Prior,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,...,3.0,2.0,0.0,134.0,150.0,1900.0,-37.8093,144.9944,4019.0,1465000.0
3,2017-04-03,Abbotsford,40_Federation_La,House,Passed_In,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,...,3.0,2.0,1.0,94.0,133.0,1970.0,-37.7969,144.9969,4019.0,850000.0
4,2016-04-06,Abbotsford,55A_Park_St,House,Vendor_Bid,Nelson,Yarra_City,Northern_Metropolitan,4.0,2.5,...,3.0,1.0,2.0,120.0,142.0,2014.0,-37.8072,144.9941,4019.0,1600000.0


In [2]:
from data_manipulation import feature_engineering as fe

## Separating Street Names and Types

In [3]:
# Split every address row into two new columns via the project helper.
# In the preview, "85_Turner_St" ends up as StreetName "Turner".
address_parts = df.apply(fe.separate_address, axis="columns")
df[["StreetName", "StreetType"]] = address_parts

# Overwrite StreetType with the helper's result (e.g. "Street", "Lane" in the
# preview). Kept after the split above — presumably the helper reads the
# freshly created StreetType column; TODO confirm in feature_engineering.
full_street_types = df.apply(fe.get_full_street_type, axis="columns")
df["StreetType"] = full_street_types

# Drop the original Address column now that its parts have been extracted.
df = fe.remove_column(df, "Address")
df.head()

Unnamed: 0,SaleDate,Suburb,UnitType,SaleMethod,RealEstateAgent,CouncilArea,RegionName,Rooms,DistanceToCBD,Postcode,...,CarSpots,LandSize,BuildingArea,YearBuilt,Latitude,Longitude,NeighbouringProperties,Price,StreetName,StreetType
0,2016-03-12,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,1.0,202.0,133.0,1970.0,-37.7996,144.9984,4019.0,1480000.0,Turner,Street
1,2016-04-02,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,0.0,156.0,79.0,1900.0,-37.8079,144.9934,4019.0,1035000.0,Bloomburg,Street
2,2017-04-03,Abbotsford,House,Sold_Prior,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,0.0,134.0,150.0,1900.0,-37.8093,144.9944,4019.0,1465000.0,Charles,Street
3,2017-04-03,Abbotsford,House,Passed_In,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,1.0,94.0,133.0,1970.0,-37.7969,144.9969,4019.0,850000.0,Federation,Lane
4,2016-04-06,Abbotsford,House,Vendor_Bid,Nelson,Yarra_City,Northern_Metropolitan,4.0,2.5,3067.0,...,2.0,120.0,142.0,2014.0,-37.8072,144.9941,4019.0,1600000.0,Park,Street


## Calculate Property Age

In [4]:
# Compute PropertyAge row by row with the project helper.
# In the previewed rows the value equals the sale year minus YearBuilt
# (e.g. 2016 - 1970 = 46); the exact rule lives in feature_engineering.
property_age = df.apply(fe.calc_property_age, axis="columns")
df["PropertyAge"] = property_age
df.head()

Unnamed: 0,SaleDate,Suburb,UnitType,SaleMethod,RealEstateAgent,CouncilArea,RegionName,Rooms,DistanceToCBD,Postcode,...,LandSize,BuildingArea,YearBuilt,Latitude,Longitude,NeighbouringProperties,Price,StreetName,StreetType,PropertyAge
0,2016-03-12,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,202.0,133.0,1970.0,-37.7996,144.9984,4019.0,1480000.0,Turner,Street,46.0
1,2016-04-02,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,156.0,79.0,1900.0,-37.8079,144.9934,4019.0,1035000.0,Bloomburg,Street,116.0
2,2017-04-03,Abbotsford,House,Sold_Prior,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,134.0,150.0,1900.0,-37.8093,144.9944,4019.0,1465000.0,Charles,Street,117.0
3,2017-04-03,Abbotsford,House,Passed_In,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,94.0,133.0,1970.0,-37.7969,144.9969,4019.0,850000.0,Federation,Lane,47.0
4,2016-04-06,Abbotsford,House,Vendor_Bid,Nelson,Yarra_City,Northern_Metropolitan,4.0,2.5,3067.0,...,120.0,142.0,2014.0,-37.8072,144.9941,4019.0,1600000.0,Park,Street,2.0


## Calculate Average Room Size

In [5]:
# Compute AvgRoomSize row by row with the project helper.
# In the previewed rows the value equals BuildingArea / Rooms
# (e.g. 133 / 2 = 66.5); the exact rule lives in feature_engineering.
avg_room_size = df.apply(fe.calc_avg_room_size, axis="columns")
df["AvgRoomSize"] = avg_room_size
df.head()

Unnamed: 0,SaleDate,Suburb,UnitType,SaleMethod,RealEstateAgent,CouncilArea,RegionName,Rooms,DistanceToCBD,Postcode,...,BuildingArea,YearBuilt,Latitude,Longitude,NeighbouringProperties,Price,StreetName,StreetType,PropertyAge,AvgRoomSize
0,2016-03-12,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,133.0,1970.0,-37.7996,144.9984,4019.0,1480000.0,Turner,Street,46.0,66.5
1,2016-04-02,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,79.0,1900.0,-37.8079,144.9934,4019.0,1035000.0,Bloomburg,Street,116.0,39.5
2,2017-04-03,Abbotsford,House,Sold_Prior,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,150.0,1900.0,-37.8093,144.9944,4019.0,1465000.0,Charles,Street,117.0,50.0
3,2017-04-03,Abbotsford,House,Passed_In,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,133.0,1970.0,-37.7969,144.9969,4019.0,850000.0,Federation,Lane,47.0,44.333333
4,2016-04-06,Abbotsford,House,Vendor_Bid,Nelson,Yarra_City,Northern_Metropolitan,4.0,2.5,3067.0,...,142.0,2014.0,-37.8072,144.9941,4019.0,1600000.0,Park,Street,2.0,35.5


## Calculate Building Area to Land Area Ratio

In [6]:
# Compute BuildingToLandRatio row by row with the project helper.
# In the previewed rows the value equals BuildingArea / LandSize
# (e.g. 133 / 202 ≈ 0.658); the exact rule lives in feature_engineering.
building_to_land_ratio = df.apply(fe.calc_building_to_land_ratio, axis="columns")
df["BuildingToLandRatio"] = building_to_land_ratio
df.head()

Unnamed: 0,SaleDate,Suburb,UnitType,SaleMethod,RealEstateAgent,CouncilArea,RegionName,Rooms,DistanceToCBD,Postcode,...,YearBuilt,Latitude,Longitude,NeighbouringProperties,Price,StreetName,StreetType,PropertyAge,AvgRoomSize,BuildingToLandRatio
0,2016-03-12,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,1970.0,-37.7996,144.9984,4019.0,1480000.0,Turner,Street,46.0,66.5,0.658416
1,2016-04-02,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,1900.0,-37.8079,144.9934,4019.0,1035000.0,Bloomburg,Street,116.0,39.5,0.50641
2,2017-04-03,Abbotsford,House,Sold_Prior,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,1900.0,-37.8093,144.9944,4019.0,1465000.0,Charles,Street,117.0,50.0,1.119403
3,2017-04-03,Abbotsford,House,Passed_In,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,1970.0,-37.7969,144.9969,4019.0,850000.0,Federation,Lane,47.0,44.333333,1.414894
4,2016-04-06,Abbotsford,House,Vendor_Bid,Nelson,Yarra_City,Northern_Metropolitan,4.0,2.5,3067.0,...,2014.0,-37.8072,144.9941,4019.0,1600000.0,Park,Street,2.0,35.5,1.183333


## Save Engineered Data

In [7]:
# Final look at the first five rows of the engineered frame.
df.head(n=5)

Unnamed: 0,SaleDate,Suburb,UnitType,SaleMethod,RealEstateAgent,CouncilArea,RegionName,Rooms,DistanceToCBD,Postcode,...,YearBuilt,Latitude,Longitude,NeighbouringProperties,Price,StreetName,StreetType,PropertyAge,AvgRoomSize,BuildingToLandRatio
0,2016-03-12,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,1970.0,-37.7996,144.9984,4019.0,1480000.0,Turner,Street,46.0,66.5,0.658416
1,2016-04-02,Abbotsford,House,Sold,Biggin,Yarra_City,Northern_Metropolitan,2.0,2.5,3067.0,...,1900.0,-37.8079,144.9934,4019.0,1035000.0,Bloomburg,Street,116.0,39.5,0.50641
2,2017-04-03,Abbotsford,House,Sold_Prior,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,1900.0,-37.8093,144.9944,4019.0,1465000.0,Charles,Street,117.0,50.0,1.119403
3,2017-04-03,Abbotsford,House,Passed_In,Biggin,Yarra_City,Northern_Metropolitan,3.0,2.5,3067.0,...,1970.0,-37.7969,144.9969,4019.0,850000.0,Federation,Lane,47.0,44.333333,1.414894
4,2016-04-06,Abbotsford,House,Vendor_Bid,Nelson,Yarra_City,Northern_Metropolitan,4.0,2.5,3067.0,...,2014.0,-37.8072,144.9941,4019.0,1600000.0,Park,Street,2.0,35.5,1.183333


In [8]:
# Write the engineered frame to CSV; index=False leaves the row index out of the file.
engineered_csv = "../data/ENGINEERED_Melbourne_Housing_Market.csv"
df.to_csv(engineered_csv, index=False)