# Feature Engineering
This notebook handles:
1. Separating street names and street types
2. Calculating property age
3. Calculating average room size
4. Calculating the building-area-to-land-area ratio
5. Saving the engineered dataset

In [9]:
import pandas as pd

# Load the cleaned housing data and parse SaleDate with pd.to_datetime in one
# chained expression; assigning to an existing column keeps its position.
df = pd.read_csv("../data/CLEANED_Melbourne_Housing_Market.csv").assign(
    SaleDate=lambda frame: pd.to_datetime(frame["SaleDate"])
)
df.head()

Unnamed: 0,SaleDate,Address,CouncilArea,RealEstateAgent,RegionName,SaleMethod,Suburb,UnitType,Bathrooms,Bedrooms,...,CarSpots,DistanceToCBD,LandSize,Latitude,Longitude,NeighbouringProperties,Postcode,Rooms,YearBuilt,Price
0,2016-03-12,85_Turner_St,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,...,1.0,2.5,202.0,-37.7996,144.9984,4019.0,3067.0,2.0,1970.0,1480000.0
1,2016-04-02,25_Bloomburg_St,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,...,0.0,2.5,156.0,-37.8079,144.9934,4019.0,3067.0,2.0,1900.0,1035000.0
2,2017-04-03,5_Charles_St,Yarra_City,Biggin,Northern_Metropolitan,Sold_Prior,Abbotsford,House,2.0,3.0,...,0.0,2.5,134.0,-37.8093,144.9944,4019.0,3067.0,3.0,1900.0,1465000.0
3,2017-04-03,40_Federation_La,Yarra_City,Biggin,Northern_Metropolitan,Passed_In,Abbotsford,House,2.0,3.0,...,1.0,2.5,94.0,-37.7969,144.9969,4019.0,3067.0,3.0,1970.0,850000.0
4,2016-04-06,55A_Park_St,Yarra_City,Nelson,Northern_Metropolitan,Vendor_Bid,Abbotsford,House,1.0,3.0,...,2.0,2.5,120.0,-37.8072,144.9941,4019.0,3067.0,4.0,2014.0,1600000.0


In [10]:
from data_manipulation import feature_engineering as fe

## Separating Street Names and Types

In [11]:
# Split each row's address with the project helper; the two resulting columns
# are stored as StreetName and StreetType.
street_parts = df.apply(fe.separate_address, axis="columns")
df[["StreetName", "StreetType"]] = street_parts

# Overwrite StreetType with the helper's full street type. In the displayed
# rows "St" becomes "Street" and "La" becomes "Lane".
df["StreetType"] = df.apply(fe.get_full_street_type, axis="columns")

# Address is no longer needed once it has been split into the columns above.
df = fe.remove_column(df, "Address")
df.head()

Unnamed: 0,SaleDate,CouncilArea,RealEstateAgent,RegionName,SaleMethod,Suburb,UnitType,Bathrooms,Bedrooms,BuildingArea,...,LandSize,Latitude,Longitude,NeighbouringProperties,Postcode,Rooms,YearBuilt,Price,StreetName,StreetType
0,2016-03-12,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,133.0,...,202.0,-37.7996,144.9984,4019.0,3067.0,2.0,1970.0,1480000.0,Turner,Street
1,2016-04-02,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,79.0,...,156.0,-37.8079,144.9934,4019.0,3067.0,2.0,1900.0,1035000.0,Bloomburg,Street
2,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Sold_Prior,Abbotsford,House,2.0,3.0,150.0,...,134.0,-37.8093,144.9944,4019.0,3067.0,3.0,1900.0,1465000.0,Charles,Street
3,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Passed_In,Abbotsford,House,2.0,3.0,133.0,...,94.0,-37.7969,144.9969,4019.0,3067.0,3.0,1970.0,850000.0,Federation,Lane
4,2016-04-06,Yarra_City,Nelson,Northern_Metropolitan,Vendor_Bid,Abbotsford,House,1.0,3.0,142.0,...,120.0,-37.8072,144.9941,4019.0,3067.0,4.0,2014.0,1600000.0,Park,Street


## Calculate Property Age

In [12]:
# Add PropertyAge by applying the project helper to every row.
# In the displayed rows the value matches the sale year minus YearBuilt
# (e.g. 2016 - 1970 = 46); the helper's exact logic is not visible here.
df = df.assign(PropertyAge=df.apply(fe.calc_property_age, axis="columns"))
df.head()

Unnamed: 0,SaleDate,CouncilArea,RealEstateAgent,RegionName,SaleMethod,Suburb,UnitType,Bathrooms,Bedrooms,BuildingArea,...,Latitude,Longitude,NeighbouringProperties,Postcode,Rooms,YearBuilt,Price,StreetName,StreetType,PropertyAge
0,2016-03-12,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,133.0,...,-37.7996,144.9984,4019.0,3067.0,2.0,1970.0,1480000.0,Turner,Street,46.0
1,2016-04-02,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,79.0,...,-37.8079,144.9934,4019.0,3067.0,2.0,1900.0,1035000.0,Bloomburg,Street,116.0
2,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Sold_Prior,Abbotsford,House,2.0,3.0,150.0,...,-37.8093,144.9944,4019.0,3067.0,3.0,1900.0,1465000.0,Charles,Street,117.0
3,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Passed_In,Abbotsford,House,2.0,3.0,133.0,...,-37.7969,144.9969,4019.0,3067.0,3.0,1970.0,850000.0,Federation,Lane,47.0
4,2016-04-06,Yarra_City,Nelson,Northern_Metropolitan,Vendor_Bid,Abbotsford,House,1.0,3.0,142.0,...,-37.8072,144.9941,4019.0,3067.0,4.0,2014.0,1600000.0,Park,Street,2.0


## Calculate Average Room Size

In [13]:
# Add AvgRoomSize by applying the project helper to every row.
# In the displayed rows the value matches BuildingArea / Rooms
# (e.g. 133 / 2 = 66.5); the helper's exact logic is not visible here.
df = df.assign(AvgRoomSize=df.apply(fe.calc_avg_room_size, axis="columns"))
df.head()

Unnamed: 0,SaleDate,CouncilArea,RealEstateAgent,RegionName,SaleMethod,Suburb,UnitType,Bathrooms,Bedrooms,BuildingArea,...,Longitude,NeighbouringProperties,Postcode,Rooms,YearBuilt,Price,StreetName,StreetType,PropertyAge,AvgRoomSize
0,2016-03-12,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,133.0,...,144.9984,4019.0,3067.0,2.0,1970.0,1480000.0,Turner,Street,46.0,66.5
1,2016-04-02,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,79.0,...,144.9934,4019.0,3067.0,2.0,1900.0,1035000.0,Bloomburg,Street,116.0,39.5
2,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Sold_Prior,Abbotsford,House,2.0,3.0,150.0,...,144.9944,4019.0,3067.0,3.0,1900.0,1465000.0,Charles,Street,117.0,50.0
3,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Passed_In,Abbotsford,House,2.0,3.0,133.0,...,144.9969,4019.0,3067.0,3.0,1970.0,850000.0,Federation,Lane,47.0,44.333333
4,2016-04-06,Yarra_City,Nelson,Northern_Metropolitan,Vendor_Bid,Abbotsford,House,1.0,3.0,142.0,...,144.9941,4019.0,3067.0,4.0,2014.0,1600000.0,Park,Street,2.0,35.5


## Calculate Building Area to Land Area Ratio

In [14]:
# Add BuildingToLandRatio by applying the project helper to every row.
# In the displayed rows the value matches BuildingArea / LandSize
# (e.g. 133 / 202 ≈ 0.658); the helper's exact logic is not visible here.
df = df.assign(
    BuildingToLandRatio=df.apply(fe.calc_building_to_land_ratio, axis="columns")
)
df.head()

Unnamed: 0,SaleDate,CouncilArea,RealEstateAgent,RegionName,SaleMethod,Suburb,UnitType,Bathrooms,Bedrooms,BuildingArea,...,NeighbouringProperties,Postcode,Rooms,YearBuilt,Price,StreetName,StreetType,PropertyAge,AvgRoomSize,BuildingToLandRatio
0,2016-03-12,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,133.0,...,4019.0,3067.0,2.0,1970.0,1480000.0,Turner,Street,46.0,66.5,0.658416
1,2016-04-02,Yarra_City,Biggin,Northern_Metropolitan,Sold,Abbotsford,House,1.0,2.0,79.0,...,4019.0,3067.0,2.0,1900.0,1035000.0,Bloomburg,Street,116.0,39.5,0.50641
2,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Sold_Prior,Abbotsford,House,2.0,3.0,150.0,...,4019.0,3067.0,3.0,1900.0,1465000.0,Charles,Street,117.0,50.0,1.119403
3,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Passed_In,Abbotsford,House,2.0,3.0,133.0,...,4019.0,3067.0,3.0,1970.0,850000.0,Federation,Lane,47.0,44.333333,1.414894
4,2016-04-06,Yarra_City,Nelson,Northern_Metropolitan,Vendor_Bid,Abbotsford,House,1.0,3.0,142.0,...,4019.0,3067.0,4.0,2014.0,1600000.0,Park,Street,2.0,35.5,1.183333


## Saving the Engineered Data

In [15]:
from data_manipulation.data_cleaning import reorder_df_columns

# Pass the frame through the project's column-reordering helper. In the
# displayed output the engineered columns end up interleaved with the original
# ones and Price stays last.
df = df.pipe(reorder_df_columns)
df.head()

Unnamed: 0,SaleDate,CouncilArea,RealEstateAgent,RegionName,SaleMethod,StreetName,StreetType,Suburb,UnitType,AvgRoomSize,...,DistanceToCBD,LandSize,Latitude,Longitude,NeighbouringProperties,Postcode,PropertyAge,Rooms,YearBuilt,Price
0,2016-03-12,Yarra_City,Biggin,Northern_Metropolitan,Sold,Turner,Street,Abbotsford,House,66.5,...,2.5,202.0,-37.7996,144.9984,4019.0,3067.0,46.0,2.0,1970.0,1480000.0
1,2016-04-02,Yarra_City,Biggin,Northern_Metropolitan,Sold,Bloomburg,Street,Abbotsford,House,39.5,...,2.5,156.0,-37.8079,144.9934,4019.0,3067.0,116.0,2.0,1900.0,1035000.0
2,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Sold_Prior,Charles,Street,Abbotsford,House,50.0,...,2.5,134.0,-37.8093,144.9944,4019.0,3067.0,117.0,3.0,1900.0,1465000.0
3,2017-04-03,Yarra_City,Biggin,Northern_Metropolitan,Passed_In,Federation,Lane,Abbotsford,House,44.333333,...,2.5,94.0,-37.7969,144.9969,4019.0,3067.0,47.0,3.0,1970.0,850000.0
4,2016-04-06,Yarra_City,Nelson,Northern_Metropolitan,Vendor_Bid,Park,Street,Abbotsford,House,35.5,...,2.5,120.0,-37.8072,144.9941,4019.0,3067.0,2.0,4.0,2014.0,1600000.0


In [16]:
# Persist the engineered dataset; index=False keeps the row index out of the file.
df.to_csv(
    path_or_buf="../data/ENGINEERED_Melbourne_Housing_Market.csv",
    index=False,
)