# Join EPC, deprivation and flood risk data
EPC data: https://epc.opendatacommunities.org/domestic/search  
Deprivation data: https://imd-by-postcode.opendatacommunities.org/imd/2019  
Flood risk data: https://www.getthedata.com/open-flood-risk-by-postcode  
  
Use the postcode previously placed into the Zoopla dataset to join EPC, deprivation and flood risk data. Since EPC is at the property (rather than postcode) level, use the average EPC for that postcode, road and property type

In [1]:
import os
import numpy as np
import pandas as pd
import re
pd.set_option('display.max_columns', 100)

### Read in Zoopla, EPC, deprivation and flood risk files

In [2]:
AREA = 'Nuneaton'
DATA_RAW_FOLDER = os.path.join('data', 'raw')
DATA_PROCESSED_FOLDER = os.path.join('data', 'processed')
SAVE_FOLDER = DATA_PROCESSED_FOLDER

In [3]:
if AREA == 'Nuneaton':
    zoopla_df_filename = 'zoopla_properties_with_postcode_nuneaton.csv'
    epc_df_filename = 'epcs_nuneaton.csv'
    deprivation_df_filename = '2019-deprivation-by-postcode_nuneaton.csv'
elif AREA == 'Hinckley':
    zoopla_df_filename = 'zoopla_properties_with_postcode_hinckley.csv'
    epc_df_filename = 'epcs_hinckley.csv'
    deprivation_df_filename = '2019-deprivation-by-postcode_hinckley.csv'
    
floodrisk_df_filename = 'open_flood_risk_by_postcode.csv'

zoopla_df = pd.read_csv(os.path.join(DATA_PROCESSED_FOLDER, zoopla_df_filename), dtype=str)
epc_df = pd.read_csv(os.path.join(DATA_RAW_FOLDER, epc_df_filename), dtype=str)
deprivation_df = pd.read_csv(os.path.join(DATA_RAW_FOLDER, deprivation_df_filename), dtype=str)
floodrisk_df = pd.read_csv(os.path.join(DATA_RAW_FOLDER, floodrisk_df_filename), header=None, dtype=str)

In [4]:
display(zoopla_df.head())
display(epc_df.head())
display(deprivation_df.head())
display(floodrisk_df.head())

Unnamed: 0,agent_logo,outcode,price_modifier,num_recepts,street_name,first_published_date,agent_address,property_type,floor_plan,details_url,country,num_bathrooms,agent_name,listing_status,listing_id,price,displayable_address,image_url,latitude,longitude,description,post_town,country_code,county,last_published_date,num_bedrooms,category,agent_phone,postcode,parish
0,https://st.zoocdn.com/zoopla_static_agent_logo...,CV11,from,3,"Meadow Green, Watling Street",2023-02-04 05:28:55,"Meadow Green, Watling Street, Nuneaton",Detached house,,https://www.zoopla.co.uk/for-sale/details/6388...,England,0,Taylor Wimpey - Meadow Green,sale,63883197,376500.0,"""The Lanford - Plot 322"" at Windrower Close, N...",https://lid.zoocdn.com/354/255/fd606582b571af7...,52.52016999999999,-1.4552873,"Discover this 4 bedroom Lanford home, ideal fo...",Nuneaton,gb,Warwickshire,2023-02-04 05:40:31,4,Residential,024 7511 6265,CV11 4FS,"Nuneaton and Bedworth, unparished area"
1,https://st.zoocdn.com/zoopla_static_agent_logo...,CV11,from,3,"Meadow Green, Watling Street",2023-02-04 05:28:47,"Meadow Green, Watling Street, Nuneaton",Detached house,,https://www.zoopla.co.uk/for-sale/details/6388...,England,0,Taylor Wimpey - Meadow Green,sale,63883200,489950.0,"""The Ransford - Plot 119"" at Windrower Close, ...",https://lid.zoocdn.com/354/255/f5547b1657bfbfd...,52.52016999999999,-1.4552873,This four bedroom Ransford home is perfect for...,Nuneaton,gb,Warwickshire,2023-02-04 05:38:17,4,Residential,024 7511 6265,CV11 4FS,"Nuneaton and Bedworth, unparished area"
2,https://st.zoocdn.com/zoopla_static_agent_logo...,CV11,from,2,"Meadow Green, Watling Street",2023-02-04 05:28:47,"Meadow Green, Watling Street, Nuneaton",Detached house,,https://www.zoopla.co.uk/for-sale/details/6388...,England,0,Taylor Wimpey - Meadow Green,sale,63883198,305000.0,"""The Byford - Plot 323"" at Windrower Close, Nu...",https://lid.zoocdn.com/354/255/941aa37a7610247...,52.52016999999999,-1.4552873,Find out how our mortgage contribution scheme*...,Nuneaton,gb,Warwickshire,2023-02-04 05:38:28,3,Residential,024 7511 6265,CV11 4FS,"Nuneaton and Bedworth, unparished area"
3,https://st.zoocdn.com/zoopla_static_agent_logo...,CV11,from,2,"Meadow Green, Watling Street",2023-02-04 05:28:47,"Meadow Green, Watling Street, Nuneaton",Detached house,,https://www.zoopla.co.uk/for-sale/details/6388...,England,0,Taylor Wimpey - Meadow Green,sale,63883199,314950.0,"""The Amersham - Plot 373"" at Windrower Close, ...",https://lid.zoocdn.com/354/255/b4096bb0c276201...,52.52016999999999,-1.4552873,A delightful three bedroom home with an integr...,Nuneaton,gb,Warwickshire,2023-02-04 05:40:02,3,Residential,024 7511 6265,CV11 4FS,"Nuneaton and Bedworth, unparished area"
4,https://st.zoocdn.com/zoopla_static_agent_logo...,CV10,guide_price,1,Duckpond Lane,2023-02-03 19:18:48,"22 Newdegate Street, Nuneaton",Detached house,,https://www.zoopla.co.uk/for-sale/details/6388...,England,2,Alan Cooper Estates,sale,63881100,300000.0,"Duckpond Lane, Weddington, Nuneaton CV10",https://lid.zoocdn.com/354/255/7649fd019aaf859...,52.54377,-1.463799,Here is a superb double fronted Detached Resid...,Nuneaton,gb,Warwickshire,2023-02-03 19:37:49,3,Residential,024 7513 8435,CV10 0FH,"Nuneaton and Bedworth, unparished area"


Unnamed: 0,LMK_KEY,ADDRESS1,ADDRESS2,ADDRESS3,POSTCODE,BUILDING_REFERENCE_NUMBER,CURRENT_ENERGY_RATING,POTENTIAL_ENERGY_RATING,CURRENT_ENERGY_EFFICIENCY,POTENTIAL_ENERGY_EFFICIENCY,PROPERTY_TYPE,BUILT_FORM,INSPECTION_DATE,LOCAL_AUTHORITY,CONSTITUENCY,COUNTY,LODGEMENT_DATE,TRANSACTION_TYPE,ENVIRONMENT_IMPACT_CURRENT,ENVIRONMENT_IMPACT_POTENTIAL,ENERGY_CONSUMPTION_CURRENT,ENERGY_CONSUMPTION_POTENTIAL,CO2_EMISSIONS_CURRENT,CO2_EMISS_CURR_PER_FLOOR_AREA,CO2_EMISSIONS_POTENTIAL,LIGHTING_COST_CURRENT,LIGHTING_COST_POTENTIAL,HEATING_COST_CURRENT,HEATING_COST_POTENTIAL,HOT_WATER_COST_CURRENT,HOT_WATER_COST_POTENTIAL,TOTAL_FLOOR_AREA,ENERGY_TARIFF,MAINS_GAS_FLAG,FLOOR_LEVEL,FLAT_TOP_STOREY,FLAT_STOREY_COUNT,MAIN_HEATING_CONTROLS,MULTI_GLAZE_PROPORTION,GLAZED_TYPE,GLAZED_AREA,EXTENSION_COUNT,NUMBER_HABITABLE_ROOMS,NUMBER_HEATED_ROOMS,LOW_ENERGY_LIGHTING,NUMBER_OPEN_FIREPLACES,HOTWATER_DESCRIPTION,HOT_WATER_ENERGY_EFF,HOT_WATER_ENV_EFF,FLOOR_DESCRIPTION,FLOOR_ENERGY_EFF,FLOOR_ENV_EFF,WINDOWS_DESCRIPTION,WINDOWS_ENERGY_EFF,WINDOWS_ENV_EFF,WALLS_DESCRIPTION,WALLS_ENERGY_EFF,WALLS_ENV_EFF,SECONDHEAT_DESCRIPTION,SHEATING_ENERGY_EFF,SHEATING_ENV_EFF,ROOF_DESCRIPTION,ROOF_ENERGY_EFF,ROOF_ENV_EFF,MAINHEAT_DESCRIPTION,MAINHEAT_ENERGY_EFF,MAINHEAT_ENV_EFF,MAINHEATCONT_DESCRIPTION,MAINHEATC_ENERGY_EFF,MAINHEATC_ENV_EFF,LIGHTING_DESCRIPTION,LIGHTING_ENERGY_EFF,LIGHTING_ENV_EFF,MAIN_FUEL,WIND_TURBINE_COUNT,HEAT_LOSS_CORRIDOR,UNHEATED_CORRIDOR_LENGTH,FLOOR_HEIGHT,PHOTO_SUPPLY,SOLAR_WATER_HEATING_FLAG,MECHANICAL_VENTILATION,ADDRESS,LOCAL_AUTHORITY_LABEL,CONSTITUENCY_LABEL,POSTTOWN,CONSTRUCTION_AGE_BAND,LODGEMENT_DATETIME,TENURE,FIXED_LIGHTING_OUTLETS_COUNT,LOW_ENERGY_FIXED_LIGHT_COUNT,UPRN,UPRN_SOURCE
0,188e4e7604368b7386e5ff93771a037ccfb150c2861096...,5 Brockey Close,Barwell,,LE9 8BG,10003465551,D,B,68,87,Bungalow,Detached,2022-10-04,E07000132,E14000583,Leicestershire,2022-10-10,Stock condition survey,67,87,270,102,2.2,48,0.9,44,44,418,370,59,39,47,Single,Y,,,,,100.0,double glazing installed before 2002,Normal,0.0,3.0,3.0,100,0.0,From main system,Good,Good,"Solid, no insulation (assumed)",,,Fully double glazed,Average,Average,"Cavity wall, as built, insulated (assumed)",Good,Good,"Room heaters, mains gas",,,"Pitched, 270 mm loft insulation",Good,Good,"Boiler and radiators, mains gas",Good,Good,"Programmer, room thermostat and TRVs",Good,Good,Low energy lighting in all fixed outlets,Very Good,Very Good,mains gas (not community),0,,,2.3,0.0,N,natural,"5 Brockey Close, Barwell",Hinckley and Bosworth,Bosworth,LEICESTER,England and Wales: 1983-1990,2022-10-10 19:12:48,Rented (social),6,,100032074592,Energy Assessor
1,849605851212012102416531797929305,"2, Nob Hill",Norton juxta Twycross,,CV9 3QE,3560172078,D,C,67,80,House,Detached,2012-10-19,E07000132,E14000583,Leicestershire,2012-10-24,marketed sale,60,75,156,94,8.4,34,5.2,112,112,1463,1067,172,105,243,Single,N,NODATA!,,,2106.0,85.0,"double glazing, unknown install date",Normal,1.0,8.0,8.0,73,0.0,From main system,Good,Average,"Solid, no insulation (assumed)",,,Mostly double glazing,Poor,Poor,"Cavity wall, as built, partial insulation (ass...",Average,Average,"Room heaters, wood logs",,,"Pitched, 150 mm loft insulation",Good,Good,"Boiler and radiators, oil",Good,Good,"Programmer, room thermostat and TRVs",Good,Good,Low energy lighting in 73% of fixed outlets,Very Good,Very Good,oil (not community),0,NO DATA!,,,0.0,,natural,"2, Nob Hill, Norton juxta Twycross",Hinckley and Bosworth,Bosworth,ATHERSTONE,England and Wales: 1976-1982,2012-10-24 16:53:17,owner-occupied,26,19.0,100030495131,Address Matched
2,1062831709962013121817485087588537,"21, Barrie Road",,,LE10 0QU,3806087178,E,B,53,82,House,End-Terrace,2013-12-18,E07000132,E14000583,Leicestershire,2013-12-18,assessment for green deal,49,81,303,103,4.9,58,1.7,80,49,892,519,82,58,84,Single,Y,NODATA!,,,2107.0,100.0,"double glazing, unknown install date",Normal,1.0,5.0,5.0,36,0.0,From main system,Good,Good,"Suspended, no insulation (assumed)",,,Fully double glazed,Average,Average,"Solid brick, as built, no insulation (assumed)",Very Poor,Very Poor,"Room heaters, mains gas",,,"Pitched, 200 mm loft insulation",Good,Good,"Boiler and radiators, mains gas",Good,Good,"Programmer, TRVs and bypass",Average,Average,Low energy lighting in 36% of fixed outlets,Average,Average,mains gas (not community),0,NO DATA!,,,0.0,,natural,"21, Barrie Road",Hinckley and Bosworth,Bosworth,HINCKLEY,England and Wales: 1930-1949,2013-12-18 17:48:50,owner-occupied,11,4.0,100030497070,Address Matched
3,641449911152012091816382695920980,"69, Hinckley Road",Earl Shilton,,LE9 7LH,9039157868,D,C,64,79,Bungalow,Detached,2012-09-18,E07000132,E14000583,Leicestershire,2012-09-18,marketed sale,59,76,207,118,5.1,40,3.0,111,62,817,679,110,75,128,Single,Y,NODATA!,,,2106.0,100.0,double glazing installed during or after 2002,Normal,1.0,5.0,5.0,20,1.0,From main system,Good,Good,"Solid, no insulation (assumed)",,,Fully double glazed,Good,Good,"Cavity wall, filled cavity",Good,Good,"Room heaters, mains gas",,,"Pitched, 100 mm loft insulation",Average,Average,"Boiler and radiators, mains gas",Good,Good,"Programmer, room thermostat and TRVs",Good,Good,Low energy lighting in 20% of fixed outlets,Poor,Poor,mains gas (not community),0,NO DATA!,,,0.0,,natural,"69, Hinckley Road, Earl Shilton",Hinckley and Bosworth,Bosworth,LEICESTER,England and Wales: 1930-1949,2012-09-18 16:38:26,owner-occupied,15,3.0,100030519581,Address Matched
4,496648659922010061416010176908480,"8, Pickering Place",Burbage,,LE10 2FJ,8785576768,B,B,83,85,Flat,NO DATA!,2010-06-10,E07000132,E14000583,Leicestershire,2010-06-14,new dwelling,83,84,134,127,1.4,22,1.3,57,34,212,215,85,85,61,standard tariff,,mid floor,,,,,NO DATA!,NO DATA!,,,,3,,From main system,Good,Good,(other premises below),,,Fully double glazed,Good,Good,Average thermal transmittance 0.45 W/m?K,Good,Good,,,,(other premises above),,,"Boiler and radiators, mains gas",Good,Good,"Programmer, room thermostat and TRVs",Average,Average,Low energy lighting in 33% of fixed outlets,Average,Average,mains gas - this is for backwards compatibilit...,0,NO DATA!,,2.45,,,NO DATA!,"8, Pickering Place, Burbage",Hinckley and Bosworth,Bosworth,HINCKLEY,NO DATA!,2010-06-14 16:01:01,,9,3.0,10090026218,Address Matched


Unnamed: 0,Postcode,Postcode Status,LSOA code,LSOA Name,User Data A,User Data B,User Data C,User Data D,User Data E,User Data F,User Data G,User Data H,User Data I,User Data J,User Data K,User Data L,User Data M,User Data N,User Data O,User Data P,Index of Multiple Deprivation Rank,Index of Multiple Deprivation Decile,Income Rank,Income Decile,Income Score,Employment Rank,Employment Decile,Employment Score,Education and Skills Rank,Education and Skills Decile,Health and Disability Rank,Health and Disability Decile,Crime Rank,Crime Decile,Barriers to Housing and Services Rank,Barriers to Housing and Services Decile,Living Environment Rank,Living Environment Decile,IDACI Rank,IDACI Decile,IDACI Score,IDAOPI Rank,IDAOPI Decile,IDAOPI Score
0,Postcode,**UNMATCHED**,,,In Use?,Latitude,Longitude,Easting,Northing,Grid Ref,Ward,Parish,Introduced,Terminated,Altitude,Country,Last Updated,Quality,LSOA Code,LSOA Name,,,,,,,,,,,,,,,,,,,,,,,,
1,CV10 0AA,Live,E01031102,Nuneaton and Bedworth 003C E01031102,Yes,52.52675,-1.46076,436681,292234,SP366922,St. Nicolas,"Nuneaton and Bedworth, unparished area",1980-01-01,,85,England,2022-11-25,Within the building of the matched address clo...,E01031102,Nuneaton and Bedworth 003C,12210.0,4.0,15558.0,5.0,0.105,10631.0,4.0,0.113,23574.0,8.0,7774.0,3.0,4258.0,2.0,10021.0,4.0,18272.0,6.0,19941.0,7.0,0.096,17648.0,6.0,0.118
2,CV10 0AB,Live,E01031102,Nuneaton and Bedworth 003C E01031102,Yes,52.527391,-1.459293,436780,292306,SP367923,St. Nicolas,"Nuneaton and Bedworth, unparished area",1980-01-01,,86,England,2022-11-25,Within the building of the matched address clo...,E01031102,Nuneaton and Bedworth 003C,12210.0,4.0,15558.0,5.0,0.105,10631.0,4.0,0.113,23574.0,8.0,7774.0,3.0,4258.0,2.0,10021.0,4.0,18272.0,6.0,19941.0,7.0,0.096,17648.0,6.0,0.118
3,CV10 0AD,Live,E01031102,Nuneaton and Bedworth 003C E01031102,Yes,52.5276,-1.461965,436599,292328,SP365923,St. Nicolas,"Nuneaton and Bedworth, unparished area",1980-01-01,,86,England,2022-11-25,Within the building of the matched address clo...,E01031102,Nuneaton and Bedworth 003C,12210.0,4.0,15558.0,5.0,0.105,10631.0,4.0,0.113,23574.0,8.0,7774.0,3.0,4258.0,2.0,10021.0,4.0,18272.0,6.0,19941.0,7.0,0.096,17648.0,6.0,0.118
4,CV10 0AE,Terminated,E01031102,Nuneaton and Bedworth 003C E01031102,No,52.52684,-1.462793,436543,292243,SP365922,St. Nicolas,"Nuneaton and Bedworth, unparished area",1980-01-01,2009-11-01,86,England,2022-11-25,Within the building of the matched address clo...,E01031102,Nuneaton and Bedworth 003C,12210.0,4.0,15558.0,5.0,0.105,10631.0,4.0,0.113,23574.0,8.0,7774.0,3.0,4258.0,2.0,10021.0,4.0,18272.0,6.0,19941.0,7.0,0.096,17648.0,6.0,0.118


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,AL10 0AA,\N,,\N,\N,\N,522503,208775,51.764264,-0.226254
1,AL10 0AB,\N,,\N,\N,\N,522680,209765,51.773122,-0.223341
2,AL10 0AD,\N,,\N,\N,\N,522997,209812,51.773475,-0.218732
3,AL10 0AE,\N,,\N,\N,\N,522530,209750,51.77302,-0.225519
4,AL10 0AG,\N,,\N,\N,\N,522515,209794,51.773419,-0.225721


### Fix column names in each dataset

In [5]:
# Deprivation dataset - colnames spread over two rows.
# If colname starts with 'User Data', then use second row, else first row

for colname in deprivation_df.columns:
    if colname.startswith('User Data'):
        new_colname = deprivation_df[colname].iloc[0]
        deprivation_df.rename(columns={colname: new_colname}, inplace=True)

# remove the first row
deprivation_df = deprivation_df.iloc[1:]
deprivation_df.head()

Unnamed: 0,Postcode,Postcode Status,LSOA code,LSOA Name,In Use?,Latitude,Longitude,Easting,Northing,Grid Ref,Ward,Parish,Introduced,Terminated,Altitude,Country,Last Updated,Quality,LSOA Code,LSOA Name.1,Index of Multiple Deprivation Rank,Index of Multiple Deprivation Decile,Income Rank,Income Decile,Income Score,Employment Rank,Employment Decile,Employment Score,Education and Skills Rank,Education and Skills Decile,Health and Disability Rank,Health and Disability Decile,Crime Rank,Crime Decile,Barriers to Housing and Services Rank,Barriers to Housing and Services Decile,Living Environment Rank,Living Environment Decile,IDACI Rank,IDACI Decile,IDACI Score,IDAOPI Rank,IDAOPI Decile,IDAOPI Score
1,CV10 0AA,Live,E01031102,Nuneaton and Bedworth 003C E01031102,Yes,52.52675,-1.46076,436681,292234,SP366922,St. Nicolas,"Nuneaton and Bedworth, unparished area",1980-01-01,,85,England,2022-11-25,Within the building of the matched address clo...,E01031102,Nuneaton and Bedworth 003C,12210,4,15558,5,0.105,10631,4,0.113,23574,8,7774,3,4258,2,10021,4,18272,6,19941,7,0.096,17648,6,0.118
2,CV10 0AB,Live,E01031102,Nuneaton and Bedworth 003C E01031102,Yes,52.527391,-1.459293,436780,292306,SP367923,St. Nicolas,"Nuneaton and Bedworth, unparished area",1980-01-01,,86,England,2022-11-25,Within the building of the matched address clo...,E01031102,Nuneaton and Bedworth 003C,12210,4,15558,5,0.105,10631,4,0.113,23574,8,7774,3,4258,2,10021,4,18272,6,19941,7,0.096,17648,6,0.118
3,CV10 0AD,Live,E01031102,Nuneaton and Bedworth 003C E01031102,Yes,52.5276,-1.461965,436599,292328,SP365923,St. Nicolas,"Nuneaton and Bedworth, unparished area",1980-01-01,,86,England,2022-11-25,Within the building of the matched address clo...,E01031102,Nuneaton and Bedworth 003C,12210,4,15558,5,0.105,10631,4,0.113,23574,8,7774,3,4258,2,10021,4,18272,6,19941,7,0.096,17648,6,0.118
4,CV10 0AE,Terminated,E01031102,Nuneaton and Bedworth 003C E01031102,No,52.52684,-1.462793,436543,292243,SP365922,St. Nicolas,"Nuneaton and Bedworth, unparished area",1980-01-01,2009-11-01,86,England,2022-11-25,Within the building of the matched address clo...,E01031102,Nuneaton and Bedworth 003C,12210,4,15558,5,0.105,10631,4,0.113,23574,8,7774,3,4258,2,10021,4,18272,6,19941,7,0.096,17648,6,0.118
5,CV10 0AF,Live,E01031113,Nuneaton and Bedworth 001D E01031113,Yes,52.538271,-1.467776,436196,293512,SP361935,Weddington,"Nuneaton and Bedworth, unparished area",2006-04-01,,84,England,2022-11-25,Within the building of the matched address clo...,E01031113,Nuneaton and Bedworth 001D,26040,8,26694,9,0.048,24075,8,0.052,25403,8,21185,7,17466,6,22854,7,12915,4,25758,8,0.059,24829,8,0.072


In [6]:
# Flood risk dataset - set names
colnames = {0: 'postcode', 1: 'FID', 2: 'PROB_4BAND', 3: 'SUITABILITY', 4: 'PUB_DATE',
            5: 'RISK_FOR_INSURANCE_SOP', 6: 'easting', 7: 'northing', 8: 'latitude', 9: 'longitude'}

floodrisk_df.rename(columns=colnames, inplace=True)
floodrisk_df.head()

Unnamed: 0,postcode,FID,PROB_4BAND,SUITABILITY,PUB_DATE,RISK_FOR_INSURANCE_SOP,easting,northing,latitude,longitude
0,AL10 0AA,\N,,\N,\N,\N,522503,208775,51.764264,-0.226254
1,AL10 0AB,\N,,\N,\N,\N,522680,209765,51.773122,-0.223341
2,AL10 0AD,\N,,\N,\N,\N,522997,209812,51.773475,-0.218732
3,AL10 0AE,\N,,\N,\N,\N,522530,209750,51.77302,-0.225519
4,AL10 0AG,\N,,\N,\N,\N,522515,209794,51.773419,-0.225721


### Get street name from EPC dataset

In [7]:
def get_street_name(address_1, address_2):
    
    """
    Get street name from first two street address fields
    """
    
    street_and_road = re.compile(r'^\d,\s+')
    
    # if street name starts with a number (maybe followed by comma) and a space, likely next part is street name
    if street_and_road.match(address_1):
        street = re.split(street_and_road, address_1)[1].lower()
        
    # otherwise choose the second part of the address as the street name
    else:
        street = str(address_2).lower()
        
    return street

In [8]:
epc_df['Street'] = epc_df[['ADDRESS1', 'ADDRESS2']].apply(lambda x: get_street_name(x[0], x[1]), axis=1)

### Also remove prefix 'England and Wales: ' from construction age band in EPC data, and drop imvalid values

In [9]:
epc_df['CONSTRUCTION_AGE_BAND'].value_counts(dropna=False)

England and Wales: 1950-1966       5445
NO DATA!                           5351
England and Wales: 1967-1975       5160
England and Wales: 1930-1949       4618
England and Wales: 1900-1929       4223
England and Wales: 1983-1990       3541
England and Wales: 1976-1982       3190
England and Wales: before 1900     2384
England and Wales: 1996-2002       2264
England and Wales: 2003-2006       2126
England and Wales: 1991-1995       1825
England and Wales: 2007 onwards    1011
NaN                                 340
England and Wales: 2007-2011        334
2021                                186
2022                                169
2020                                146
INVALID!                            100
England and Wales: 2012 onwards      74
2018                                 73
2019                                 54
2017                                 26
2016                                 17
1930                                  6
Name: CONSTRUCTION_AGE_BAND, dtype: int6

In [10]:
# e.g. England and Wales: 1967-1975 -> 1967-1975
epc_df['CONSTRUCTION_AGE_BAND'] = epc_df['CONSTRUCTION_AGE_BAND'].replace('England and Wales: ' , '', regex=True)
epc_df['CONSTRUCTION_AGE_BAND'] = epc_df['CONSTRUCTION_AGE_BAND'].replace(r'(NO DATA!|INVALID!)' , np.nan, regex=True)

In [11]:
epc_df['CONSTRUCTION_AGE_BAND'].value_counts(dropna=False)

NaN             5791
1950-1966       5445
1967-1975       5160
1930-1949       4618
1900-1929       4223
1983-1990       3541
1976-1982       3190
before 1900     2384
1996-2002       2264
2003-2006       2126
1991-1995       1825
2007 onwards    1011
2007-2011        334
2021             186
2022             169
2020             146
2012 onwards      74
2018              73
2019              54
2017              26
2016              17
1930               6
Name: CONSTRUCTION_AGE_BAND, dtype: int64

### Select out relevant columns in each dataset and drop duplicates

In [12]:
zoopla_df = zoopla_df[['listing_id', 'parish', 'post_town', 'postcode', 'street_name', 'latitude', 'longitude',
                       'property_type', 'num_bedrooms', 'num_bathrooms', 'description',
                       'first_published_date', 'last_published_date', 'price', 'price_modifier']].drop_duplicates()

epc_df = epc_df[['Street', 'POSTCODE', 'CURRENT_ENERGY_RATING', 'POTENTIAL_ENERGY_RATING',
                 'PROPERTY_TYPE', 'BUILT_FORM', 'TOTAL_FLOOR_AREA', 
                 'NUMBER_HABITABLE_ROOMS', 'CONSTRUCTION_AGE_BAND']].drop_duplicates()
epc_df.rename(columns={'POSTCODE': 'postcode'}, inplace=True)

deprivation_df = deprivation_df[['Postcode', 'Index of Multiple Deprivation Decile', 'Income Decile',
                                 'Employment Decile', 'Education and Skills Decile',
                                 'Health and Disability Decile', 'Crime Decile', 
                                 'Barriers to Housing and Services Decile', 'Living Environment Decile',
                                 'IDACI Decile', 'IDAOPI Decile']].drop_duplicates()
deprivation_df.rename(columns={'Postcode': 'postcode'}, inplace=True)

floodrisk_df = floodrisk_df[['postcode', 'PROB_4BAND']].drop_duplicates()

### Map PROPERTY_TYPE and BUILT_FORM in EPC data onto those for property_type in Zoopla data
We will then be able to narrow down possible EPC data houses onto the Zoopla house and join them

In [13]:
# first see if EPC appears in any of the Zoopla property descriptions
zoopla_df_epcs = zoopla_df[zoopla_df['description'].str.contains('epc', case=False)]['description']
for epc in zoopla_df_epcs:
    print(epc)

Here is a Detached Residence with four bedrooms occupying a prominent corner plot within this highly regarded and most sought-after location opposite Nuneaton Golf Club and handy for all local amenities. EPC rating D.
A vastly improved Semi Detached House in a sought-after residential area and offering excellent family accommodation with three bedrooms and two bathrooms. EPC rating C.
Here is a modern three storey Semi Detached Residence offering well planned accommodation designed to suit the needs of a modern family lifestyle. Early Viewing advised EPC rating B.
Here is a most delightful traditional style Semi Detached House offering much improved and particularly well maintained accommodation enjoying a wealth of charm and character throughout. EPC rating D.
Here's a great starter home! A larger style centre terrace house offering deceptively spacious and well presented accommodation considered ideal for the first time buyer. EPC rating E.
Unexpectedly avaiable; A refurbished and mu

Very few descriptions have EPCs so use the EPC data to infer them

In [14]:
print('Zoopla house types:')
display(zoopla_df['property_type'].value_counts())

print('\nEPC house types:')
display(epc_df.groupby(['PROPERTY_TYPE', 'BUILT_FORM'])['PROPERTY_TYPE'].count())

Zoopla house types:


Detached house            183
Semi-detached house       155
Terraced house             90
Flat                       44
End terrace house          36
Detached bungalow          19
Mobile/park home           12
Link-detached house        12
Semi-detached bungalow      8
Bungalow                    6
Land                        5
Town house                  5
Maisonette                  2
Mews house                  2
Cottage                     1
Chalet                      1
Name: property_type, dtype: int64


EPC house types:


PROPERTY_TYPE  BUILT_FORM          
Bungalow       Detached                 2935
               Enclosed End-Terrace        2
               Enclosed Mid-Terrace        2
               End-Terrace               370
               Mid-Terrace               449
               NO DATA!                    2
               Semi-Detached            1564
Flat           Detached                  707
               Enclosed End-Terrace      145
               Enclosed Mid-Terrace       89
               End-Terrace               784
               Mid-Terrace              1023
               NO DATA!                  168
               Semi-Detached            1302
House          Detached                 9145
               Enclosed End-Terrace       90
               Enclosed Mid-Terrace       25
               End-Terrace              2629
               Mid-Terrace              4744
               NO DATA!                  145
               Semi-Detached           11145
Maisonette     Deta

In [15]:
def set_property_genre_epc(property_type, built_form):
    
    """
    Map the EPC property type and built form pairs into the Zoopla values
    """
    
    property_type = str(property_type).lower()
    built_form = str(built_form).lower()
    
    if property_type == 'bungalow':
        property_genre = 'Bungalow'
            
    elif property_type == 'flat':
        property_genre = 'Flat'
        
    elif property_type == 'house':
        if built_form == 'detached':
            property_genre = 'Detached house'
        elif built_form == 'semi-detached':
            property_genre = 'Semi-detached house'
        elif 'end-terrace' in built_form:
            property_genre = 'End terrace house'
        elif 'mid-terrace' in built_form:
            property_genre = 'Terraced house'
        else:
            property_genre = 'House'
            
    elif property_type == 'maisonette':
        property_genre = 'Maisonette'
        
    else:
        property_genre = 'Other/Unknown'
        
    return property_genre


In [16]:
def set_property_genre_zoopla(property_type):
    
    """
    Merge rare Zoopla property types into their own parent category or an 'Other' genre
    This is to create a temporary property type field that we can then join on to the EPC data
    But for the prediction, we will use the original property types
    """
    
    property_type = str(property_type).lower()
    
    if property_type == 'town house':
        property_genre = 'Terraced house'
    elif property_type == 'link-detached house':
        property_genre = 'Detached house'
    elif 'bungalow' in property_type:
        property_genre = 'Bungalow'
    elif property_type in ['semi-detached house', 'detached house', 'bungalow', 'flat',
       'town house', 'link-detached house', 'end terrace house', 'terraced house',
       'detached bungalow', 'maisonette', 'semi-detached bungalow']:
        property_genre = property_type.capitalize()
    else:
        property_genre = 'Other/Unknown'
        
    return property_genre
    

In [17]:
epc_df['property_type_temp'] = epc_df[['PROPERTY_TYPE', 'BUILT_FORM']].apply(
    lambda x: set_property_genre_epc(x[0], x[1]), axis=1)

zoopla_df['property_type_temp'] = zoopla_df['property_type'].apply(set_property_genre_zoopla)

display(epc_df['property_type_temp'].value_counts())
display(zoopla_df['property_type_temp'].value_counts())

Semi-detached house    11145
Detached house          9145
Bungalow                5324
Terraced house          4769
Flat                    4218
End terrace house       2719
Maisonette               530
House                    145
Other/Unknown              3
Name: property_type_temp, dtype: int64

Detached house         195
Semi-detached house    155
Terraced house          95
Flat                    44
End terrace house       36
Bungalow                33
Other/Unknown           33
Maisonette               2
Name: property_type_temp, dtype: int64

Convert numerical fields from strings to numbers, and energy ratings to ints. Since energy ratings form an ordered scale, it makes sense to convert these from categorical to numerical values for the ML step later

In [18]:
epc_df['TOTAL_FLOOR_AREA'] = epc_df['TOTAL_FLOOR_AREA'].astype(float, errors='ignore')
epc_df['NUMBER_HABITABLE_ROOMS'] = epc_df['NUMBER_HABITABLE_ROOMS'].astype(float, errors='ignore')

energy_rating_mapper = {
    'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5, 'F': 6, 'G': 7
}
epc_df['CURRENT_ENERGY_RATING'] = epc_df['CURRENT_ENERGY_RATING'].map(energy_rating_mapper)
epc_df['POTENTIAL_ENERGY_RATING'] = epc_df['POTENTIAL_ENERGY_RATING'].map(energy_rating_mapper)

Now for a given postcode and property type in the EPC dataset, get both the most common EPC and range of EPCs

In [19]:
def get_mode(values):
    
    """
    Get most frequent value of a column in a given group.
    If there are no values, or all values are null, return null
    """

    if len(values) == 0 or all(pd.isnull(values)):
        return np.nan
    else:
        return pd.Series.mode(values)[0]

epc_df = epc_df.groupby(by=['postcode', 'property_type_temp']).agg({
    'Street': lambda x: get_mode(x),
    'CURRENT_ENERGY_RATING': ['min', 'median', 'max'],
    'POTENTIAL_ENERGY_RATING': ['min', 'median', 'max'],
    'TOTAL_FLOOR_AREA': ['min', 'median', 'max'],
    'NUMBER_HABITABLE_ROOMS': ['min', 'median', 'max'],
    'CONSTRUCTION_AGE_BAND': lambda x: get_mode(x)
})

In [20]:
epc_df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Street,CURRENT_ENERGY_RATING,CURRENT_ENERGY_RATING,CURRENT_ENERGY_RATING,POTENTIAL_ENERGY_RATING,POTENTIAL_ENERGY_RATING,POTENTIAL_ENERGY_RATING,TOTAL_FLOOR_AREA,TOTAL_FLOOR_AREA,TOTAL_FLOOR_AREA,NUMBER_HABITABLE_ROOMS,NUMBER_HABITABLE_ROOMS,NUMBER_HABITABLE_ROOMS,CONSTRUCTION_AGE_BAND
Unnamed: 0_level_1,Unnamed: 1_level_1,<lambda>,min,median,max,min,median,max,min,median,max,min,median,max,<lambda>
postcode,property_type_temp,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
CV10 0RY,Detached house,atherstone road,3,3.0,3,2,2.0,2,202.0,202.0,202.0,7.0,7.0,7.0,1967-1975
CV10 0SB,Bungalow,kenilworth farm,3,3.0,3,1,1.0,1,19.0,21.5,36.0,,,,
CV10 0SB,Detached house,watling street,6,6.0,6,2,2.0,2,225.0,225.0,225.0,7.0,7.0,7.0,before 1900
CV10 0TT,Detached house,watling street,6,6.0,6,4,4.0,4,195.0,195.0,195.0,8.0,8.0,8.0,1950-1966
CV10 0TT,Semi-detached house,mira drive,4,4.5,5,2,2.0,2,81.0,85.0,89.0,5.0,5.0,5.0,1950-1966
CV10 0TZ,Flat,youth with a mission,3,3.0,3,3,3.0,3,72.0,72.0,72.0,4.0,4.0,4.0,1967-1975
CV13 0AA,Bungalow,shenton lane,5,6.0,7,1,5.0,6,62.0,64.5,75.0,3.0,3.5,4.0,1900-1929
CV13 0AD,Detached house,ambion lane,6,6.0,6,5,5.0,5,104.0,104.0,104.0,2.0,2.0,2.0,1991-1995
CV13 0AE,Detached house,sutton cheney,5,5.0,5,2,2.0,2,175.0,175.0,175.0,6.0,6.0,6.0,before 1900
CV13 0AG,Bungalow,main street,4,5.0,6,3,3.5,4,69.0,78.5,106.0,1.0,5.0,6.0,1967-1975


In [21]:
# set the indices back as columns so we can join them to the zoopla dataframe
epc_df = epc_df.reset_index()

In [22]:
# set the columnnames from two-level to one-level

print(epc_df.columns)

new_colnames = []

# loop over columns; if colname has an inner level name, append this to the outer level name
# otherwise just keep the outer level name
for i in range(len(epc_df.columns)):
    if epc_df.columns[i][1] != '':
        new_colnames.append(epc_df.columns[i][0] + '_' + epc_df.columns[i][1])
    else:
        new_colnames.append(epc_df.columns[i][0])

epc_df.columns = new_colnames
print(epc_df.columns)

MultiIndex([(               'postcode',         ''),
            (     'property_type_temp',         ''),
            (                 'Street', '<lambda>'),
            (  'CURRENT_ENERGY_RATING',      'min'),
            (  'CURRENT_ENERGY_RATING',   'median'),
            (  'CURRENT_ENERGY_RATING',      'max'),
            ('POTENTIAL_ENERGY_RATING',      'min'),
            ('POTENTIAL_ENERGY_RATING',   'median'),
            ('POTENTIAL_ENERGY_RATING',      'max'),
            (       'TOTAL_FLOOR_AREA',      'min'),
            (       'TOTAL_FLOOR_AREA',   'median'),
            (       'TOTAL_FLOOR_AREA',      'max'),
            ( 'NUMBER_HABITABLE_ROOMS',      'min'),
            ( 'NUMBER_HABITABLE_ROOMS',   'median'),
            ( 'NUMBER_HABITABLE_ROOMS',      'max'),
            (  'CONSTRUCTION_AGE_BAND', '<lambda>')],
           )
Index(['postcode', 'property_type_temp', 'Street_<lambda>',
       'CURRENT_ENERGY_RATING_min', 'CURRENT_ENERGY_RATING_median',
       'CU

In [23]:
# rename the lambda columns to more meaningful names
epc_df.rename(columns={'Street_<lambda>': 'Street_mode', 
                       'CONSTRUCTION_AGE_BAND_<lambda>': 'CONSTRUCTION_AGE_BAND_mode'}, inplace=True)

In [24]:
epc_df.head()

Unnamed: 0,postcode,property_type_temp,Street_mode,CURRENT_ENERGY_RATING_min,CURRENT_ENERGY_RATING_median,CURRENT_ENERGY_RATING_max,POTENTIAL_ENERGY_RATING_min,POTENTIAL_ENERGY_RATING_median,POTENTIAL_ENERGY_RATING_max,TOTAL_FLOOR_AREA_min,TOTAL_FLOOR_AREA_median,TOTAL_FLOOR_AREA_max,NUMBER_HABITABLE_ROOMS_min,NUMBER_HABITABLE_ROOMS_median,NUMBER_HABITABLE_ROOMS_max,CONSTRUCTION_AGE_BAND_mode
0,CV10 0RY,Detached house,atherstone road,3,3.0,3,2,2.0,2,202.0,202.0,202.0,7.0,7.0,7.0,1967-1975
1,CV10 0SB,Bungalow,kenilworth farm,3,3.0,3,1,1.0,1,19.0,21.5,36.0,,,,
2,CV10 0SB,Detached house,watling street,6,6.0,6,2,2.0,2,225.0,225.0,225.0,7.0,7.0,7.0,before 1900
3,CV10 0TT,Detached house,watling street,6,6.0,6,4,4.0,4,195.0,195.0,195.0,8.0,8.0,8.0,1950-1966
4,CV10 0TT,Semi-detached house,mira drive,4,4.5,5,2,2.0,2,81.0,85.0,89.0,5.0,5.0,5.0,1950-1966


### Left join EPC dataset to Zoopla dataset
If postcode and property type same, then join

In [25]:
zoopla_df = zoopla_df.merge(epc_df, on=['postcode', 'property_type_temp'], how='left')
zoopla_df.head()

Unnamed: 0,listing_id,parish,post_town,postcode,street_name,latitude,longitude,property_type,num_bedrooms,num_bathrooms,description,first_published_date,last_published_date,price,price_modifier,property_type_temp,Street_mode,CURRENT_ENERGY_RATING_min,CURRENT_ENERGY_RATING_median,CURRENT_ENERGY_RATING_max,POTENTIAL_ENERGY_RATING_min,POTENTIAL_ENERGY_RATING_median,POTENTIAL_ENERGY_RATING_max,TOTAL_FLOOR_AREA_min,TOTAL_FLOOR_AREA_median,TOTAL_FLOOR_AREA_max,NUMBER_HABITABLE_ROOMS_min,NUMBER_HABITABLE_ROOMS_median,NUMBER_HABITABLE_ROOMS_max,CONSTRUCTION_AGE_BAND_mode
0,63883197,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,4,0,"Discover this 4 bedroom Lanford home, ideal fo...",2023-02-04 05:28:55,2023-02-04 05:40:31,376500.0,from,Detached house,,,,,,,,,,,,,,
1,63883200,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,4,0,This four bedroom Ransford home is perfect for...,2023-02-04 05:28:47,2023-02-04 05:38:17,489950.0,from,Detached house,,,,,,,,,,,,,,
2,63883198,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,3,0,Find out how our mortgage contribution scheme*...,2023-02-04 05:28:47,2023-02-04 05:38:28,305000.0,from,Detached house,,,,,,,,,,,,,,
3,63883199,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,3,0,A delightful three bedroom home with an integr...,2023-02-04 05:28:47,2023-02-04 05:40:02,314950.0,from,Detached house,,,,,,,,,,,,,,
4,63881100,"Nuneaton and Bedworth, unparished area",Nuneaton,CV10 0FH,Duckpond Lane,52.54377,-1.463799,Detached house,3,2,Here is a superb double fronted Detached Resid...,2023-02-03 19:18:48,2023-02-03 19:37:49,300000.0,guide_price,Detached house,,,,,,,,,,,,,,


### Left join EPC dataset to deprivation dataset
If postcode same, then join

In [26]:
zoopla_df = zoopla_df.merge(deprivation_df, on='postcode', how='left')
zoopla_df.head()

Unnamed: 0,listing_id,parish,post_town,postcode,street_name,latitude,longitude,property_type,num_bedrooms,num_bathrooms,description,first_published_date,last_published_date,price,price_modifier,property_type_temp,Street_mode,CURRENT_ENERGY_RATING_min,CURRENT_ENERGY_RATING_median,CURRENT_ENERGY_RATING_max,POTENTIAL_ENERGY_RATING_min,POTENTIAL_ENERGY_RATING_median,POTENTIAL_ENERGY_RATING_max,TOTAL_FLOOR_AREA_min,TOTAL_FLOOR_AREA_median,TOTAL_FLOOR_AREA_max,NUMBER_HABITABLE_ROOMS_min,NUMBER_HABITABLE_ROOMS_median,NUMBER_HABITABLE_ROOMS_max,CONSTRUCTION_AGE_BAND_mode,Index of Multiple Deprivation Decile,Income Decile,Employment Decile,Education and Skills Decile,Health and Disability Decile,Crime Decile,Barriers to Housing and Services Decile,Living Environment Decile,IDACI Decile,IDAOPI Decile
0,63883197,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,4,0,"Discover this 4 bedroom Lanford home, ideal fo...",2023-02-04 05:28:55,2023-02-04 05:40:31,376500.0,from,Detached house,,,,,,,,,,,,,,,4,4,3,4,4,4,7,2,4,4
1,63883200,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,4,0,This four bedroom Ransford home is perfect for...,2023-02-04 05:28:47,2023-02-04 05:38:17,489950.0,from,Detached house,,,,,,,,,,,,,,,4,4,3,4,4,4,7,2,4,4
2,63883198,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,3,0,Find out how our mortgage contribution scheme*...,2023-02-04 05:28:47,2023-02-04 05:38:28,305000.0,from,Detached house,,,,,,,,,,,,,,,4,4,3,4,4,4,7,2,4,4
3,63883199,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,3,0,A delightful three bedroom home with an integr...,2023-02-04 05:28:47,2023-02-04 05:40:02,314950.0,from,Detached house,,,,,,,,,,,,,,,4,4,3,4,4,4,7,2,4,4
4,63881100,"Nuneaton and Bedworth, unparished area",Nuneaton,CV10 0FH,Duckpond Lane,52.54377,-1.463799,Detached house,3,2,Here is a superb double fronted Detached Resid...,2023-02-03 19:18:48,2023-02-03 19:37:49,300000.0,guide_price,Detached house,,,,,,,,,,,,,,,8,9,8,8,7,6,7,4,8,8


### Left join EPC dataset to flood risk dataset
If postcode same, then join

In [27]:
zoopla_df = zoopla_df.merge(floodrisk_df, on='postcode', how='left')
zoopla_df.head()

Unnamed: 0,listing_id,parish,post_town,postcode,street_name,latitude,longitude,property_type,num_bedrooms,num_bathrooms,description,first_published_date,last_published_date,price,price_modifier,property_type_temp,Street_mode,CURRENT_ENERGY_RATING_min,CURRENT_ENERGY_RATING_median,CURRENT_ENERGY_RATING_max,POTENTIAL_ENERGY_RATING_min,POTENTIAL_ENERGY_RATING_median,POTENTIAL_ENERGY_RATING_max,TOTAL_FLOOR_AREA_min,TOTAL_FLOOR_AREA_median,TOTAL_FLOOR_AREA_max,NUMBER_HABITABLE_ROOMS_min,NUMBER_HABITABLE_ROOMS_median,NUMBER_HABITABLE_ROOMS_max,CONSTRUCTION_AGE_BAND_mode,Index of Multiple Deprivation Decile,Income Decile,Employment Decile,Education and Skills Decile,Health and Disability Decile,Crime Decile,Barriers to Housing and Services Decile,Living Environment Decile,IDACI Decile,IDAOPI Decile,PROB_4BAND
0,63883197,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,4,0,"Discover this 4 bedroom Lanford home, ideal fo...",2023-02-04 05:28:55,2023-02-04 05:40:31,376500.0,from,Detached house,,,,,,,,,,,,,,,4,4,3,4,4,4,7,2,4,4,
1,63883200,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,4,0,This four bedroom Ransford home is perfect for...,2023-02-04 05:28:47,2023-02-04 05:38:17,489950.0,from,Detached house,,,,,,,,,,,,,,,4,4,3,4,4,4,7,2,4,4,
2,63883198,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,3,0,Find out how our mortgage contribution scheme*...,2023-02-04 05:28:47,2023-02-04 05:38:28,305000.0,from,Detached house,,,,,,,,,,,,,,,4,4,3,4,4,4,7,2,4,4,
3,63883199,"Nuneaton and Bedworth, unparished area",Nuneaton,CV11 4FS,"Meadow Green, Watling Street",52.52016999999999,-1.4552873,Detached house,3,0,A delightful three bedroom home with an integr...,2023-02-04 05:28:47,2023-02-04 05:40:02,314950.0,from,Detached house,,,,,,,,,,,,,,,4,4,3,4,4,4,7,2,4,4,
4,63881100,"Nuneaton and Bedworth, unparished area",Nuneaton,CV10 0FH,Duckpond Lane,52.54377,-1.463799,Detached house,3,2,Here is a superb double fronted Detached Resid...,2023-02-03 19:18:48,2023-02-03 19:37:49,300000.0,guide_price,Detached house,,,,,,,,,,,,,,,8,9,8,8,7,6,7,4,8,8,


### Save to csv file

In [28]:
try:
    os.mkdir(SAVE_FOLDER)
except OSError:
    pass

save_file = os.path.join(SAVE_FOLDER, f'zoopla_properties_with_postcode_epc_dep_flood_{AREA.lower()}.csv')
    
zoopla_df.to_csv(save_file, index=False)