# Wakefield Parcel Data
Explores Wakefield parcel data and looks for code commonality with work done on Holyoke parcel data

Note that the following column names were added:
* Property Account Number
* CONDO_UNIT
* COMPLEX_NAME
* OWNER 3
* UTILITY
* TRAFFIC
* ?PERMIT?
* Number of Buildings (Property Cards)
* HALF BATHS
* KITCHENS
* Building Conditions
* Fireplaces
* Solar Hot Water
* Central Vacuum
* HEAT TYPE
* HEAT FUEL
* PCT_AIR_CONDITIONED
* BSMT_ALT_FINISH
* ROOF_COVER
* ?CURRENT ASSESSMENT?
* SALES PRICE
* ARMS Sales
* GRANTOR LAST NAME
* BK / PG (2)
* DATE (2)
* Sales Price (2)
* ARMS Sales (2)
* GRANTOR LAST NAME (2)
* STATE CODE

In [1]:
import pandas as pd
import numpy as np

In [2]:
FILEPATH = "./parcel_wakefield_raw.csv"

In [3]:
df = pd.read_csv(FILEPATH)

In [4]:
df

Unnamed: 0.1,Unnamed: 0,PCL ID,Property Account Number,ST NU,ST ALT,LOCATION,CONDO_UNIT,COMPLEX_NAME,OWNER,OWNER 2,...,Sales Price (2),ARMS Sales (2),GRANTOR LAST NAME (2),LAND VAL,YARD ITEMS,BUILD VAL,TOTAL VAL,Unnamed: 80,ADJ AREA,STATE CODE
0,1,07-069-182C,10006,,,WALNUT ST,,,"MERULLO, WILLIAM D ...",WILLIAM J MERULLO ...,...,0,,KEHOE ...,42000,14700,0,56700,...,,F_769337_3008027
1,2,07-042-184A,10014,63,-63A,CEDAR ST,,,"KLINE, ROBERT A ...",,...,210000,CONVERSION,BARRETT ...,255500,0,402100,657600,...,5461,F_769446_3007971
2,3,07-036-188A,10022,31,,GOULD ST,,,GOLDSTEIN ALLAN ...,DADY-GOLDSTEIN DIANE M ...,...,333000,,"LEDONNE,RICHARD M ...",254000,3700,212800,470500,...,2359.2,F_769416_3007700
3,4,02-006-20C,1003,,,PARKER RD,,,TOWN OF WAKEFIELD ...,BOARD OF SELECTMEN ...,...,0,,,153500,0,0,153500,...,,F_766364_3011614
4,5,07-038-191A,10031,27,,GOULD ST,,,"PASSERINI, PATRICK A ...",JOYCE K PASSERINI ...,...,64000,3,BERTRAND ...,255300,0,251800,507100,...,2963,F_769530_3007727
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9770,10129,19-2M2-206B,39722,2,,MIDDLESEX ST,# 2,212,KOZACHEK CATHERINE ...,VERTOPI JETMIR ...,...,1,CONVENIENCE,"TWO MIDDLESEX STREET LLC , ...",0,0,550600,550600,...,3186,F_774102_3006007
9771,10130,13-07B-034,22912,7,,BARTLEY ST,,213,FINN LAUREN & RYAN ...,,...,1,CONVENIENCE,"JDR REALTY INVESTMENTS LLC, ...",0,0,628300,628300,...,2757,F_770501_3006591
9772,10131,13-09B-034,22912,9,,BARTLEY ST,,213,YU SYNG M ...,YU BRITTANY K ...,...,1,CONVENIENCE,"JDR REALTY INVESTMENTS LLC, ...",0,0,778600,778600,...,3878,F_770501_3006591
9773,10132,24-225-82A,48274,1014,,MAIN ST,#1,214,NIDEN MATTHEW R ...,OMO NICOLETTA S ...,...,1,CONVENIENCE,"PHAN,KY Q ...",0,0,473300,473300,...,1970,


In [5]:
cols_to_keep = [i for i in df.columns if i.find("Unnamed: ") == -1]

In [6]:
df = df[cols_to_keep]

In [7]:
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

In [8]:
df

Unnamed: 0,PCL ID,Property Account Number,ST NU,ST ALT,LOCATION,CONDO_UNIT,COMPLEX_NAME,OWNER,OWNER 2,OWNER 3,...,DATE (2),Sales Price (2),ARMS Sales (2),GRANTOR LAST NAME (2),LAND VAL,YARD ITEMS,BUILD VAL,TOTAL VAL,ADJ AREA,STATE CODE
0,07-069-182C,10006,,,WALNUT ST,,,"MERULLO, WILLIAM D",WILLIAM J MERULLO,,...,12/3/1953,0,,KEHOE,42000,14700,0,56700,,F_769337_3008027
1,07-042-184A,10014,63,-63A,CEDAR ST,,,"KLINE, ROBERT A",,,...,1/8/1990,210000,CONVERSION,BARRETT,255500,0,402100,657600,5461,F_769446_3007971
2,07-036-188A,10022,31,,GOULD ST,,,GOLDSTEIN ALLAN,DADY-GOLDSTEIN DIANE M,,...,6/25/2010,333000,,"LEDONNE,RICHARD M",254000,3700,212800,470500,2359.2,F_769416_3007700
3,02-006-20C,1003,,,PARKER RD,,,TOWN OF WAKEFIELD,BOARD OF SELECTMEN,,...,,0,,,153500,0,0,153500,,F_766364_3011614
4,07-038-191A,10031,27,,GOULD ST,,,"PASSERINI, PATRICK A",JOYCE K PASSERINI,,...,8/17/1981,64000,3,BERTRAND,255300,0,251800,507100,2963,F_769530_3007727
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9770,19-2M2-206B,39722,2,,MIDDLESEX ST,# 2,212,KOZACHEK CATHERINE,VERTOPI JETMIR,,...,3/18/2019,1,CONVENIENCE,"TWO MIDDLESEX STREET LLC ,",0,0,550600,550600,3186,F_774102_3006007
9771,13-07B-034,22912,7,,BARTLEY ST,,213,FINN LAUREN & RYAN,,,...,4/28/2020,1,CONVENIENCE,"JDR REALTY INVESTMENTS LLC,",0,0,628300,628300,2757,F_770501_3006591
9772,13-09B-034,22912,9,,BARTLEY ST,,213,YU SYNG M,YU BRITTANY K,,...,4/28/2020,1,CONVENIENCE,"JDR REALTY INVESTMENTS LLC,",0,0,778600,778600,3878,F_770501_3006591
9773,24-225-82A,48274,1014,,MAIN ST,#1,214,NIDEN MATTHEW R,OMO NICOLETTA S,,...,5/13/2020,1,CONVENIENCE,"PHAN,KY Q",0,0,473300,473300,1970,


In [9]:
df.to_csv("parcel_wakefield_raw_cleaned.csv", index=False)

## Calculate values of note

In [10]:
FILEPATH = "./parcel_wakefield_raw_cleaned.csv"

In [11]:
df = pd.read_csv(FILEPATH)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [12]:
df.iloc[:, 1]

0       10006.0
1       10014.0
2       10022.0
3        1003.0
4       10031.0
         ...   
9770      39722
9771      22912
9772      22912
9773      48274
9774      48274
Name: Property Account Number, Length: 9775, dtype: object

In [13]:
df.iloc[:, 6]

0       NaN
1       NaN
2       NaN
3       NaN
4       NaN
       ... 
9770    212
9771    213
9772    213
9773    214
9774    214
Name: COMPLEX_NAME, Length: 9775, dtype: object

In [14]:
def get_number_of_units(_x):
    floor_area = _x["FLA"]
    num_kitchens = _x["KITCHENS"]
    
    if num_kitchens > 0 and floor_area > 0:
        return num_kitchens
    return 0

In [15]:
df["Number of units"] = df.apply(get_number_of_units, axis=1)

In [16]:
df

Unnamed: 0,PCL ID,Property Account Number,ST NU,ST ALT,LOCATION,CONDO_UNIT,COMPLEX_NAME,OWNER,OWNER 2,OWNER 3,...,Sales Price (2),ARMS Sales (2),GRANTOR LAST NAME (2),LAND VAL,YARD ITEMS,BUILD VAL,TOTAL VAL,ADJ AREA,STATE CODE,Number of units
0,07-069-182C,10006.0,,,WALNUT ST,,,"MERULLO, WILLIAM D",WILLIAM J MERULLO,,...,0,,KEHOE,42000,14700,0,56700,,F_769337_3008027,0
1,07-042-184A,10014.0,63,-63A,CEDAR ST,,,"KLINE, ROBERT A",,,...,210000,CONVERSION,BARRETT,255500,0,402100,657600,5461.0,F_769446_3007971,3
2,07-036-188A,10022.0,31,,GOULD ST,,,GOLDSTEIN ALLAN,DADY-GOLDSTEIN DIANE M,,...,333000,,"LEDONNE,RICHARD M",254000,3700,212800,470500,2359.2,F_769416_3007700,1
3,02-006-20C,1003.0,,,PARKER RD,,,TOWN OF WAKEFIELD,BOARD OF SELECTMEN,,...,0,,,153500,0,0,153500,,F_766364_3011614,0
4,07-038-191A,10031.0,27,,GOULD ST,,,"PASSERINI, PATRICK A",JOYCE K PASSERINI,,...,64000,3,BERTRAND,255300,0,251800,507100,2963.0,F_769530_3007727,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9770,19-2M2-206B,39722,2,,MIDDLESEX ST,# 2,212,KOZACHEK CATHERINE,VERTOPI JETMIR,,...,1,CONVENIENCE,"TWO MIDDLESEX STREET LLC ,",0,0,550600,550600,3186.0,F_774102_3006007,1
9771,13-07B-034,22912,7,,BARTLEY ST,,213,FINN LAUREN & RYAN,,,...,1,CONVENIENCE,"JDR REALTY INVESTMENTS LLC,",0,0,628300,628300,2757.0,F_770501_3006591,1
9772,13-09B-034,22912,9,,BARTLEY ST,,213,YU SYNG M,YU BRITTANY K,,...,1,CONVENIENCE,"JDR REALTY INVESTMENTS LLC,",0,0,778600,778600,3878.0,F_770501_3006591,1
9773,24-225-82A,48274,1014,,MAIN ST,#1,214,NIDEN MATTHEW R,OMO NICOLETTA S,,...,1,CONVENIENCE,"PHAN,KY Q",0,0,473300,473300,1970.0,,1


In [17]:
def get_heat_system(_x):
    mapping = {
        None: None,
        "STEAM": "STEAM",
        "FORCED H/A": "DUCTED",
        "FORCED H/W": "NON-DUCTED",
        "UNIT HTRS": "NON-DUCTED",
        "ELECTRC BB": "NON-DUCTED",
        "RADIANT HW": "NON-DUCTED",
        "GRAVTY H/A": "NON-DUCTED",
        "FLOOR FURN": "NON-DUCTED",
        "NOT DUCTED": "NON-DUCTED",
        "AVERAGE": "NON-DUCTED",
        "WALL UNIT": "NON-DUCTED",
    }
    
    return mapping.get(_x["HEAT TYPE"])

In [18]:
df["Heat system"] = df.apply(get_heat_system, axis=1)

In [19]:
df

Unnamed: 0,PCL ID,Property Account Number,ST NU,ST ALT,LOCATION,CONDO_UNIT,COMPLEX_NAME,OWNER,OWNER 2,OWNER 3,...,ARMS Sales (2),GRANTOR LAST NAME (2),LAND VAL,YARD ITEMS,BUILD VAL,TOTAL VAL,ADJ AREA,STATE CODE,Number of units,Heat system
0,07-069-182C,10006.0,,,WALNUT ST,,,"MERULLO, WILLIAM D",WILLIAM J MERULLO,,...,,KEHOE,42000,14700,0,56700,,F_769337_3008027,0,
1,07-042-184A,10014.0,63,-63A,CEDAR ST,,,"KLINE, ROBERT A",,,...,CONVERSION,BARRETT,255500,0,402100,657600,5461.0,F_769446_3007971,3,STEAM
2,07-036-188A,10022.0,31,,GOULD ST,,,GOLDSTEIN ALLAN,DADY-GOLDSTEIN DIANE M,,...,,"LEDONNE,RICHARD M",254000,3700,212800,470500,2359.2,F_769416_3007700,1,DUCTED
3,02-006-20C,1003.0,,,PARKER RD,,,TOWN OF WAKEFIELD,BOARD OF SELECTMEN,,...,,,153500,0,0,153500,,F_766364_3011614,0,
4,07-038-191A,10031.0,27,,GOULD ST,,,"PASSERINI, PATRICK A",JOYCE K PASSERINI,,...,3,BERTRAND,255300,0,251800,507100,2963.0,F_769530_3007727,1,NON-DUCTED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9770,19-2M2-206B,39722,2,,MIDDLESEX ST,# 2,212,KOZACHEK CATHERINE,VERTOPI JETMIR,,...,CONVENIENCE,"TWO MIDDLESEX STREET LLC ,",0,0,550600,550600,3186.0,F_774102_3006007,1,DUCTED
9771,13-07B-034,22912,7,,BARTLEY ST,,213,FINN LAUREN & RYAN,,,...,CONVENIENCE,"JDR REALTY INVESTMENTS LLC,",0,0,628300,628300,2757.0,F_770501_3006591,1,DUCTED
9772,13-09B-034,22912,9,,BARTLEY ST,,213,YU SYNG M,YU BRITTANY K,,...,CONVENIENCE,"JDR REALTY INVESTMENTS LLC,",0,0,778600,778600,3878.0,F_770501_3006591,1,DUCTED
9773,24-225-82A,48274,1014,,MAIN ST,#1,214,NIDEN MATTHEW R,OMO NICOLETTA S,,...,CONVENIENCE,"PHAN,KY Q",0,0,473300,473300,1970.0,,1,DUCTED


In [20]:
building_type_mapping_file = "../database/mappings/building_type_mapping.csv"
df_build_type_mapping = pd.read_csv(building_type_mapping_file)

In [21]:
df_build_type_mapping

Unnamed: 0,DESC,Building Type
0,PMP/VLV HS,Office
1,2 FAMILY,Small Multifamily
2,4-8 FAM,Large Residential
3,3 FAMILY,Small Multifamily
4,APRTMNT-GN,Large Residential
...,...,...
79,SUPER MKT,Supermarket
80,NIGHT CLUB,Resturant
81,BOWLING AL,Assembly
82,RINK,Assembly


In [22]:
mapping_dict = df_build_type_mapping.set_index("DESC").to_dict()["Building Type"]

In [109]:
def set_building_type(_x, mapping_dict):
    return mapping_dict.get(_x["DESC"])

In [24]:
df["Building Type"] = df.apply(set_building_type, axis=1, args=(mapping_dict,))

In [25]:
df

Unnamed: 0,PCL ID,Property Account Number,ST NU,ST ALT,LOCATION,CONDO_UNIT,COMPLEX_NAME,OWNER,OWNER 2,OWNER 3,...,GRANTOR LAST NAME (2),LAND VAL,YARD ITEMS,BUILD VAL,TOTAL VAL,ADJ AREA,STATE CODE,Number of units,Heat system,Building Type
0,07-069-182C,10006.0,,,WALNUT ST,,,"MERULLO, WILLIAM D",WILLIAM J MERULLO,,...,KEHOE,42000,14700,0,56700,,F_769337_3008027,0,,
1,07-042-184A,10014.0,63,-63A,CEDAR ST,,,"KLINE, ROBERT A",,,...,BARRETT,255500,0,402100,657600,5461.0,F_769446_3007971,3,STEAM,
2,07-036-188A,10022.0,31,,GOULD ST,,,GOLDSTEIN ALLAN,DADY-GOLDSTEIN DIANE M,,...,"LEDONNE,RICHARD M",254000,3700,212800,470500,2359.2,F_769416_3007700,1,DUCTED,
3,02-006-20C,1003.0,,,PARKER RD,,,TOWN OF WAKEFIELD,BOARD OF SELECTMEN,,...,,153500,0,0,153500,,F_766364_3011614,0,,
4,07-038-191A,10031.0,27,,GOULD ST,,,"PASSERINI, PATRICK A",JOYCE K PASSERINI,,...,BERTRAND,255300,0,251800,507100,2963.0,F_769530_3007727,1,NON-DUCTED,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9770,19-2M2-206B,39722,2,,MIDDLESEX ST,# 2,212,KOZACHEK CATHERINE,VERTOPI JETMIR,,...,"TWO MIDDLESEX STREET LLC ,",0,0,550600,550600,3186.0,F_774102_3006007,1,DUCTED,
9771,13-07B-034,22912,7,,BARTLEY ST,,213,FINN LAUREN & RYAN,,,...,"JDR REALTY INVESTMENTS LLC,",0,0,628300,628300,2757.0,F_770501_3006591,1,DUCTED,
9772,13-09B-034,22912,9,,BARTLEY ST,,213,YU SYNG M,YU BRITTANY K,,...,"JDR REALTY INVESTMENTS LLC,",0,0,778600,778600,3878.0,F_770501_3006591,1,DUCTED,
9773,24-225-82A,48274,1014,,MAIN ST,#1,214,NIDEN MATTHEW R,OMO NICOLETTA S,,...,"PHAN,KY Q",0,0,473300,473300,1970.0,,1,DUCTED,


In [59]:
# Some building DESCs are not included in the mapping... do we need to update the mapping?
df[df["Building Type"].isnull() & df["DESC"]]["DESC"]

1        FAMILY FLAT
2        DUTCH COLON
4       N E COLONIAL
5           CAPE COD
7           CAPE COD
            ...     
9770    CONDEX CONDO
9771    CONDEX CONDO
9772    CONDEX CONDO
9773    CONVER CONDO
9774    CONVER CONDO
Name: DESC, Length: 6277, dtype: object

In [98]:
# Need to update the mapping
mapping_2 = pd.read_csv("../database/mappings/building_type_mapping_2.csv")

In [99]:
df_build_type_mapping = df_build_type_mapping.set_index("DESC")

KeyError: "None of ['DESC'] are in the columns"

In [100]:
mapping_2 = mapping_2.applymap(lambda x: x.strip() if isinstance(x, str) else x)
mapping_2 = mapping_2.set_index("DESC")

In [101]:
df_build_type_mapping["Building Type"]

DESC
PMP/VLV HS               Office
2 FAMILY      Small Multifamily
4-8 FAM       Large Residential
3 FAMILY      Small Multifamily
APRTMNT-GN    Large Residential
                    ...        
SUPER MKT           Supermarket
NIGHT CLUB            Resturant
BOWLING AL             Assembly
RINK                   Assembly
POST OFF                 Retail
Name: Building Type, Length: 84, dtype: object

In [102]:
mapping_2

Unnamed: 0_level_0,Building Type
DESC,Unnamed: 1_level_1
FAMILY FLAT,Small Multifamily
DUTCH COLON,Single Family
N E COLONIAL,Single Family
CAPE COD,Single Family
COLONIAL,Single Family
...,...
MOBL HOME,Single Family
DORMITORY,Large Residential
INDUST CONDO,Industrial
PRE ENG MF,Warehouse


In [103]:
pd.merge(df_build_type_mapping, mapping_2, how="inner", on="DESC")

Unnamed: 0_level_0,Building Type_x,Building Type_y
DESC,Unnamed: 1_level_1,Unnamed: 2_level_1
STORE,Retail,Retail
COMM BLOCK,Warehouse,Office
COLONIAL,Single Family,Single Family
WAREHOUSE,Warehouse,Garage
RANCH,Single Family,Single Family
ANTIQUE,Single Family,Single Family
DORMITORY,Large Residential,Large Residential
BUNGALOW,Single Family,Small Multifamily
RESTAURANT,Resturant,Restaurant
FUNERAL HM,Retail,Office


In [None]:
# So there is some overlap, with a few discrepancies... we will need to address

In [115]:
full_mapping = pd.concat([df_build_type_mapping, mapping_2])
full_mapping.to_csv("../database/mappings/full_building_type_mappings.csv")
full_mapping = full_mapping.to_dict()["Building Type"]

In [107]:
df.loc[[1, 2, 3], :].apply(set_building_type, axis=1, args=(full_mapping,))

FAMILY FLAT
Small Multifamily
DUTCH COLON
Single Family
nan
None


1    Small Multifamily
2        Single Family
3                 None
dtype: object

In [111]:
df["Building Type"] = df.apply(set_building_type, axis=1, args=(full_mapping,))

In [112]:
df

Unnamed: 0,PCL ID,Property Account Number,ST NU,ST ALT,LOCATION,CONDO_UNIT,COMPLEX_NAME,OWNER,OWNER 2,OWNER 3,...,LAND VAL,YARD ITEMS,BUILD VAL,TOTAL VAL,ADJ AREA,STATE CODE,Number of units,Heat system,Building Type,Decade Built
0,07-069-182C,10006.0,,,WALNUT ST,,,"MERULLO, WILLIAM D",WILLIAM J MERULLO,,...,42000,14700,0,56700,,F_769337_3008027,0,,,
1,07-042-184A,10014.0,63,-63A,CEDAR ST,,,"KLINE, ROBERT A",,,...,255500,0,402100,657600,5461.0,F_769446_3007971,3,STEAM,Small Multifamily,1930.0
2,07-036-188A,10022.0,31,,GOULD ST,,,GOLDSTEIN ALLAN,DADY-GOLDSTEIN DIANE M,,...,254000,3700,212800,470500,2359.2,F_769416_3007700,1,DUCTED,Single Family,1910.0
3,02-006-20C,1003.0,,,PARKER RD,,,TOWN OF WAKEFIELD,BOARD OF SELECTMEN,,...,153500,0,0,153500,,F_766364_3011614,0,,,
4,07-038-191A,10031.0,27,,GOULD ST,,,"PASSERINI, PATRICK A",JOYCE K PASSERINI,,...,255300,0,251800,507100,2963.0,F_769530_3007727,1,NON-DUCTED,Single Family,1900.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9770,19-2M2-206B,39722,2,,MIDDLESEX ST,# 2,212,KOZACHEK CATHERINE,VERTOPI JETMIR,,...,0,0,550600,550600,3186.0,F_774102_3006007,1,DUCTED,Small Multifamily,2010.0
9771,13-07B-034,22912,7,,BARTLEY ST,,213,FINN LAUREN & RYAN,,,...,0,0,628300,628300,2757.0,F_770501_3006591,1,DUCTED,Small Multifamily,2010.0
9772,13-09B-034,22912,9,,BARTLEY ST,,213,YU SYNG M,YU BRITTANY K,,...,0,0,778600,778600,3878.0,F_770501_3006591,1,DUCTED,Small Multifamily,2010.0
9773,24-225-82A,48274,1014,,MAIN ST,#1,214,NIDEN MATTHEW R,OMO NICOLETTA S,,...,0,0,473300,473300,1970.0,,1,DUCTED,Small Multifamily,1900.0


In [26]:
df["Decade Built"] = ((df["YEAR BLT"].fillna(0) // 10) * 10).astype(int).replace(0, np.nan)

In [113]:
df.to_csv("./parcel_wakefield_final.csv", index=False)