In [6]:
import pandas as pd
import numpy as np
from pyproj import CRS, Transformer

In [2]:
# Land use codes (kept as strings) that filter_luc treats as affiliated with
# MA state agencies: every code from 910 through 929, then 970 through 975.
accepted_codes = [
    str(code) for code in list(range(910, 930)) + list(range(970, 976))
]

def filter_luc(dataframe, codes=None):
    """Keep the rows whose land use code matches one of ``codes``.

    A row is kept when at least one of its ``luc_1``, ``luc_2``, ``luc_adj_1``
    or ``luc_adj_2`` values is found in ``codes``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Parcel table containing the four land use code columns.
    codes : iterable, optional
        Codes to keep. Defaults to the module-level ``accepted_codes`` (land
        use codes affiliated with MA state agencies). Membership is an exact
        match, so the codes must have the same type as the column values.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels.

    Raises
    ------
    KeyError
        If any of the four land use code columns is missing.
    """
    if codes is None:
        # Resolved at call time so the default always follows the global list.
        codes = accepted_codes
    luc_columns = ['luc_1', 'luc_2', 'luc_adj_1', 'luc_adj_2']
    # One element-wise membership test over all four columns, OR-ed per row.
    matches = dataframe[luc_columns].isin(list(codes)).any(axis=1)
    return dataframe[matches]

def filter_poly_typ(dataframe):
    """Return the rows of ``dataframe`` whose ``poly_typ`` is 'FEE' or 'TAX'.

    The result is a pandas DataFrame that keeps the original index labels.
    """
    is_fee_or_tax = dataframe['poly_typ'].isin(('FEE', 'TAX'))
    return dataframe.loc[is_fee_or_tax]

def filter_bldg(dataframe):
    """Keep only the land parcels that show evidence of a building.

    A row is kept when at least one of the building-related columns below is
    greater than 0; rows where none of them is positive (including rows where
    the values are missing) are removed.

    Columns specified by Ziba:
        bldg_value - for condos, generally includes land value
        bldg_area - may include garages, stairwells, basements, and other
            uninhabitable areas.
        bldgv_psf - building value $ per sq foot
    Additional columns:
        sqm_bldg - parcel area estimated to be covered by a building (sq meters)
        pct_bldg - % parcel area estimated to be covered by a building

    Returns a pandas DataFrame with the original index labels of the kept rows.
    """
    building_columns = (
        'bldg_value',
        'bldg_area',
        'bldgv_psf',
        'sqm_bldg',
        'pct_bldg',
    )
    # Build the query text from the column list instead of continuing a string
    # literal across lines with backslashes (which embeds the indentation in
    # the expression). Each comparison is parenthesised so the result does not
    # depend on how `|` binds relative to `>`.
    expression = ' | '.join(
        '({} > 0)'.format(column) for column in building_columns
    )
    return dataframe.query(expression)

In [4]:
# Load the parcel table.
# - addr_zip is read as str so zip codes retain their leading 0. The builtin
#   `str` replaces `np.string_`, which is the bytes alias and was removed in
#   NumPy 2.0.
# - The land use code columns are read as str as well, because filter_luc
#   compares them against string codes; a column inferred as numeric would
#   never match.
#   NOTE(review): this can change which rows pass the filter if any of these
#   columns used to be parsed as numbers — confirm against the raw file.
df = pd.read_csv(
    '../data/land_parcel/mapc.ma_parcels_metrofuture.csv',
    dtype={
        'addr_zip': str,
        'luc_1': str,
        'luc_2': str,
        'luc_adj_1': str,
        'luc_adj_2': str,
    },
)

# Keep state-agency land use codes, then FEE/TAX polygon types only.
land_parcel_df = filter_luc(df)
land_parcel_df = filter_poly_typ(land_parcel_df)

# Filtering leaves gaps in the row labels; renumber them 0..n-1 because later
# cells address rows by position-like labels. reset_index() without drop=True
# keeps the previous labels in a new 'index' column.
land_parcel_df = land_parcel_df.reset_index()

  interactivity=interactivity, compiler=compiler, result=result)


In [8]:
# add columns to store longitude and latitude coordinates
land_parcel_df['longitude'] = np.nan
land_parcel_df['latitude'] = np.nan

In [10]:
# convert parloc_id column to longitude, latitude coordinates
crs_4326 = CRS.from_epsg(4326) # target spatial reference to transform to
crs_26986 = CRS.from_epsg(26986) # for coordinates in meters
crs_3586 = CRS.from_epsg(3586) # for coordinates in feet

transformer_meters = Transformer.from_crs(crs_26986, crs_4326, always_xy=True)
transformer_feet = Transformer.from_crs(crs_3586, crs_4326, always_xy=True)

In [23]:
def _parse_parloc_id(parloc_id):
    """Split a whitespace-free parloc_id of the form '<unit>_<x>_<y>'.

    Returns ``(unit, x, y)`` where unit is 'F' or 'M' and x, y are floats, or
    ``None`` when the id does not start with 'M_' or 'F_'.

    Raises ValueError when the part after the prefix is not two numbers
    separated by a single underscore.
    """
    unit, separator, coordinates = parloc_id.partition('_')
    if unit not in ('F', 'M') or not separator:
        return None
    x_text, _, y_text = coordinates.partition('_')
    return unit, float(x_text), float(y_text)


# Make sure there are no spaces in the ids. Missing ids are left missing
# rather than being turned into the literal string 'nan'.
land_parcel_df['parloc_id'] = land_parcel_df['parloc_id'].map(
    lambda value: value if pd.isna(value) else str(value).replace(" ", "")
)

# 'F' ids carry coordinates in feet, 'M' ids carry coordinates in meters.
transformers_by_unit = {'F': transformer_feet, 'M': transformer_meters}
rows_by_unit = {unit: [] for unit in transformers_by_unit}
xs_by_unit = {unit: [] for unit in transformers_by_unit}
ys_by_unit = {unit: [] for unit in transformers_by_unit}

for position, parloc_id in enumerate(land_parcel_df['parloc_id']):
    try:
        parsed = None if pd.isna(parloc_id) else _parse_parloc_id(parloc_id)
    except ValueError:
        # Report and skip instead of aborting the whole conversion.
        print('At index ', position, ' parloc_id has malformed coordinates: ', parloc_id)
        continue
    if parsed is None:
        print('At index ', position, ' parloc_id does not start with M_ or F_')
        continue
    unit, x, y = parsed
    rows_by_unit[unit].append(position)
    xs_by_unit[unit].append(x)
    ys_by_unit[unit].append(y)

# Rows that could not be converted keep NaN coordinates.
longitudes = np.full(len(land_parcel_df), np.nan)
latitudes = np.full(len(land_parcel_df), np.nan)
for unit, transformer in transformers_by_unit.items():
    rows = rows_by_unit[unit]
    if not rows:
        continue
    # One batched transform per unit instead of one call per row.
    unit_longitudes, unit_latitudes = transformer.transform(
        np.array(xs_by_unit[unit], dtype=float),
        np.array(ys_by_unit[unit], dtype=float),
    )
    longitudes[rows] = unit_longitudes
    latitudes[rows] = unit_latitudes

# Assign whole columns by position, so the result does not depend on the row
# labels and re-running the cell gives the same values.
land_parcel_df['longitude'] = longitudes
land_parcel_df['latitude'] = latitudes

In [26]:
# Show the latitude of every row that received a coordinate.
land_parcel_df['latitude'].dropna()

0       42.126657
1       42.125234
2       42.125878
3       42.117266
4       42.115685
          ...    
7714    42.686681
7715    42.686224
7716    42.655239
7717    42.628150
7718    42.690427
Name: latitude, Length: 7719, dtype: float64

In [29]:
# Write the filtered parcels with their coordinates; the row labels are not
# written to the file.
land_parcel_df.to_csv(
    path_or_buf='./State-Surplus-TeamTaylor/data/state_land_plus_long_lat.csv',
    index=False,
)