# Find the comparables: exterior.txt

The file `exterior.txt` contains important property information about the areas of the property sections. Let's load this file and grab a subset with the important columns to continue our study.

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the project's `src` package) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
import pickle

import pandas as pd

from src.definitions import ROOT_DIR
from src.data.utils import Table, save_pickle

In [None]:
# Path to the 2016 exterior file, resolved against the project root.
exterior_fn = ROOT_DIR / 'data/external/2016/Real_building_land/exterior.txt'
# Fail fast, and name the offending path, when the input file is missing.
assert exterior_fn.exists(), f'Input file not found: {exterior_fn}'

In [None]:
# Wrap the raw file in the project's `Table` helper.
# NOTE(review): '2016' presumably selects the data year/layout — confirm in src.data.utils.
exterior = Table(exterior_fn, '2016')

In [None]:
# Get the table contents as a DataFrame; the rest of the notebook filters and
# pivots this frame with pandas.
exterior_df = exterior.get_df()

# Load accounts of interest
Let's keep only the accounts that meet the free-standing single-family home criteria, as identified while processing the `building_res.txt` file.

In [None]:
# Pickle holding the account numbers of interest.
one_bld_in_acct_fn = Path(ROOT_DIR, 'data/raw/2016/one_bld_in_acct.pickle')

In [None]:
# Read the pickled collection of account numbers back into memory.
# NOTE: unpickling can execute arbitrary code; only load files this project produced.
with one_bld_in_acct_fn.open('rb') as pickle_fh:
    one_bld_in_acct = pickle.load(pickle_fh)

In [None]:
# Boolean mask: True where the row's account is one of the accounts of interest.
cond0 = exterior_df.acct.isin(one_bld_in_acct)
# Keep only the matching rows (all columns).
exterior_df = exterior_df[cond0]

In [None]:
# Preview the first five rows of the filtered frame.
exterior_df.head(n=5)

In [None]:
# Row counts per exterior-feature description, 15 most frequent first.
exterior_df['sar_dscr'].value_counts().iloc[:15]

# Grab slice of the exterior features of interest
With the value counts on the exterior feature descriptions performed above, we can see that the majority of the features land in the top 10 categories. Let's filter out the rest of the categories.

In [None]:
# Labels of the 10 most frequent `sar_dscr` categories (value_counts sorts
# by descending frequency); these become the pivot-table columns later on.
cols = exterior_df['sar_dscr'].value_counts().index[:10]

In [None]:
# Boolean mask: True where the row's description is one of the retained categories.
cond0 = exterior_df.sar_dscr.isin(cols)
# Drop every row belonging to a less frequent category.
exterior_df = exterior_df[cond0]

# Build pivot table
Let's build a pivot table with the account number (`acct`) as the index, the surface area description (`sar_dscr`) as the columns, and `area` as the values.

In [None]:
# One row per account, one column per retained `sar_dscr` category, cell = area.
# Accounts lacking a category get 0 for it.
# NOTE(review): 'mean' is the pandas default, spelled out here on purpose —
# repeated (acct, sar_dscr) rows are averaged, not summed; confirm this is intended.
exterior_pivot = pd.pivot_table(
    exterior_df,
    values='area',
    index='acct',
    columns='sar_dscr',
    aggfunc='mean',
    fill_value=0,
)

In [None]:
# Preview the first five accounts of the pivoted frame.
exterior_pivot.head(n=5)

Add `acct` back as a regular column to make the merging process ahead easier.

In [None]:
# Move `acct` from the index to a regular column.
# Reassigning instead of `inplace=True`, and guarding on the column's presence,
# keeps the cell safe to re-run: a second in-place reset would otherwise push
# the fresh RangeIndex into a stray `index` column.
if 'acct' not in exterior_pivot.columns:
    exterior_pivot = exterior_pivot.reset_index()

In [None]:
# Sanity check before export: each account must appear on exactly one row.
assert not exterior_pivot['acct'].duplicated().any()

# Export exterior_comps

In [None]:
# Destination for the pivoted exterior areas.
save_fn = Path(ROOT_DIR, 'data/raw/2016/exterior_comps.pickle')
# Write the frame out with the project's pickle helper.
save_pickle(exterior_pivot, save_fn)