Skip to content

Commit

Permalink
Maintaining tract in area median income tsv file and breaking out by state.
Browse files (browse the repository at this point in the history)
  • Loading branch information
joseph-robertson committed Aug 10, 2017
1 parent 5497d2d commit 5386cff
Show file tree
Hide file tree
Showing 51 changed files with 2,087,180 additions and 291,204 deletions.
26 changes: 12 additions & 14 deletions data/lmi/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def area_median_income(self):

# Preprocess LMI file into vintage, etc. enumerations we use (divide by two)
df['hfl index'] = df['hfl index'].apply(lambda x: assign_heating_fuel(x))
df = df.rename(columns={'ybl index': 'Dependency=Vintage', 'hfl index': 'Dependency=Heating Fuel', 'EPW': 'Dependency=Location EPW', 'puma10': 'Dependency=PUMA', 'county': 'Dependency=County'})
df = df.rename(columns={'ybl index': 'Dependency=Vintage', 'hfl index': 'Dependency=Heating Fuel', 'EPW': 'Dependency=Location EPW', 'tract_gisjoin': 'Dependency=Location Census Tract', 'county': 'Dependency=County'})

for vintage in [4, 3, 2]:
sub = df[df['Dependency=Vintage']==vintage].copy()
Expand All @@ -58,7 +58,7 @@ def area_median_income(self):

df['Dependency=Vintage'] = df['Dependency=Vintage'].map({'<1940': '<1950', '<1950': '<1950', '1950s': '1950s', '1960s': '1960s', '1970s': '1970s', '1980s': '1980s', '1990s': '1990s', '2000s': '2000s', '2010s': '2000s'})

df = df[np.concatenate([['Dependency=Vintage', 'Dependency=Heating Fuel', 'Dependency=Location EPW', 'Dependency=PUMA', 'Dependency=County'], options])]
df = df[np.concatenate([['Dependency=Vintage', 'Dependency=Heating Fuel', 'Dependency=Location EPW', 'Dependency=Location Census Tract', 'Dependency=County'], options])]
df = add_option_prefix(df)

return df
Expand Down Expand Up @@ -97,8 +97,7 @@ def add_option_prefix(df):
df = pd.read_sql(sql, con)
table_names = list(df['table_name'])
table_names = [x for x in table_names if not 'cities' in x]

tsvs = []

for i, table_name in enumerate(table_names):

print i+1, table_name
Expand All @@ -107,15 +106,14 @@ def add_option_prefix(df):
for category in ['Area Median Income']:
method = getattr(dfs, category.lower().replace(' ', '_'))
df = method()
tsvs.append(df)

df = pd.concat(tsvs)

df = df.groupby(['Dependency=Vintage', 'Dependency=Heating Fuel', 'Dependency=Location EPW', 'Dependency=PUMA', 'Dependency=County']).sum()

count = df.sum(axis=1)
df = df.div(df.sum(axis=1), axis=0)
df['Count'] = count

df.to_csv(os.path.join(datafiles_dir, '{}.tsv'.format(category)), sep='\t')
df = df.groupby(['Dependency=Vintage', 'Dependency=Heating Fuel', 'Dependency=Location EPW', 'Dependency=Location Census Tract', 'Dependency=County']).sum()

count = df.sum(axis=1)
df = df.div(df.sum(axis=1), axis=0)
df['Count'] = count

if not df.empty:
df = df.fillna(0)
df.to_csv(os.path.join(datafiles_dir, '{} {}.tsv'.format(category, table_name.split('_')[-1].upper())), sep='\t')

35,216 changes: 35,216 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income AL.tsv

Large diffs are not rendered by default.

22,149 changes: 22,149 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income AR.tsv

Large diffs are not rendered by default.

35,105 changes: 35,105 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income AZ.tsv

Large diffs are not rendered by default.

204,976 changes: 204,976 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income CA.tsv

Large diffs are not rendered by default.

32,940 changes: 32,940 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income CO.tsv

Large diffs are not rendered by default.

27,715 changes: 27,715 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income CT.tsv

Large diffs are not rendered by default.

3,509 changes: 3,509 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income DC.tsv

Large diffs are not rendered by default.

8,616 changes: 8,616 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income DE.tsv

Large diffs are not rendered by default.

88,089 changes: 88,089 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income FL.tsv

Large diffs are not rendered by default.

53,647 changes: 53,647 additions & 0 deletions project_resstock_national/housing_characteristics/Area Median Income GA.tsv

Large diffs are not rendered by default.

0 comments on commit 5386cff

Please sign in to comment.