# Project Title: Property Usage
## By Go Code CO team "Tech Slope" - Aaron, Adam, Dex, Juli, Leo, Marc
### Version 0.1
### Last update: March 26, 2019





In [0]:
## Initiating

import pandas as pd
import time

list_county_scope=['Mesa','Denver']
list_year_scope=list(range(2010,2026))
list_age_bins=list(range(0,101,5)) 

### Task 1 Population
Ref: [Population Projections in Colorado](https://data.colorado.gov/Business/Business-Entities-in-Colorado/4ykn-tg5h)

In [0]:
df_pop = pd.read_csv('Population_Projections_in_Colorado.csv')
df_pop.head()

Unnamed: 0,county,fipsCode,year,age,malePopulation,femalePopulation,totalPopulation,dataType
0,Adams,1,1990,0,2354,2404,4758,Estimate
1,Adams,1,1990,1,2345,2375,4720,Estimate
2,Adams,1,1990,2,2413,2219,4632,Estimate
3,Adams,1,1990,3,2321,2261,4582,Estimate
4,Adams,1,1990,4,2433,2302,4735,Estimate


In [0]:
## Cleaning
### Filter data within the defined scope
df_pop_clean=df_pop[['county','fipsCode','year','age','totalPopulation','femalePopulation','malePopulation']][df_pop['year'].isin(list_year_scope)][df_pop['county'].isin(list_county_scope)]
### Rename columns
df_pop_clean=df_pop_clean.rename(index=str, columns={"county": "County", "fipsCode": "FIPS","year": "Year","age": "Age","totalPopulation": "Total","femalePopulation": "Female","malePopulation": "Male"})
print(df_pop_clean.tail(),'\n','---------','\n')
print('Shape of the current dataframe:',df_pop_clean.shape,'\n')
print('Missing value counts:')
print(df_pop_clean.shape[0]-df_pop_clean.count())

        County  FIPS  Year  Age  Total  Female  Male
381138    Mesa    77  2017   48   1688     846   842
381168    Mesa    77  2013   24   1994     999   995
381235  Denver    31  2024   10   7301    3537  3764
381241    Mesa    77  2012   76    860     454   406
381429    Mesa    77  2024   21   2212    1061  1151 
 --------- 

Shape of the current dataframe: (3232, 7) 

Missing value counts:
County    0
FIPS      0
Year      0
Age       0
Total     0
Female    0
Male      0
dtype: int64


  """Entry point for launching an IPython kernel.


In [0]:
## Aggregating 
### By age bins
bins = pd.cut(df_pop_clean['Age'],list_age_bins)
df_pop_clean_aggbyagebins = df_pop_clean.groupby(['County','FIPS','Year',bins]).sum()
df_pop_clean_aggbyagebins=df_pop_clean_aggbyagebins.drop(columns='Age').reset_index()
df_pop_clean_aggbyagebins = df_pop_clean_aggbyagebins[df_pop_clean_aggbyagebins.Total>0]
df_pop_clean_aggbyagebins.head()

Unnamed: 0,County,FIPS,Year,Age,Total,Female,Male
0,Denver,31,2010,"(0, 5]",42998.0,20976.0,22022.0
1,Denver,31,2010,"(5, 10]",36584.0,18058.0,18526.0
2,Denver,31,2010,"(10, 15]",29674.0,14701.0,14973.0
3,Denver,31,2010,"(15, 20]",34212.0,17201.0,17011.0
4,Denver,31,2010,"(20, 25]",51585.0,26248.0,25337.0


In [0]:
### By year
df_pop_clean_aggbyyear = df_pop_clean_aggbyagebins.groupby(['County','FIPS','Year']).sum().reset_index()
df_pop_clean_aggbyyear = df_pop_clean_aggbyyear[df_pop_clean_aggbyyear.Total>0]
df_pop_clean_aggbyyear.head()

Unnamed: 0,County,FIPS,Year,Total,Female,Male
0,Denver,31,2010,595677.0,297846.0,297831.0
1,Denver,31,2011,610943.0,305482.0,305466.0
2,Denver,31,2012,625205.0,312593.0,312619.0
3,Denver,31,2013,638798.0,319322.0,319482.0
4,Denver,31,2014,653408.0,326525.0,326887.0


In [0]:
## Save/Checkpoint
def write_checkpoint(df, dfname):
  filename=time.strftime("%Y%m%d-%H%M")+' '+ dfname+'.csv'
  df.to_csv(filename, index=False)
  return

def read_checkpoint(filename):
    return pd.read_csv(filename,index_col=0)
  
write_checkpoint(df_pop_clean_aggbyyear,'Population_year')
write_checkpoint(df_pop_clean_aggbyagebins,'Population_agebins')
write_checkpoint(df_pop_clean,'Population_clean')

###### df_pop_clean_aggbyyear=read_checkpoint('20190326-0136 Population_clean.csv')
###### df_pop_clean_aggbyyear.head()
