In [1]:
import pandas as pd
import json
import os
import numpy as np

# Functions to Read Data File

###  Function To Import Housing Inventory Data 

In [2]:
def read_housing_inventory_data():
    # Reading Housing List prices from `Resources/RDC_Inventory_Core_Metrics_County_History.csv`
    house_listing_df = pd.read_csv('Resources/RDC_Inventory_Core_Metrics_County_History.csv')
    # Changing the date column format to yyyy-mm-dd
    house_listing_df['month_date_yyyymm'] = pd.to_datetime(house_listing_df['month_date_yyyymm'], format='%Y%m')

    # Renaming the column `month_date_yyyymm` to `Date`
    house_listing_df.rename(columns={'month_date_yyyymm':'Date'}, inplace=True)

    # Creating new temporary Dataframe with split values of the column
    temp_county_state_df = house_listing_df['county_name'].str.split(",", n =1 , expand = True)

    #Re assigning county_name from the temporary Dataframe
    house_listing_df['county_name'] = temp_county_state_df[0].str.strip()

    #Creating a new column for State
    house_listing_df['state'] = temp_county_state_df[1].str.strip()

    # Selecting Data for Washington State
    wa_house_listing_df = house_listing_df[house_listing_df['state'] == 'wa']
    wa_house_listing_df['county_name'] = wa_house_listing_df['county_name'].str.title()

    # Sorting Records by Date and County Name
    wa_house_listing_df = wa_house_listing_df.sort_values(['Date' , 'county_name'], ascending = (True, True)).reset_index(drop = True)

    return wa_house_listing_df

# Function to define Custom Rental DataFrame Transform

In [3]:
def custom_transform(rental_dataframe):
    
    # Getting Date Column Names seperately to create a seperate column for Date
    dates = rental_dataframe.columns[3:]
    
    # Creating new Series to store the type and value of Rental data(eg : City/County)
    col_name = rental_dataframe.columns[0]
    col_values = pd.Series(rental_dataframe.iloc[:,0])
    
    # Creating new DataFrame `col_details` for col_name(City_Name/County_Name) and `FIPS_Code`
    col_details = rental_dataframe.loc[:,[col_name , 'FIPS_Code']]
    
    # Creating a new DataFrame with values as the cross product of `dates` list and `col_values` list
    cross_product_df =  pd.MultiIndex.from_product([dates, col_values], names = ['Date', col_name])
    cross_product_df = pd.DataFrame(index = cross_product_df).reset_index()
    
    # Merging the new DataFrame with `col_details` DataFrame
    result_df = pd.merge(left = cross_product_df, right=col_details, left_on=col_name, right_on=col_name)
    result_df = result_df.sort_values(by=['Date',col_name] , ignore_index = True)
    
    
    rental_dataframe.set_index('FIPS_Code' , inplace=True)
    rental_dataframe = rental_dataframe.replace('     NA', np.NaN)
    
    for index, row in result_df.iterrows():
        date_col = row['Date']
        FIPS_Code = row['FIPS_Code']
        result_df.at[index, 'Average_Rental_Price'] = rental_dataframe.loc[FIPS_Code,date_col]
        
    result_df['Date'] = pd.to_datetime(result_df['Date'], format='%Y_%m')
    return result_df
    

# Function To Importing Rental Data 

In [4]:
def read_rental_inventory():
    # Reading Housing List prices from `Resources/RDC_Inventory_Core_Metrics_County_History.csv`
    rental_county_df = pd.read_csv("Resources/rental_growth_county.csv")
    rental_city_df = pd.read_csv("Resources/rental_growth_city.csv")

    rental_county_df['County_Name'] = (rental_county_df['County_Name'].str.split(" ", n =1 , expand = True))[0]
    rental_city_df['City_Name'] = (rental_city_df['City_Name'].str.split(",", n =1 , expand = True))[0]
    
    rental_county_df = custom_transform(rental_county_df)
    rental_city_df = custom_transform(rental_city_df)

    return [rental_county_df , rental_city_df]


# Function to define custom Dataframe Transform

In [5]:
def custom_dataframe_transform(building_df , df_column_type):
    # Getting Date Column Names seperately to create a seperate column for Date
    dates = building_df.columns[1:]
    
    # Creating new Series to store the type and value of Original data(eg : City/County)
    col_name = building_df.columns[0]
    col_values = pd.Series(building_df.iloc[:,0])
    
    # Creating a new DataFrame with values as the cross product of `dates` list and `col_values` list
    result_df =  pd.MultiIndex.from_product([dates, col_values], names = ['Date', col_name])
    result_df = pd.DataFrame(index = result_df).reset_index()
    
    building_df.set_index(col_name , inplace=True)
    
    for index, row in result_df.iterrows():
        col_identifier = row['Date']
        row_identifier = row[col_name]
        result_df.at[index, df_column_type] = building_df.loc[row_identifier,col_identifier]
        
    return result_df 

# Function To Import Building Completion Units Data

In [6]:
def read_building_completion_data():
    # Reading Building Completion Units Data from 
    building_completion_county_df = pd.read_csv("Resources/building_completion_units_county_wise.csv" , thousands=',')
    building_completion_city_df = pd.read_csv("Resources/building_completion_units_city_wise.csv" , thousands=',')

    building_completion_county_df['COUNTIES'] = (building_completion_county_df['COUNTIES'].str.split(" ", n =1 , expand = True))[0]
    
    # Calling `custom_dataframe_transform` function on `building_completion_county_df` DataFrame and `building_completion_city_df` DataFrame

    df_column_type = 'Units_Completed'
    building_completion_county_df = custom_dataframe_transform(building_completion_county_df , df_column_type)
    building_completion_city_df = custom_dataframe_transform(building_completion_city_df , df_column_type)
    
    return [building_completion_county_df, building_completion_city_df]


# Function To Import Building Permits Data

In [7]:
def read_building_permits_data():
    building_permit_county_df = pd.read_csv("Resources/building_permit_county_wise.csv" , thousands=',')
    building_permit_city_df = pd.read_csv("Resources/building_permit_city_wise.csv" , thousands=',')

    building_permit_county_df['COUNTIES'] = (building_permit_county_df['COUNTIES'].str.split(" ", n =1 , expand = True))[0]
    
    # Calling `custom_dataframe_transform` function on `building_permit_county_df` DataFrame and `building_permit_city_df` DataFrame
    df_column_type = 'Units_Permitted'
    building_permit_county_df = custom_dataframe_transform(building_permit_county_df , df_column_type)
    building_permit_city_df = custom_dataframe_transform(building_permit_city_df , df_column_type)
    
    return [building_permit_county_df, building_permit_city_df]

# Function To Import Labor and Employment Data

In [8]:
def labor_emp_transform(data_df ,df_type ):
    data_df = data_df.stack()
    data_df = pd.DataFrame(data_df.reset_index())
    data_df.columns = ['Date','FIPS_CODE',df_type+'_NO.']
    return data_df

In [9]:
def read_labor_data():
    wa_labor_force_df = pd.read_csv('Resources/wa_labor_force_by_county.csv',
                                 index_col=0,infer_datetime_format=True,
                                  parse_dates=True
                                 )

    # Calling `labor_emp_transform` function for DataFrame
    wa_labor_force_df = labor_emp_transform(wa_labor_force_df, "LABOR")
    return wa_labor_force_df

In [10]:
def read_employment_data():
    wa_employment_df = pd.read_csv('Resources/wa_employment_by_county.csv',
                                 index_col=0,infer_datetime_format=True,
                                  parse_dates=True)

    # Calling `labor_emp_transform` function for DataFrame
    wa_employment_df = labor_emp_transform(wa_employment_df, "EMP")
    return wa_employment_df

# CALLING FUNCTIONS TO READ ALL DATA

### Inventory List Data

In [11]:
wa_house_listing_county_df = read_housing_inventory_data()
wa_house_listing_county_df.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,Date,county_fips,county_name,median_listing_price,median_listing_price_mm,median_listing_price_yy,active_listing_count,active_listing_count_mm,active_listing_count_yy,median_days_on_market,...,average_listing_price_mm,average_listing_price_yy,total_listing_count,total_listing_count_mm,total_listing_count_yy,pending_ratio,pending_ratio_mm,pending_ratio_yy,quality_flag,state
0,2016-07-01,53001,Adams,125000.0,,,34,,,87.0,...,,,36,,,0.058824,,,0,wa
1,2016-07-01,53003,Asotin,220000.0,,,120,,,61.0,...,,,120,,,,,,0,wa


### Rental Data County and City wise.

In [12]:
rental_df = read_rental_inventory()
rental_county_df = rental_df[0]
rental_city_df = rental_df[1]

display(rental_county_df.head(2))
display(rental_city_df.head(2))

Unnamed: 0,Date,County_Name,FIPS_Code,Average_Rental_Price
0,2017-01-01,Benton,53005,
1,2017-01-01,Clark,53011,1294.0


Unnamed: 0,Date,City_Name,FIPS_Code,Average_Rental_Price
0,2017-01-01,Auburn,5303180,1278
1,2017-01-01,Bellevue,5305210,2063


### Building Completion Data County and Citywise.

In [13]:
building_completion_df = read_building_completion_data()
building_completion_county_df = building_completion_df[0]
building_completion_city_df = building_completion_df[1]

# Displaying the transformed Building Units DataFrames.
display(building_completion_county_df.head(2))
display(building_completion_city_df.head(2))

Unnamed: 0,Date,COUNTIES,Units_Completed
0,2016/17,Adams,146.0
1,2016/17,Asotin,29.0


Unnamed: 0,Date,CITIES,Units_Completed
0,2016/17,Aberdeen,6.0
1,2016/17,Airway Heights,31.0


### Building Permits Data County and Citywise.

In [14]:
building_permit_df = read_building_permits_data()
building_permit_county_df = building_permit_df[0]
building_permit_city_df = building_permit_df[1]

# Displaying the transformed Building Unit Permits DataFrames.
display(building_permit_county_df.head(2))
display(building_permit_city_df.head(2))

Unnamed: 0,Date,COUNTIES,Units_Permitted
0,2016,Adams,31.0
1,2016,Asotin,63.0


Unnamed: 0,Date,CITIES,Units_Permitted
0,2016,Aberdeen,5.0
1,2016,Airway Heights,33.0


### Labor and Employment Data Countywise.

In [15]:
# Reading Countywise Labor Force Data From `wa_labor_force_by_county.csv` 
wa_labor_force_df = read_labor_data()

# Reading Countywise Labor Force Data From `wa_employment_by_county.csv`
wa_employment_df = read_employment_data()

display(wa_labor_force_df.head(2))
display(wa_employment_df.head(2))

Unnamed: 0,Date,FIPS_CODE,LABOR_NO.
0,2017-01-01,53011,226512.0
1,2017-01-01,53025,41475.0


Unnamed: 0,Date,FIPS_CODE,EMP_NO.
0,2017-01-01,53011,213014.0
1,2017-01-01,53025,37313.0
