In [1]:
# %load Zillow_Explore_avl.py
# Dependencies
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import datetime

In [3]:
# ---------------
# TODO: replace with dataset_by_county structure and lookup
# ---------------
# The dataset folder is resolved relative to the notebook's working directory
nb_loc = os.getcwd()
zecon_dataset_loc = os.path.join(nb_loc, 'datasets', 'zecon')
datasets = os.listdir(zecon_dataset_loc)

# Hard-coded filenames to use: [city time series, city crosswalk]
files_used = ['City_time_series.csv', 'cities_crosswalk.csv']
city_time_series_file, crosswalk_file = files_used

# Read both CSV files from the dataset folder
crosswalk = pd.read_csv(os.path.join(zecon_dataset_loc, crosswalk_file))
city_time_series = pd.read_csv(os.path.join(zecon_dataset_loc, city_time_series_file))

# Attach crosswalk columns to each time-series row by matching the
# time series' 'RegionName' against the crosswalk's 'Unique_City_ID'
combine = city_time_series.merge(
    crosswalk,
    left_on='RegionName',
    right_on='Unique_City_ID',
)

# Keep only the rows whose 'State' column equals 'CA'
in_california = combine['State'] == 'CA'
ca_data = combine.loc[in_california]

In [None]:
def str2unix(s):
    """Convert a "YYYY-MM-DD" date string to a Unix timestamp (float seconds).

    The string is parsed as a naive datetime at midnight and handed to
    ``time.mktime``, which interprets it in the machine's local time zone.
    Raises ValueError if ``s`` does not match "%Y-%m-%d".
    """
    parsed = datetime.datetime.strptime(s, "%Y-%m-%d")
    return time.mktime(parsed.timetuple())
def convert_timescale(x):
    """Convert a duration in seconds to years, using a fixed 365-day year.

    Leap days are not accounted for: one "year" is always 60*60*24*365 seconds.
    """
    seconds_per_year = 60 * 60 * 24 * 365
    return x / seconds_per_year
def extract_year(x):
    """Return the calendar year (int) of a "YYYY-MM-DD" date string.

    Raises ValueError if ``x`` does not match "%Y-%m-%d".
    """
    parsed = datetime.datetime.strptime(x, "%Y-%m-%d")
    return parsed.year
def express_as_year(x):
    """Convert a "YYYY-MM-DD" date string to a fractional (decimal) year.

    The result is ``year + (day_of_year - 1) / days_in_year``, so January 1st
    maps exactly to the year and every date stays inside its own calendar year.

    The previous implementation took the epoch timestamp modulo a fixed
    365-day year. That ignores leap days and depends on the local UTC offset;
    the accumulated drift wrapped late-December dates around to the start of
    the same year (e.g. "2017-12-31" came out as roughly 2017.03).

    Parameters
    ----------
    x : str
        Date formatted as "%Y-%m-%d".

    Returns
    -------
    float
        Decimal year in the half-open interval [year, year + 1).

    Raises
    ------
    ValueError
        If ``x`` does not match "%Y-%m-%d".
    """
    day = datetime.datetime.strptime(x, "%Y-%m-%d").date()
    day_of_year = day.timetuple().tm_yday
    # The day-of-year of December 31st is the length of this year (365 or 366).
    days_in_year = datetime.date(day.year, 12, 31).timetuple().tm_yday
    # float() keeps true division even under Python 2 integer semantics.
    return day.year + (day_of_year - 1) / float(days_in_year)

In [None]:
county_list = ca_data['County'].unique()
figure_directory = 'County_Median_Price_Per_Sqft'
# Saving figures is currently disabled. To enable it, create
# `figure_directory` first (e.g. with os.mkdir) and uncomment the
# plt.savefig call at the bottom of the loop.

# One figure per county, with one point series per city in that county.
for county in county_list:
    this_county_data = ca_data.loc[ca_data['County']==county]
    regions = this_county_data['City'].unique()
    plt.figure(figsize=[20,10])
    for region in regions:
        # Build the row mask from this county's own frame. The earlier code
        # indexed with an undefined name (`sample`), which raises NameError
        # when the notebook is run on a fresh kernel.
        is_region = this_county_data['City']==region
        dates = this_county_data.loc[is_region,'Date'].apply(express_as_year)
        price = this_county_data.loc[is_region,'MedianSoldPricePerSqft_AllHomes']
        plt.plot(dates,price,'.',label=region)
    plt.title('Median Sold Price per Sq. Ft. for %s County' % (county))
    plt.grid(True)
    plt.ylabel('$/Sq.Ft.')
    plt.xlabel('Years')
    plt.legend()
    plt.xlim([1995,2025])

    # Target file for this county's figure (spaces stripped from the name);
    # only used if the savefig line below is re-enabled.
    savepath = os.path.join(figure_directory,county.replace(' ','')+'.png')
#     plt.savefig(savepath)
#     plt.ylim([0,1000])
    plt.show()

In [6]:
combine_keys = combine.keys()
# Split every underscore-containing column name once; the first piece is
# collected as a "metric" and the second piece as a "modifier".
split_keys = [key.split('_') for key in combine_keys if '_' in key]
metrics = pd.Series([pieces[0] for pieces in split_keys]).unique()
modifiers = pd.Series([pieces[1] for pieces in split_keys]).unique()

In [7]:
# Show the distinct modifier suffixes (second '_'-separated piece of each column name).
# NOTE(review): 'City' presumably comes from splitting the 'Unique_City_ID' merge key
# rather than from a real housing modifier — confirm before relying on it.
modifiers

array(['AllHomes', '1Bedroom', '2Bedroom', '3Bedroom', '4Bedroom',
       '5BedroomOrMore', 'CondoCoop', 'DuplexTriplex',
       'SingleFamilyResidence', 'MultiFamilyResidence5PlusUnits', 'Studio',
       '1bedroom', '2bedroom', '3bedroom', '4bedroom', 'BottomTier',
       'MiddleTier', 'TopTier', 'AllHomesPlusMultifamily',
       'MultiFamilyResidenceRental', 'SingleFamilyResidenceRental', 'City'], dtype=object)

In [8]:
# Show the distinct metric prefixes (first '_'-separated piece of each column name).
# NOTE(review): 'Unique' presumably comes from splitting the 'Unique_City_ID' merge key
# rather than from a real metric — confirm before relying on it.
metrics

array(['HomesSoldAsForeclosuresRatio', 'InventorySeasonallyAdjusted',
       'InventoryRaw', 'MedianListingPricePerSqft', 'MedianListingPrice',
       'MedianPctOfPriceReduction', 'MedianPriceCutDollar',
       'MedianRentalPricePerSqft', 'MedianRentalPrice',
       'MedianSoldPricePerSqft', 'MedianSoldPrice', 'ZHVIPerSqft',
       'PctOfHomesDecreasingInValues', 'PctOfHomesIncreasingInValues',
       'PctOfHomesSellingForGain', 'PctOfHomesSellingForLoss',
       'PctOfListingsWithPriceReductionsSeasAdj',
       'PctOfListingsWithPriceReductions',
       'PctTransactionsThatArePreviouslyForeclosuredHomes',
       'PriceToRentRatio', 'Turnover', 'ZHVI', 'ZRI', 'ZriPerSqft', 'Zri',
       'Unique'], dtype=object)

In [14]:
# Build a list of single-entry dicts, one per metric, mapping the metric name
# to the second '_'-separated piece of every column name containing "<metric>_".
tree = [
    {metric: [key.split('_')[1] for key in combine_keys if (metric + '_') in key]}
    for metric in metrics
]

In [15]:
# Display the list of {metric: [modifiers]} dicts built in the previous cell.
tree

[{'HomesSoldAsForeclosuresRatio': ['AllHomes']},
 {'InventorySeasonallyAdjusted': ['AllHomes']},
 {'InventoryRaw': ['AllHomes']},
 {'MedianListingPricePerSqft': ['1Bedroom',
   '2Bedroom',
   '3Bedroom',
   '4Bedroom',
   '5BedroomOrMore',
   'AllHomes',
   'CondoCoop',
   'DuplexTriplex',
   'SingleFamilyResidence']},
 {'MedianListingPrice': ['1Bedroom',
   '2Bedroom',
   '3Bedroom',
   '4Bedroom',
   '5BedroomOrMore',
   'AllHomes',
   'CondoCoop',
   'DuplexTriplex',
   'SingleFamilyResidence']},
 {'MedianPctOfPriceReduction': ['AllHomes',
   'CondoCoop',
   'SingleFamilyResidence']},
 {'MedianPriceCutDollar': ['AllHomes', 'CondoCoop', 'SingleFamilyResidence']},
 {'MedianRentalPricePerSqft': ['1Bedroom',
   '2Bedroom',
   '3Bedroom',
   '4Bedroom',
   '5BedroomOrMore',
   'AllHomes',
   'CondoCoop',
   'DuplexTriplex',
   'MultiFamilyResidence5PlusUnits',
   'SingleFamilyResidence',
   'Studio']},
 {'MedianRentalPrice': ['1Bedroom',
   '2Bedroom',
   '3Bedroom',
   '4Bedroom',
   '5