In [20]:
import quandl
import pgeocode
import re
import numpy as np
import pandas as pd
import time
import threading

# logging warnings and errors
import logging
import sys
from io import StringIO

In [21]:
logging.basicConfig(filename='log_file.log',
                    level=logging.DEBUG,
                    format='%(asctime)s %(message)s',
                    datefmt='%m/%d/%Y %I:%M:%S %p',
                    filemode='a')
logger = logging.getLogger()

In [22]:
states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
          "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
          "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
          "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
          "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

In [31]:
def track_api_calls(calls_made, calls_last_10s, calls_last_10m):

    if calls_made >= max_calls_per_day:
        logger.warning('API call limit reached for the day')
        return

    if calls_last_10s >= max_calls_per_10_seconds:
        elapsed_time = time.time() - last_10s_time
        if elapsed_time < 10:
            logger.warning('Number of API calls per 10 Seconds limit reached')
            time.sleep(10 - elapsed_time)
        calls_last_10s = 0

    if calls_last_10m >= max_calls_per_10_minutes:
        elapsed_time = time.time() - last_10m_time
        if elapsed_time < 600:
            logger.warning('Number of API calls per 10 Minutes limit reached')
            time.sleep(600 - elapsed_time)
        calls_last_10m = 0
    return('current_calls: ',calls_made)



In [24]:
def check_state(search_str):
    search_str_list = [x.strip() for x in search_str.split(';')]
    for x in search_str_list:
        if x in states:
            return x
        

def check_county(search_str):
    search_str_list = [x.strip() for x in search_str.split(';')]
    for x in search_str_list:
        if 'county' in x.lower():
            return x
        
        
def check_city(search_str):
    search_str_list = [x.strip() for x in search_str.split(';')]
    if len(search_str_list) == 1:
        return np.nan
    if 'county' not in search_str_list[-1].lower():
        return search_str_list[-1]
    
def check_metro(search_str):
    search_str_list = [x.strip() for x in search_str.split(';')]
    if len(search_str_list) <= 3:
        return np.nan
    if 'county' not in search_str_list[2].lower():
        return search_str_list[2]    
    
def quandl_data(indicator_id,region_id=None,start_date=None):
    return quandl.get_table('ZILLOW/DATA', indicator_id=indicator_id, region_id=region_id)

In [25]:
def get_data(tracked_indicators, region_ids):
    calls_made = 0
    calls_last_10s = 0
    calls_last_10m = 0
    lock = threading.Semaphore(1)
    # create empty dataframe
    df_data = pd.DataFrame(columns=['indicator_id', 'region_id','date','value'])

    loops_made = 0
    max_no_loop = len(tracked_indicators)+len(region_ids)+2
    
    for ind in tracked_indicators:
        for region in region_ids:
            lock.acquire()
            try:
                if calls_made >= 50000:
                    logger.warning('API call limit reached for the day')
                    return
                
                if calls_last_10s >= 300:
                    elapsed_time = time.time() - last_10s_time
                    if elapsed_time < 10:
                        time.sleep(10 - elapsed_time)
                    calls_last_10s = 0
                
                if calls_last_10m >= 2000:
                    elapsed_time = time.time() - last_10m_time
                    if elapsed_time < 600:
                        time.sleep(600 - elapsed_time)
                    calls_last_10m = 0
                
                data = quandl_data(ind, region)
                if data.empty is False:
                    
                    data.reset_index(inplace=True)
                    data = data[['indicator_id', 'region_id','date','value']]
                    df_data = pd.concat([df_data, data], axis=0, ignore_index=True)
                    logger.info(f'Indicator: {ind} - Region: {region} - Successfully Ingested')
                
                calls_made += 1
                calls_last_10s += 1
                calls_last_10m += 1
                loops_made += 1
                
                last_10s_time = time.time()
                last_10m_time = time.time()
                
                if loops_made == max_no_loop:
                    return df_data
                
            except Exception as e:
                logger.error(f'Region: {region} - Indicator: {ind} - {e}')
            
            finally:
                lock.release()


In [26]:
quandl_data('ZSFH', 99999).

SyntaxError: invalid syntax (2225283256.py, line 1)

In [27]:
def get_data(tracked_indicators, region_ids):
    calls_made = 0
    calls_last_10s = 0
    calls_last_10m = 0
    lock = threading.Semaphore(1)
    # create empty dataframe
    df_data = pd.DataFrame(columns=['indicator_id', 'region_id','date','value'])

    loops_made = 0
    max_no_loop = len(tracked_indicators)+len(region_ids)+2
    
    for ind in tracked_indicators:
        for region in region_ids:
            lock.acquire()
            try:
                if calls_made >= max_calls_per_day:
                    logger.warning('API call limit reached for the day')
                    return
                
                if calls_last_10s >= max_calls_per_10_seconds:
                    elapsed_time = time.time() - last_10s_time
                    if elapsed_time < 10:
                        time.sleep(10 - elapsed_time)
                    calls_last_10s = 0
                
                if calls_last_10m >= max_calls_per_10_minutes:
                    elapsed_time = time.time() - last_10m_time
                    if elapsed_time < 600:
                        time.sleep(600 - elapsed_time)
                    calls_last_10m = 0
                
                data = quandl_data(ind, region)
                if data.empty is False:
                    
                    data.reset_index(inplace=True)
                    data = data[['indicator_id', 'region_id','date','value']]
                    df_data = pd.concat([df_data, data], axis=0, ignore_index=True)
                    logger.info(f'Indicator: {ind} - Region: {region} - Successfully Ingested')
                
                calls_made += 1
                calls_last_10s += 1
                calls_last_10m += 1
                loops_made += 1
                
                last_10s_time = time.time()
                last_10m_time = time.time()
                
                if loops_made == max_no_loop:
                    return df_data
                
            except Exception as e:
                logger.error(f'Region: {region} - Indicator: {ind} - {e}')
            
            finally:
                lock.release()


In [28]:
indicators_file = 'zillow_indicators.csv'
inicators_track = 'zillow_indicators_ingest.csv'
api_key = 'xuisyPUDscg1rq-HiMz7'
quandl.ApiConfig.api_key = api_key

In [29]:
max_calls_per_day = 50000
max_calls_per_10_seconds = 300
max_calls_per_10_minutes = 2000
concurrency_limit = 1

In [32]:
# get indicators
df_ind = quandl.get_table("ZILLOW/INDICATORS",paginate=True)

# get regions by zip
df_regions = quandl.get_table("ZILLOW/REGIONS",paginate=True)
df_regions_zip = df_regions[df_regions['region_type']=='zip']

In [33]:
df_regions_zip = df_regions_zip.copy()
df_regions_zip['region_str_len'] = df_regions_zip.apply(lambda x: len(x['region'].split(';')),axis=1)
df_regions_zip['zip'] = df_regions_zip.apply(lambda x: re.search('(\d{5})',x['region']).group(),axis=1)
df_regions_zip['state'] = df_regions_zip.apply(lambda x: check_state(x['region']),axis=1)
df_regions_zip['county'] = df_regions_zip.apply(lambda x: check_county(x['region']),axis=1)
df_regions_zip['city'] = df_regions_zip.apply(lambda x: check_city(x['region']),axis=1)
df_regions_zip['metro'] = df_regions_zip.apply(lambda x: check_metro(x['region']),axis=1)

In [34]:
region_ids = df_regions_zip['region_id']
indicator_ids = df_ind['indicator_id']

In [22]:
# save indicator
df_ind.to_csv(indicators_file)

In [23]:
# select indicator 
indicators = pd.read_csv(inicators_track)
tracked_indicators = indicators[indicators['ingest']=='Y']['indicator_id']

In [26]:
# def append_data(tracked_indicators,region_ids):
#     for ind in tracked_indicators:
#         for region in region_ids:
#             try:
#                 data = get_data(ind,region)
#                 if data.empty is False:
#                     data.reset_index(inplace=True)
#                     data = data[['indicator_id', 'region_id','date','value']]
#                     df_data = pd.concat([df_data, data], axis=0,ignore_index=True)
#                     logger.info(f'Indicator: {ind} - Region: {region} - Successfully Ingested')
#             except Exception as e:
#                 logger.error(f'Region: {region} - Indicator: {ind} - {e}')

In [81]:
get_data(tracked_indicators, region_ids)

Indicator: ZSFH - Region: 99999 - Successfully Ingested
Indicator: ZSFH - Region: 99998 - Successfully Ingested
Indicator: ZSFH - Region: 99997 - Successfully Ingested
Indicator: ZSFH - Region: 99996 - Successfully Ingested
Indicator: ZSFH - Region: 99995 - Successfully Ingested
Indicator: ZSFH - Region: 99994 - Successfully Ingested
Indicator: ZSFH - Region: 99993 - Successfully Ingested
Indicator: ZSFH - Region: 99992 - Successfully Ingested
Indicator: ZSFH - Region: 99991 - Successfully Ingested
Indicator: ZSFH - Region: 99990 - Successfully Ingested
Indicator: ZSFH - Region: 99989 - Successfully Ingested
Indicator: ZSFH - Region: 99988 - Successfully Ingested
Indicator: ZSFH - Region: 99987 - Successfully Ingested
Indicator: ZSFH - Region: 99986 - Successfully Ingested
Indicator: ZSFH - Region: 99985 - Successfully Ingested
Indicator: ZSFH - Region: 99984 - Successfully Ingested
Indicator: ZSFH - Region: 99983 - Successfully Ingested
Indicator: ZSFH - Region: 99982 - Successfully I

Indicator: ZSFH - Region: 99837 - Successfully Ingested
Indicator: ZSFH - Region: 99832 - Successfully Ingested
Indicator: ZSFH - Region: 99829 - Successfully Ingested
Indicator: ZSFH - Region: 99828 - Successfully Ingested
Indicator: ZSFH - Region: 99827 - Successfully Ingested
Indicator: ZSFH - Region: 99826 - Successfully Ingested
Indicator: ZSFH - Region: 99825 - Successfully Ingested
Indicator: ZSFH - Region: 99818 - Successfully Ingested
Indicator: ZSFH - Region: 99817 - Successfully Ingested
Indicator: ZSFH - Region: 99816 - Successfully Ingested
Indicator: ZSFH - Region: 99810 - Successfully Ingested
Indicator: ZSFH - Region: 99809 - Successfully Ingested
Indicator: ZSFH - Region: 99808 - Successfully Ingested
Indicator: ZSFH - Region: 99807 - Successfully Ingested
Indicator: ZSFH - Region: 99805 - Successfully Ingested
Indicator: ZSFH - Region: 99800 - Successfully Ingested
Indicator: ZSFH - Region: 99799 - Successfully Ingested
Indicator: ZSFH - Region: 99798 - Successfully I

Indicator: ZSFH - Region: 99633 - Successfully Ingested
Indicator: ZSFH - Region: 99631 - Successfully Ingested
Indicator: ZSFH - Region: 99629 - Successfully Ingested
Indicator: ZSFH - Region: 99627 - Successfully Ingested
Indicator: ZSFH - Region: 99626 - Successfully Ingested
Indicator: ZSFH - Region: 99625 - Successfully Ingested
Indicator: ZSFH - Region: 99624 - Successfully Ingested
Indicator: ZSFH - Region: 99623 - Successfully Ingested
Indicator: ZSFH - Region: 99618 - Successfully Ingested
Indicator: ZSFH - Region: 99614 - Successfully Ingested
Indicator: ZSFH - Region: 99613 - Successfully Ingested
Indicator: ZSFH - Region: 99608 - Successfully Ingested
Indicator: ZSFH - Region: 99607 - Successfully Ingested
Indicator: ZSFH - Region: 99601 - Successfully Ingested
Indicator: ZSFH - Region: 99598 - Successfully Ingested
Indicator: ZSFH - Region: 99597 - Successfully Ingested
Indicator: ZSFH - Region: 99595 - Successfully Ingested
Indicator: ZSFH - Region: 99591 - Successfully I

Indicator: ZSFH - Region: 99432 - Successfully Ingested
Indicator: ZSFH - Region: 99431 - Successfully Ingested
Indicator: ZSFH - Region: 99430 - Successfully Ingested
Indicator: ZSFH - Region: 99429 - Successfully Ingested
Indicator: ZSFH - Region: 99428 - Successfully Ingested
Indicator: ZSFH - Region: 99427 - Successfully Ingested
Indicator: ZSFH - Region: 99426 - Successfully Ingested
Indicator: ZSFH - Region: 99425 - Successfully Ingested
Indicator: ZSFH - Region: 99423 - Successfully Ingested
Indicator: ZSFH - Region: 99422 - Successfully Ingested
Indicator: ZSFH - Region: 99421 - Successfully Ingested
Indicator: ZSFH - Region: 99420 - Successfully Ingested
Indicator: ZSFH - Region: 99419 - Successfully Ingested
Indicator: ZSFH - Region: 99417 - Successfully Ingested
Indicator: ZSFH - Region: 99416 - Successfully Ingested
Indicator: ZSFH - Region: 99414 - Successfully Ingested
Indicator: ZSFH - Region: 99413 - Successfully Ingested
Indicator: ZSFH - Region: 99412 - Successfully I

Indicator: ZSFH - Region: 99263 - Successfully Ingested
Indicator: ZSFH - Region: 99262 - Successfully Ingested
Indicator: ZSFH - Region: 99261 - Successfully Ingested
Indicator: ZSFH - Region: 99260 - Successfully Ingested
Indicator: ZSFH - Region: 99258 - Successfully Ingested
Indicator: ZSFH - Region: 99256 - Successfully Ingested
Indicator: ZSFH - Region: 99255 - Successfully Ingested
Indicator: ZSFH - Region: 99254 - Successfully Ingested
Indicator: ZSFH - Region: 99253 - Successfully Ingested
Indicator: ZSFH - Region: 99252 - Successfully Ingested
Indicator: ZSFH - Region: 99251 - Successfully Ingested
Indicator: ZSFH - Region: 99250 - Successfully Ingested
Indicator: ZSFH - Region: 99249 - Successfully Ingested
Indicator: ZSFH - Region: 99248 - Successfully Ingested
Indicator: ZSFH - Region: 99247 - Successfully Ingested
Indicator: ZSFH - Region: 99246 - Successfully Ingested
Indicator: ZSFH - Region: 99245 - Successfully Ingested
Indicator: ZSFH - Region: 99244 - Successfully I

Indicator: ZSFH - Region: 99066 - Successfully Ingested
Indicator: ZSFH - Region: 99065 - Successfully Ingested
Indicator: ZSFH - Region: 99064 - Successfully Ingested
Indicator: ZSFH - Region: 99063 - Successfully Ingested
Indicator: ZSFH - Region: 99062 - Successfully Ingested
Indicator: ZSFH - Region: 99061 - Successfully Ingested
Indicator: ZSFH - Region: 99060 - Successfully Ingested
Indicator: ZSFH - Region: 99059 - Successfully Ingested
Indicator: ZSFH - Region: 99058 - Successfully Ingested
Indicator: ZSFH - Region: 99057 - Successfully Ingested
Indicator: ZSFH - Region: 99056 - Successfully Ingested
Indicator: ZSFH - Region: 99055 - Successfully Ingested
Indicator: ZSFH - Region: 99054 - Successfully Ingested
Indicator: ZSFH - Region: 99053 - Successfully Ingested
Indicator: ZSFH - Region: 99052 - Successfully Ingested
Indicator: ZSFH - Region: 99051 - Successfully Ingested
Indicator: ZSFH - Region: 99050 - Successfully Ingested
Indicator: ZSFH - Region: 99049 - Successfully I

Indicator: ZSFH - Region: 98657 - Successfully Ingested
Indicator: ZSFH - Region: 98656 - Successfully Ingested
Indicator: ZSFH - Region: 98655 - Successfully Ingested
Indicator: ZSFH - Region: 98654 - Successfully Ingested
Indicator: ZSFH - Region: 98653 - Successfully Ingested
Indicator: ZSFH - Region: 98652 - Successfully Ingested
Indicator: ZSFH - Region: 98651 - Successfully Ingested
Indicator: ZSFH - Region: 98650 - Successfully Ingested
Indicator: ZSFH - Region: 98649 - Successfully Ingested
Indicator: ZSFH - Region: 98648 - Successfully Ingested
Indicator: ZSFH - Region: 98647 - Successfully Ingested
Indicator: ZSFH - Region: 98646 - Successfully Ingested
Indicator: ZSFH - Region: 98645 - Successfully Ingested
Indicator: ZSFH - Region: 98644 - Successfully Ingested
Indicator: ZSFH - Region: 98643 - Successfully Ingested
Indicator: ZSFH - Region: 98642 - Successfully Ingested
Indicator: ZSFH - Region: 98641 - Successfully Ingested
Indicator: ZSFH - Region: 98640 - Successfully I

Indicator: ZSFH - Region: 98502 - Successfully Ingested
Indicator: ZSFH - Region: 98501 - Successfully Ingested
Indicator: ZSFH - Region: 98500 - Successfully Ingested
Indicator: ZSFH - Region: 98499 - Successfully Ingested
Indicator: ZSFH - Region: 98498 - Successfully Ingested
Indicator: ZSFH - Region: 98497 - Successfully Ingested
Indicator: ZSFH - Region: 98496 - Successfully Ingested
Indicator: ZSFH - Region: 98495 - Successfully Ingested
Indicator: ZSFH - Region: 98494 - Successfully Ingested
Indicator: ZSFH - Region: 98493 - Successfully Ingested
Indicator: ZSFH - Region: 98491 - Successfully Ingested
Indicator: ZSFH - Region: 98490 - Successfully Ingested
Indicator: ZSFH - Region: 98489 - Successfully Ingested
Indicator: ZSFH - Region: 98488 - Successfully Ingested
Indicator: ZSFH - Region: 98487 - Successfully Ingested
Indicator: ZSFH - Region: 98486 - Successfully Ingested
Indicator: ZSFH - Region: 98485 - Successfully Ingested
Indicator: ZSFH - Region: 98484 - Successfully I

Indicator: ZSFH - Region: 98322 - Successfully Ingested
Indicator: ZSFH - Region: 98321 - Successfully Ingested
Indicator: ZSFH - Region: 98320 - Successfully Ingested
Indicator: ZSFH - Region: 98319 - Successfully Ingested
Indicator: ZSFH - Region: 98318 - Successfully Ingested
Indicator: ZSFH - Region: 98317 - Successfully Ingested
Indicator: ZSFH - Region: 98316 - Successfully Ingested
Indicator: ZSFH - Region: 98315 - Successfully Ingested
Indicator: ZSFH - Region: 98314 - Successfully Ingested
Indicator: ZSFH - Region: 98312 - Successfully Ingested
Indicator: ZSFH - Region: 98311 - Successfully Ingested
Indicator: ZSFH - Region: 98310 - Successfully Ingested
Indicator: ZSFH - Region: 98309 - Successfully Ingested
Indicator: ZSFH - Region: 98308 - Successfully Ingested
Indicator: ZSFH - Region: 98306 - Successfully Ingested
Indicator: ZSFH - Region: 98304 - Successfully Ingested
Indicator: ZSFH - Region: 98303 - Successfully Ingested
Indicator: ZSFH - Region: 98302 - Successfully I

Indicator: ZSFH - Region: 98151 - Successfully Ingested
Indicator: ZSFH - Region: 98149 - Successfully Ingested
Indicator: ZSFH - Region: 98148 - Successfully Ingested
Indicator: ZSFH - Region: 98147 - Successfully Ingested
Indicator: ZSFH - Region: 98146 - Successfully Ingested
Indicator: ZSFH - Region: 98145 - Successfully Ingested
Indicator: ZSFH - Region: 98144 - Successfully Ingested
Indicator: ZSFH - Region: 98143 - Successfully Ingested
Indicator: ZSFH - Region: 98142 - Successfully Ingested
Indicator: ZSFH - Region: 98141 - Successfully Ingested
Indicator: ZSFH - Region: 98140 - Successfully Ingested
Indicator: ZSFH - Region: 98139 - Successfully Ingested
Indicator: ZSFH - Region: 98138 - Successfully Ingested
Indicator: ZSFH - Region: 98137 - Successfully Ingested
Indicator: ZSFH - Region: 98136 - Successfully Ingested
Indicator: ZSFH - Region: 98135 - Successfully Ingested
Indicator: ZSFH - Region: 98134 - Successfully Ingested
Indicator: ZSFH - Region: 98131 - Successfully I

KeyboardInterrupt: 