In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from datetime import datetime
from itertools import islice

# Import API key
from config import MLONG

In [2]:
fl_census_data = pd.read_csv('Resources/ACSST5Y2019.S1901_data_with_overlays_2021-05-24T165952.csv', header=1)
fl_codes = []

for i,j in fl_census_data.iterrows():
    fl_codes.append(j['Geographic Area Name'][6:11])


split_codes = [iter(fl_codes)] * 123
    
zip_codes = list(zip(*split_codes))

# zip_codes[0]

In [10]:
count = 0
for code in zip_codes[2]:
    count += 1

print(count)
zip_codes[7][83:]

123


('34748',
 '34753',
 '34756',
 '34758',
 '34759',
 '34760',
 '34761',
 '34762',
 '34769',
 '34771',
 '34772',
 '34773',
 '34785',
 '34786',
 '34787',
 '34788',
 '34797',
 '34945',
 '34946',
 '34947',
 '34949',
 '34950',
 '34951',
 '34952',
 '34953',
 '34956',
 '34957',
 '34972',
 '34974',
 '34981',
 '34982',
 '34983',
 '34984',
 '34986',
 '34987',
 '34990',
 '34994',
 '34996',
 '34997',
 ' Stat')

In [104]:
# Read in historical mortgage rate
mortgage_data = pd.read_csv('Resources/morgage_rates.csv')
year_mortgage = []
month_mortgage = []
mortgage_rate = []

# Loop to limit data for 2019 & 2020
for i,j in mortgage_data.iterrows():
    if j['DATE']>='2019%' and j['DATE']<'2021%':
        year_mortgage.append(pd.to_datetime(j['DATE']).year)
        month_mortgage.append(pd.to_datetime(j['DATE']).month)
        mortgage_rate.append(j['MORTGAGE30US'])


# Create mortgage rate DF grouped by year and month to get the mean
mortgage_rates = pd.DataFrame({"year":year_mortgage, "month":month_mortgage, 
                        "mortgage_rate":mortgage_rate}).groupby(['year', 'month'])

# Export csv of calculated monthly rates
mortgage_rates.mean().to_csv('Resources/monthly_morgage_rates.csv')

In [11]:
# Establish API URL
property_url = "https://api.gateway.attomdata.com/propertyapi/v1.0.0/"

sales_trend = "salestrend/snapshot?geoid=ZI"

sales_trend_dates = "&interval=monthly&startyear=2019&endyear=2020&startmonth=january&endmonth=december"

headers = {'Accept': 'application/json', 'apikey': MLONG}


# Establish variable for information desired
year = []
month = []
zip_code_sales = []
total_sales = []
average_price = []
median_price = []
sales_no_data = []

for code in zip_codes[7][83:]:
    # Make API call
    sales_response = requests.post( f'{property_url}{sales_trend}{code}{sales_trend_dates}',
                                    headers = headers)
    
    if sales_response.status_code != 200:
        sales_no_data.append(sales_response.url)
    else:
        sales = sales_response.json()

        # Loop through API data and append to variables
        for i in sales['salestrends']:
            year.append(pd.to_datetime(i['daterange']['start']).year)
            month.append(pd.to_datetime(i['daterange']['start']).month)
            zip_code_sales.append(i['location']['geoID'][2:7])
            total_sales.append(i['SalesTrend']['homesalecount'])
            average_price.append(i['SalesTrend']['avgsaleprice'])
            median_price.append(i['SalesTrend']['medsaleprice'])
            
        # Set delay based on API call limitations
        time.sleep(7)

# Create new DF with data
sales_data = pd.DataFrame({"zipcode": zip_code_sales, "year":year, "month":month, 
                        "total_sales":total_sales, "avg_sale_price":average_price,
                        "med_sale_price":median_price})

sales_data

Unnamed: 0,zipcode,year,month,total_sales,avg_sale_price,med_sale_price
0,34748,2019,2,66,140192.0,128950.0
1,34748,2019,3,49,142005.0,119900.0
2,34748,2019,4,102,155678.0,164450.0
3,34748,2019,5,94,171064.0,163950.0
4,34748,2019,6,76,153324.0,152500.0
...,...,...,...,...,...,...
912,34997,2020,8,123,288527.0,251000.0
913,34997,2020,9,115,306618.0,256000.0
914,34997,2020,10,139,324712.0,285000.0
915,34997,2020,11,132,315426.0,259750.0


In [5]:
# Create new DF with data
sales_data_1 = pd.DataFrame({"zipcode": zip_code_sales, "year":year, "month":month, 
                        "total_sales":total_sales, "avg_sale_price":average_price,
                        "med_sale_price":median_price})


In [6]:

sales_data_1

Unnamed: 0,zipcode,year,month,total_sales,avg_sale_price,med_sale_price
0,34291,2019,1,10,231310.0,206000.0
1,34291,2019,2,13,221992.0,240000.0
2,34291,2019,3,21,266510.0,242000.0
3,34291,2019,4,22,267242.0,233000.0
4,34291,2019,5,31,232868.0,196500.0
...,...,...,...,...,...,...
1925,34747,2020,8,194,315594.0,272750.0
1926,34747,2020,9,177,307848.0,295000.0
1927,34747,2020,10,211,324094.0,282000.0
1928,34747,2020,11,184,384162.0,305000.0


In [12]:
sales_data_2 = pd.concat([sales_data_1, sales_data])

sales_data_2

Unnamed: 0,zipcode,year,month,total_sales,avg_sale_price,med_sale_price
0,34291,2019,1,10,231310.0,206000.0
1,34291,2019,2,13,221992.0,240000.0
2,34291,2019,3,21,266510.0,242000.0
3,34291,2019,4,22,267242.0,233000.0
4,34291,2019,5,31,232868.0,196500.0
...,...,...,...,...,...,...
912,34997,2020,8,123,288527.0,251000.0
913,34997,2020,9,115,306618.0,256000.0
914,34997,2020,10,139,324712.0,285000.0
915,34997,2020,11,132,315426.0,259750.0


In [13]:
rates = pd.read_csv('Resources/monthly_morgage_rates.csv')

final_sales = pd.merge(sales_data_2, rates, on=['year','month'])

final_sales

Unnamed: 0,zipcode,year,month,total_sales,avg_sale_price,med_sale_price,mortgage_rate
0,34291,2019,1,10,231310.0,206000.0,4.464
1,34292,2019,1,34,283729.0,272500.0,4.464
2,34293,2019,1,91,238282.0,217000.0,4.464
3,34420,2019,1,29,126858.0,115100.0,4.464
4,34428,2019,1,17,165129.0,147200.0,4.464
...,...,...,...,...,...,...,...
2842,34987,2020,12,48,306515.0,301929.0,2.684
2843,34990,2020,12,129,496651.0,415000.0,2.684
2844,34994,2020,12,49,369545.0,179000.0,2.684
2845,34996,2020,12,54,682443.0,390000.0,2.684


In [14]:
final_sales.to_csv('Resources/Sales_Area_Data/sales_call_8.csv')

In [15]:
# Establish API URL
community_url = "https://api.gateway.attomdata.com/communityapi/v2.0.0"

area_url = "/area/full?AreaId=ZI"


# Establish variable for information desired
zip_code_area = []
prop_tax = []
owner_occupied = []
renter_occupied = []
total_vacant =[]
total_dwellings = []
home_age = []
studio_rent = []
one_bed_rent = []
two_bed_rent = []
three_bed_rent = []
four_bed_rent = []
fte_employed = []
unemployed = []
avg_income = []
proj_income = []
expense_index = []
avg_commute = []
crime_index = []
area_no_data = []

for code in zip_codes[7]:
    
    # Make API call
    area_response = requests.post( f'{community_url}{area_url}{code}', headers = headers)
    if area_response.status_code != 200:
        area_no_data.append(area_response.url)
    else:
        area = area_response.json()
        for i in area:
            zip_code_area.append(area['response']['inputparameter']['AreaId'][2:7])
        for i in area['response']['result']['package']['item']:
            prop_tax.append(i['avg_prop_tax'])
            owner_occupied.append(i['dwlowned'])
            renter_occupied.append(i['dwlrent'])
            total_vacant.append(i['dwlvacnt'])
            total_dwellings.append(i['dwltotal'])
            home_age.append(i['houmedage'])
            studio_rent.append(i['studio_county'])
            one_bed_rent.append(i['one_bed_county'])
            two_bed_rent.append(i['two_bed_county'])
            three_bed_rent.append(i['three_bed_county'])
            four_bed_rent.append(i['four_bed_county'])
            fte_employed.append(i['emptotal'])
            unemployed.append(i['empunemp'])
            avg_income.append(i['inccyavehh'])
            proj_income.append(i['incavehhpy_5'])
            expense_index.append(i['idxexptotal'])
            avg_commute.append(i['trwave'])
            crime_index.append(i['crmcytotc'])
            


    # Set delay based on API call limitations
    time.sleep(7)

area_data = pd.DataFrame({'zipcode': zip_code_area, 'property_tax': prop_tax,
                        'owner_occupied': owner_occupied, 'renter_occupied': renter_occupied,
                        'total_vacant': total_vacant, 'total_dwellings': total_dwellings,
                        'studio_rent': studio_rent, 'one_bed_rent': one_bed_rent,
                        'two_bed_rent': two_bed_rent, 'three_bed_rent': three_bed_rent,
                        'four_bed_rent': four_bed_rent, 'fte_employed': fte_employed,
                        'unemployed':unemployed, 'average_income':avg_income,
                        'projected_income':proj_income, 'expense_index':expense_index,
                        'average_commute':avg_commute, 'crime_index':crime_index})
area_data

Unnamed: 0,zipcode,property_tax,owner_occupied,renter_occupied,total_vacant,total_dwellings,studio_rent,one_bed_rent,two_bed_rent,three_bed_rent,four_bed_rent,fte_employed,unemployed,average_income,projected_income,expense_index,average_commute,crime_index
0,34291,2871,2382,738,503,3624,841,992,1275,1686,2030,265,109,75445,89706,105,30,144
1,34292,3107,6706,1963,2028,10697,841,992,1275,1686,2030,9571,334,84943,101190,75,18,109
2,34293,2897,18400,4172,5608,28180,841,992,1275,1686,2030,8833,1546,82008,98992,78,22,33
3,34420,1415,5192,1937,1311,8439,692,757,939,1226,1356,3909,621,57093,63861,83,26,170
4,34428,1364,3438,982,1145,5566,659,679,895,1279,1550,4184,369,66431,73662,80,22,88
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,34986,4312,9944,2865,2542,15351,926,932,1211,1618,1830,12238,367,72324,82326,86,30,78
113,34987,4875,4237,1290,1829,7356,926,932,1211,1618,1830,2392,303,98813,110440,96,27,117
114,34990,4491,11232,2039,1656,14926,926,932,1211,1618,1830,9001,934,120310,139546,101,25,56
115,34996,5157,5202,1267,2780,9249,926,932,1211,1618,1830,5411,163,115311,132782,78,21,140


In [9]:
area_data_1 = pd.DataFrame({'zipcode': zip_code_area, 'property_tax': prop_tax,
                        'owner_occupied': owner_occupied, 'renter_occupied': renter_occupied,
                        'total_vacant': total_vacant, 'total_dwellings': total_dwellings,
                        'studio_rent': studio_rent, 'one_bed_rent': one_bed_rent,
                        'two_bed_rent': two_bed_rent, 'three_bed_rent': three_bed_rent,
                        'four_bed_rent': four_bed_rent, 'fte_employed': fte_employed,
                        'unemployed':unemployed, 'average_income':avg_income,
                        'projected_income':proj_income, 'expense_index':expense_index,
                        'average_commute':avg_commute, 'crime_index':crime_index})
area_data_1

Unnamed: 0,zipcode,property_tax,owner_occupied,renter_occupied,total_vacant,total_dwellings,studio_rent,one_bed_rent,two_bed_rent,three_bed_rent,four_bed_rent,fte_employed,unemployed,average_income,projected_income,expense_index,average_commute,crime_index
0,33569,3310,8393,2363,1207,11962,989,1040,1271,1651,2028,9928,1552,87173,100534,106,27,233
1,33570,3140,6985,3007,2195,12187,989,1040,1271,1651,2028,4464,962,71461,82748,104,26,375
2,33572,4998,6853,2463,1429,10746,989,1040,1271,1651,2028,4188,605,111121,127182,98,30,271
3,33573,2551,12562,3892,2853,19307,989,1040,1271,1651,2028,4670,419,64187,74338,65,18,224
4,33576,2032,1857,308,318,2483,989,1040,1271,1651,2028,1831,205,87684,98954,81,26,151
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,33843,1162,3001,901,1585,5488,794,799,1023,1375,1764,1788,594,56561,62578,91,27,194
99,33844,2048,11259,4914,5705,21878,794,799,1023,1375,1764,8307,2008,55173,61635,94,29,275
100,33847,1093,54,17,11,82,794,799,1023,1375,1764,38,15,58266,64424,101,32,413
101,33848,1640,296,99,285,680,1055,1140,1321,1713,2057,1003,191,63436,71345,91,25,431


In [16]:
area_data = pd.concat([area_data_1, area_data_2])
area_data

Unnamed: 0,zipcode,property_tax,owner_occupied,renter_occupied,total_vacant,total_dwellings,studio_rent,one_bed_rent,two_bed_rent,three_bed_rent,four_bed_rent,fte_employed,unemployed,average_income,projected_income,expense_index,average_commute,crime_index
0,33569,3310,8393,2363,1207,11962,989,1040,1271,1651,2028,9928,1552,87173,100534,106,27,233
1,33570,3140,6985,3007,2195,12187,989,1040,1271,1651,2028,4464,962,71461,82748,104,26,375
2,33572,4998,6853,2463,1429,10746,989,1040,1271,1651,2028,4188,605,111121,127182,98,30,271
3,33573,2551,12562,3892,2853,19307,989,1040,1271,1651,2028,4670,419,64187,74338,65,18,224
4,33576,2032,1857,308,318,2483,989,1040,1271,1651,2028,1831,205,87684,98954,81,26,151
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14,33873,1240,2934,1576,695,5206,565,569,750,972,1168,4364,534,56587,64385,110,26,275
15,33875,1481,4357,788,896,6041,575,661,871,1100,1183,963,165,65706,72490,84,21,97
16,33876,1310,1821,471,713,3004,575,661,871,1100,1183,669,64,52153,57508,80,22,154
17,33877,852,76,44,26,147,794,799,1023,1375,1764,123,47,44884,49628,72,23,102


In [16]:
area_data.to_csv('Resources/Sales_Area_Data/area_call_8.csv')