## Introduction

In this notebook, I gather and process datasets on home values in Texas (TX), together with geographic information for TX.

In [1]:
# Import packages
## General
import pandas as pd
import numpy as np
## geojson file
from urllib.request import urlopen
import json
import urllib
import requests

### Zipcode in TX
Latitude and longitude of zipcodes in TX\
Data source: [opendatasoft](https://public.opendatasoft.com/explore/dataset/us-zip-code-latitude-and-longitude/export/?refine.state=TX&location=11,29.78166,-95.33283&basemap=jawg.streets)


In [3]:
# Load the zip-code latitude/longitude export; the file is semicolon-delimited.
tx_zip = pd.read_csv('us-zip-code-latitude-and-longitude.csv', sep=';')
# Print the (rows, columns) shape, then preview the first rows.
print(tx_zip.shape)
tx_zip.head()

(2743, 8)


Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
0,76061,Lillian,TX,32.502674,-97.16347,-6,1,"32.502674,-97.16347"
1,78789,Austin,TX,30.326374,-97.771258,-6,1,"30.326374,-97.771258"
2,76102,Fort Worth,TX,32.75388,-97.32987,-6,1,"32.75388,-97.32987"
3,88545,El Paso,TX,31.694842,-106.299987,-7,1,"31.694842,-106.299987"
4,79058,Masterson,TX,35.837775,-101.892846,-6,1,"35.837775,-101.892846"


#### Make subsets of Houston, San Antonio, Dallas, and Austin
These four cities are among the ten largest cities in the US.

- Houston

In [5]:
# Keep only the rows whose City is "Houston"; .copy() makes the subset
# independent of tx_zip.
hst_zip = tx_zip.query('City == "Houston"').copy()
print(hst_zip.shape)
hst_zip.head()

(182, 8)


Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
6,77071,Houston,TX,29.647637,-95.51718,-6,1,"29.647637,-95.51718"
22,77006,Houston,TX,29.741878,-95.38944,-6,1,"29.741878,-95.38944"
68,77266,Houston,TX,29.83399,-95.434241,-6,1,"29.83399,-95.434241"
81,77268,Houston,TX,29.83399,-95.434241,-6,1,"29.83399,-95.434241"
91,77010,Houston,TX,29.754728,-95.36216,-6,1,"29.754728,-95.36216"


In [10]:
# Save the Houston subset as CSV; index=False leaves the row index out of the file.
# NOTE(review): assumes the TX_data/ directory already exists — confirm.
hst_zip.to_csv('TX_data/houston_zipcode_lat_lng.csv', index=False)

- San Antonio

In [6]:
# Keep only the rows whose City is "San Antonio"; .copy() makes the subset
# independent of tx_zip.
sa_zip = tx_zip.query('City == "San Antonio"').copy()
print(sa_zip.shape)
sa_zip.head()

(90, 8)


Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
47,78239,San Antonio,TX,29.51613,-98.36161,-6,1,"29.51613,-98.36161"
95,78252,San Antonio,TX,29.335577,-98.70173,-6,1,"29.335577,-98.70173"
113,78295,San Antonio,TX,29.437532,-98.461582,-6,1,"29.437532,-98.461582"
124,78224,San Antonio,TX,29.333436,-98.53934,-6,1,"29.333436,-98.53934"
134,78291,San Antonio,TX,29.437532,-98.461582,-6,1,"29.437532,-98.461582"


In [12]:
# Save the San Antonio subset as CSV; index=False leaves the row index out of the file.
# NOTE(review): assumes the TX_data/ directory already exists — confirm.
sa_zip.to_csv('TX_data/san_antonio_zipcode_lat_lng.csv', index=False)

- Dallas

In [7]:
# Keep only the rows whose City is "Dallas"; .copy() makes the subset
# independent of tx_zip.
dls_zip = tx_zip.query('City == "Dallas"').copy()
print(dls_zip.shape)
dls_zip.head()

(122, 8)


Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
8,75353,Dallas,TX,32.767268,-96.777626,-6,1,"32.767268,-96.777626"
16,75264,Dallas,TX,32.767268,-96.777626,-6,1,"32.767268,-96.777626"
29,75205,Dallas,TX,32.836094,-96.79524,-6,1,"32.836094,-96.79524"
39,75301,Dallas,TX,32.767268,-96.777626,-6,1,"32.767268,-96.777626"
51,75230,Dallas,TX,32.901176,-96.79054,-6,1,"32.901176,-96.79054"


In [13]:
# Save the Dallas subset as CSV; index=False leaves the row index out of the file.
# NOTE(review): assumes the TX_data/ directory already exists — confirm.
dls_zip.to_csv('TX_data/dallas_zipcode_lat_lng.csv', index=False)

- Austin

In [8]:
# Keep only the rows whose City is "Austin"; .copy() makes the subset
# independent of tx_zip.
ast_zip = tx_zip.query('City == "Austin"').copy()
print(ast_zip.shape)
ast_zip.head()

(83, 8)


Unnamed: 0,Zip,City,State,Latitude,Longitude,Timezone,Daylight savings time flag,geopoint
1,78789,Austin,TX,30.326374,-97.771258,-6,1,"30.326374,-97.771258"
72,78773,Austin,TX,30.326374,-97.771258,-6,1,"30.326374,-97.771258"
103,78709,Austin,TX,30.326374,-97.771258,-6,1,"30.326374,-97.771258"
143,78745,Austin,TX,30.207559,-97.79575,-6,1,"30.207559,-97.79575"
173,78759,Austin,TX,30.406169,-97.75743,-6,1,"30.406169,-97.75743"


In [14]:
# Save the Austin subset as CSV; index=False leaves the row index out of the file.
# NOTE(review): assumes the TX_data/ directory already exists — confirm.
ast_zip.to_csv('TX_data/austin_zipcode_lat_lng.csv', index=False)

### Home Value 
Zillow Home Value Index (ZHVI) of different home types\
Data source: [Zillow](https://www.zillow.com/research/data/)



#### - Geographical level: City
Used for analyzing and visualizing historical data at the city level.

In [17]:
# Read the seven city-level ZHVI exports in one pass: bedroom counts 1-5,
# then condo, then single-family (sfr). The paths are listed in the same
# order as the names they are unpacked into.
(
    city_1b,
    city_2b,
    city_3b,
    city_4b,
    city_5b,
    city_cd,
    city_sfr,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'zillow_data/City_zhvi_bdrmcnt_1_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/City_zhvi_bdrmcnt_2_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/City_zhvi_bdrmcnt_3_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/City_zhvi_bdrmcnt_4_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/City_zhvi_bdrmcnt_5_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/City_zhvi_uc_condo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/City_zhvi_uc_sfr_tier_0.33_0.67_sm_sa_mon.csv',
    )
)

**Make subsets of TX and Clean the Dataframe**

In [46]:
def tx_subset(df, home_type):
    """Return the Texas rows of a city-level table, tagged with a home type.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that has at least the columns ``State``, ``RegionID``,
        ``RegionType``, ``StateName`` and ``RegionName``.
    home_type : str
        Label written to every row of the new ``HomeType`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows where ``State`` is ``"TX"``, with
        ``RegionID``, ``RegionType`` and ``StateName`` removed, ``RegionName``
        renamed to ``City``, and the last column (the newly added
        ``HomeType``) moved to the front. The input frame is not modified.
    """
    texas = df.query('State=="TX"').copy()
    texas['HomeType'] = home_type

    trimmed = (
        texas
        .drop(columns=['RegionID', 'RegionType', 'StateName'])
        .rename(columns={'RegionName': 'City'})
    )

    # Rotate the column labels by one so the trailing label leads the frame.
    labels = list(trimmed.columns)
    return trimmed[labels[-1:] + labels[:-1]]

In [47]:
# Build the TX subset of the one-bedroom city table, then check its shape
# and preview the first rows.
tx_city_1b = tx_subset(city_1b, 'One Bedroom')
print(tx_city_1b.shape)
tx_city_1b.head()

(533, 307)


Unnamed: 0,HomeType,SizeRank,City,State,Metro,CountyName,1996-01-31,1996-02-29,1996-03-31,1996-04-30,...,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30,2020-12-31,2021-01-31
2,One Bedroom,2,Houston,TX,Houston-The Woodlands-Sugar Land,Harris County,71747.0,71887.0,71918.0,72217.0,...,112149.0,112496.0,112742.0,113091.0,113346.0,114359.0,115396.0,116949.0,117962.0,118954.0
4,One Bedroom,4,San Antonio,TX,San Antonio-New Braunfels,Bexar County,58982.0,58733.0,58584.0,58289.0,...,99864.0,100000.0,100102.0,100765.0,101131.0,101489.0,101997.0,102704.0,103607.0,104068.0
9,One Bedroom,9,Dallas,TX,Dallas-Fort Worth-Arlington,Dallas County,84781.0,83314.0,82332.0,80381.0,...,141385.0,142657.0,143523.0,143912.0,143871.0,143744.0,143810.0,143904.0,144890.0,145525.0
10,One Bedroom,10,Austin,TX,Austin-Round Rock,Travis County,139202.0,138835.0,137509.0,136284.0,...,283760.0,286451.0,288454.0,290290.0,292319.0,294717.0,297636.0,300851.0,304298.0,307055.0
16,One Bedroom,16,Fort Worth,TX,Dallas-Fort Worth-Arlington,Tarrant County,88460.0,88603.0,88913.0,89334.0,...,158290.0,158688.0,158918.0,159877.0,160933.0,162646.0,165493.0,168208.0,170421.0,171486.0


In [48]:
# Apply tx_subset to the remaining six city tables; each raw frame is paired
# with the HomeType label stamped onto its rows.
(
    tx_city_2b,
    tx_city_3b,
    tx_city_4b,
    tx_city_5b,
    tx_city_cd,
    tx_city_sfr,
) = (
    tx_subset(raw_frame, label)
    for raw_frame, label in (
        (city_2b, 'Two Bedrooms'),
        (city_3b, 'Three Bedrooms'),
        (city_4b, 'Four Bedrooms'),
        (city_5b, 'Five Bedrooms And More'),
        (city_cd, 'Condominium and Co-operative Homes'),
        (city_sfr, 'Single-family Residences'),
    )
)

**Concatenate the dataframes sliced above**

In [49]:
# Stack the seven per-home-type TX city frames into one long table.
# ignore_index=True discards the original row labels and renumbers from 0.
dfs = [tx_city_1b, tx_city_2b, tx_city_3b, tx_city_4b, tx_city_5b, tx_city_cd, tx_city_sfr]
tx_city_zhvi = pd.concat(dfs, ignore_index=True)
print(tx_city_zhvi.shape)
tx_city_zhvi.head()

(6542, 307)


Unnamed: 0,HomeType,SizeRank,City,State,Metro,CountyName,1996-01-31,1996-02-29,1996-03-31,1996-04-30,...,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30,2020-12-31,2021-01-31
0,One Bedroom,2,Houston,TX,Houston-The Woodlands-Sugar Land,Harris County,71747.0,71887.0,71918.0,72217.0,...,112149.0,112496.0,112742.0,113091.0,113346.0,114359.0,115396.0,116949.0,117962.0,118954.0
1,One Bedroom,4,San Antonio,TX,San Antonio-New Braunfels,Bexar County,58982.0,58733.0,58584.0,58289.0,...,99864.0,100000.0,100102.0,100765.0,101131.0,101489.0,101997.0,102704.0,103607.0,104068.0
2,One Bedroom,9,Dallas,TX,Dallas-Fort Worth-Arlington,Dallas County,84781.0,83314.0,82332.0,80381.0,...,141385.0,142657.0,143523.0,143912.0,143871.0,143744.0,143810.0,143904.0,144890.0,145525.0
3,One Bedroom,10,Austin,TX,Austin-Round Rock,Travis County,139202.0,138835.0,137509.0,136284.0,...,283760.0,286451.0,288454.0,290290.0,292319.0,294717.0,297636.0,300851.0,304298.0,307055.0
4,One Bedroom,16,Fort Worth,TX,Dallas-Fort Worth-Arlington,Tarrant County,88460.0,88603.0,88913.0,89334.0,...,158290.0,158688.0,158918.0,159877.0,160933.0,162646.0,165493.0,168208.0,170421.0,171486.0


In [60]:
# Non-null count of every column per home type; count() skips missing values,
# so a lower number in a column means that home type has gaps there.
tx_city_zhvi.groupby('HomeType').count()

Unnamed: 0_level_0,SizeRank,City,State,Metro,CountyName,1996-01-31,1996-02-29,1996-03-31,1996-04-30,1996-05-31,...,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30,2020-12-31,2021-01-31
HomeType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Condominium and Co-operative Homes,236,236,236,222,236,61,61,61,61,61,...,236,236,236,236,236,236,236,236,236,236
Five Bedrooms And More,755,755,755,640,755,189,194,195,195,196,...,755,755,755,755,755,755,755,755,755,755
Four Bedrooms,1111,1111,1111,881,1111,270,276,276,276,277,...,1111,1111,1111,1111,1111,1111,1111,1111,1111,1111
One Bedroom,533,533,533,429,533,59,61,61,61,61,...,533,533,533,533,533,533,533,533,533,533
Single-family Residences,1529,1529,1529,1113,1529,308,312,312,312,313,...,1529,1529,1529,1529,1529,1529,1529,1529,1529,1529
Three Bedrooms,1314,1314,1314,996,1314,301,306,307,307,307,...,1314,1314,1314,1314,1314,1314,1314,1314,1314,1314
Two Bedrooms,1064,1064,1064,818,1064,204,207,207,207,208,...,1064,1064,1064,1064,1064,1064,1064,1064,1064,1064


In [50]:
# Save the combined TX city table as CSV; index=False leaves the row index out.
# NOTE(review): assumes the TX_data/ directory already exists — confirm.
tx_city_zhvi.to_csv('TX_data/tx_city_zhvi.csv', index=False)

#### - Geographical level: County
Used for analyzing and visualizing changes in home values in 2020.

In [42]:
# Read the seven county-level ZHVI exports in one pass: bedroom counts 1-5,
# then condo, then single-family (sfr). The paths are listed in the same
# order as the names they are unpacked into.
(
    county_1b,
    county_2b,
    county_3b,
    county_4b,
    county_5b,
    county_cd,
    county_sfr,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        'zillow_data/County_zhvi_bdrmcnt_1_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/County_zhvi_bdrmcnt_2_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/County_zhvi_bdrmcnt_3_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/County_zhvi_bdrmcnt_4_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/County_zhvi_bdrmcnt_5_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/County_zhvi_uc_condo_tier_0.33_0.67_sm_sa_mon.csv',
        'zillow_data/County_zhvi_uc_sfr_tier_0.33_0.67_sm_sa_mon.csv',
    )
)

In [43]:
# Preview the one-bedroom county table as loaded, before any TX filtering.
county_1b.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,1996-01-31,...,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30,2020-12-31,2021-01-31
0,3101,0,Los Angeles County,County,CA,CA,Los Angeles-Long Beach-Anaheim,6,37,110947.0,...,483200.0,483864.0,484218.0,487222.0,491580.0,496258.0,499569.0,502890.0,505718.0,508243.0
1,139,1,Cook County,County,IL,IL,Chicago-Naperville-Elgin,17,31,91570.0,...,199958.0,200251.0,200585.0,201315.0,202173.0,203400.0,204635.0,205909.0,207097.0,207849.0
2,1090,2,Harris County,County,TX,TX,Houston-The Woodlands-Sugar Land,48,201,75472.0,...,110814.0,111199.0,111485.0,111880.0,112176.0,113304.0,114469.0,116189.0,117325.0,118459.0
3,2402,3,Maricopa County,County,AZ,AZ,Phoenix-Mesa-Scottsdale,4,13,63163.0,...,161131.0,162827.0,164452.0,166134.0,168442.0,171194.0,174120.0,177135.0,180211.0,183758.0
4,2841,4,San Diego County,County,CA,CA,San Diego-Carlsbad,6,73,89296.0,...,348571.0,350142.0,350890.0,352081.0,354405.0,358228.0,362215.0,365770.0,369302.0,373088.0


**Make subsets of TX and Clean the Dataframe**

In [55]:
def tx_county_clean(df, home_type):
    """Return the Texas rows of a county-level table with a FIPS code and home type.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that has at least the columns ``State``, ``StateCodeFIPS``,
        ``MunicipalCodeFIPS``, ``RegionID``, ``RegionType``, ``StateName``
        and ``RegionName``.
    home_type : str
        Label written to every row of the new ``HomeType`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows where ``State`` is ``"TX"``. It gains
        a string ``FIPS`` column (state code followed by the county code,
        zero-padded on the left to three characters) and a ``HomeType`` column,
        both placed first. ``RegionName`` is renamed to ``CountyName`` and the
        ID/type/state-name and raw FIPS component columns are removed. The
        input frame is not modified.
    """
    texas = df.query('State=="TX"').copy()

    # Join the two code parts as text so the county code's leading zeros survive.
    state_part = texas['StateCodeFIPS'].astype(str)
    county_part = texas['MunicipalCodeFIPS'].astype(str).str.rjust(3, fillchar='0')
    texas['FIPS'] = state_part + county_part

    texas = (
        texas
        .drop(columns=['RegionID', 'RegionType', 'StateName', 'StateCodeFIPS', 'MunicipalCodeFIPS'])
        .rename(columns={'RegionName': 'CountyName'})
    )
    texas['HomeType'] = home_type

    # FIPS and HomeType are the two trailing labels; rotate them to the front.
    labels = list(texas.columns)
    return texas[labels[-2:] + labels[:-2]]

In [56]:
# Build the cleaned TX subset of the one-bedroom county table and preview it.
tx_county_1b = tx_county_clean(county_1b, 'One Bedroom')
tx_county_1b.head()

Unnamed: 0,FIPS,HomeType,SizeRank,CountyName,State,Metro,1996-01-31,1996-02-29,1996-03-31,1996-04-30,...,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30,2020-12-31,2021-01-31
2,48201,One Bedroom,2,Harris County,TX,Houston-The Woodlands-Sugar Land,75472.0,75616.0,75610.0,75889.0,...,110814.0,111199.0,111485.0,111880.0,112176.0,113304.0,114469.0,116189.0,117325.0,118459.0
8,48113,One Bedroom,8,Dallas County,TX,Dallas-Fort Worth-Arlington,,,,,...,138796.0,140029.0,140842.0,141286.0,141354.0,141388.0,141588.0,141790.0,142816.0,143450.0
15,48439,One Bedroom,15,Tarrant County,TX,Dallas-Fort Worth-Arlington,70696.0,70880.0,71166.0,71644.0,...,137116.0,138058.0,139151.0,140695.0,142396.0,143296.0,145292.0,146959.0,149424.0,150915.0
18,48029,One Bedroom,18,Bexar County,TX,San Antonio-New Braunfels,62618.0,62350.0,62190.0,61888.0,...,102004.0,102082.0,102137.0,102791.0,103167.0,103558.0,104129.0,104910.0,105852.0,106297.0
38,48453,One Bedroom,38,Travis County,TX,Austin-Round Rock,140577.0,140238.0,138964.0,137736.0,...,280327.0,283048.0,285092.0,287006.0,289102.0,291535.0,294462.0,297703.0,301185.0,304000.0


In [57]:
# Apply tx_county_clean to the remaining six county tables; each raw frame is
# paired with the HomeType label stamped onto its rows.
(
    tx_county_2b,
    tx_county_3b,
    tx_county_4b,
    tx_county_5b,
    tx_county_cd,
    tx_county_sfr,
) = (
    tx_county_clean(raw_frame, label)
    for raw_frame, label in (
        (county_2b, 'Two Bedrooms'),
        (county_3b, 'Three Bedrooms'),
        (county_4b, 'Four Bedrooms'),
        (county_5b, 'Five Bedrooms And More'),
        (county_cd, 'Condominium and Co-operative Homes'),
        (county_sfr, 'Single-family Residences'),
    )
)

**Concatenate the dataframes sliced above**

In [58]:
# Stack the seven per-home-type TX county frames into one long table.
# ignore_index=True discards the original row labels and renumbers from 0.
# Note: this rebinds `dfs`, which held the city frames in the earlier cell.
dfs = [tx_county_1b, tx_county_2b, tx_county_3b, tx_county_4b, tx_county_5b, tx_county_cd, tx_county_sfr]
tx_county_zhvi = pd.concat(dfs, ignore_index=True)
print(tx_county_zhvi.shape)
tx_county_zhvi.head()

(1273, 307)


Unnamed: 0,FIPS,HomeType,SizeRank,CountyName,State,Metro,1996-01-31,1996-02-29,1996-03-31,1996-04-30,...,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30,2020-12-31,2021-01-31
0,48201,One Bedroom,2,Harris County,TX,Houston-The Woodlands-Sugar Land,75472.0,75616.0,75610.0,75889.0,...,110814.0,111199.0,111485.0,111880.0,112176.0,113304.0,114469.0,116189.0,117325.0,118459.0
1,48113,One Bedroom,8,Dallas County,TX,Dallas-Fort Worth-Arlington,,,,,...,138796.0,140029.0,140842.0,141286.0,141354.0,141388.0,141588.0,141790.0,142816.0,143450.0
2,48439,One Bedroom,15,Tarrant County,TX,Dallas-Fort Worth-Arlington,70696.0,70880.0,71166.0,71644.0,...,137116.0,138058.0,139151.0,140695.0,142396.0,143296.0,145292.0,146959.0,149424.0,150915.0
3,48029,One Bedroom,18,Bexar County,TX,San Antonio-New Braunfels,62618.0,62350.0,62190.0,61888.0,...,102004.0,102082.0,102137.0,102791.0,103167.0,103558.0,104129.0,104910.0,105852.0,106297.0
4,48453,One Bedroom,38,Travis County,TX,Austin-Round Rock,140577.0,140238.0,138964.0,137736.0,...,280327.0,283048.0,285092.0,287006.0,289102.0,291535.0,294462.0,297703.0,301185.0,304000.0


In [61]:
# Non-null count of every column per home type; count() skips missing values,
# so a lower number in a column means that home type has gaps there.
tx_county_zhvi.groupby('HomeType').count()

Unnamed: 0_level_0,FIPS,SizeRank,CountyName,State,Metro,1996-01-31,1996-02-29,1996-03-31,1996-04-30,1996-05-31,...,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30,2020-12-31,2021-01-31
HomeType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Condominium and Co-operative Homes,87,87,87,87,75,13,13,13,13,13,...,87,87,87,87,87,87,87,87,87,87
Five Bedrooms And More,184,184,184,184,118,15,16,16,16,16,...,184,184,184,184,184,184,184,184,184,184
Four Bedrooms,208,208,208,208,123,16,16,16,16,16,...,208,208,208,208,208,208,208,208,208,208
One Bedroom,165,165,165,165,109,12,12,12,12,12,...,165,165,165,165,165,165,165,165,165,165
Single-family Residences,211,211,211,211,124,16,16,16,16,16,...,211,211,211,211,211,211,211,211,211,211
Three Bedrooms,211,211,211,211,124,16,16,16,16,16,...,211,211,211,211,211,211,211,211,211,211
Two Bedrooms,207,207,207,207,122,16,16,16,16,16,...,207,207,207,207,207,207,207,207,207,207


In [62]:
# Save the combined TX county table as CSV; index=False leaves the row index out.
# NOTE(review): assumes the TX_data/ directory already exists — confirm.
tx_county_zhvi.to_csv('TX_data/tx_county_zhvi.csv', index=False)

#### - Geographical level: Zip code
Used for visualizing regional home value changes in four major cities in TX: Houston, Dallas, San Antonio, and Austin.