In [1]:
# Dependencies
import pandas as pd

## Data Retrieval and Sorting

### >> Data Retrieval

In [2]:
# Load the US zipcode -> latitude/longitude lookup table into "zipcode_pd"
zipcode_pd = pd.read_csv(filepath_or_buffer="../data/rawdata/zipcode_lat_lngs.csv")

# Show the first five rows of "zipcode_pd" as a sanity check
zipcode_pd.head(n=5)

Unnamed: 0,ZIP,LAT,LNG
0,5902,45.009587,-71.487927
1,5903,44.935091,-71.588009
2,5904,44.416217,-71.70654
3,5905,44.702095,-71.679481
4,5906,44.470995,-71.705947


In [3]:
# Load the list of Houston Harris County zipcodes into "harris_cty_zipcode_pd"
harris_cty_zipcode_pd = pd.read_csv(filepath_or_buffer="../data/rawdata/Harris_County.csv")

# Show the first five rows of "harris_cty_zipcode_pd" as a sanity check
harris_cty_zipcode_pd.head(n=5)

Unnamed: 0,ZIP
0,77002
1,77004
2,77003
3,77006
4,77005


### >> Data Sorting

In [4]:
# Attach Lat and Lng to each Harris County zipcode and save as "harris_cty_zipcode_latlng_pd".
# Inner join on "ZIP": a Harris County zipcode with no match in "zipcode_pd" is dropped here.
harris_cty_zipcode_latlng_pd = harris_cty_zipcode_pd.merge(
    zipcode_pd,
    how="inner",
    on="ZIP",
)

# Show the first five rows of "harris_cty_zipcode_latlng_pd"
harris_cty_zipcode_latlng_pd.head(n=5)

Unnamed: 0,ZIP,LAT,LNG
0,77002,29.756845,-95.365652
1,77004,29.724893,-95.363752
2,77003,29.749778,-95.345885
3,77006,29.74097,-95.391301
4,77005,29.718435,-95.423555


In [5]:
# Count the rows of "harris_cty_zipcode_latlng_pd" (Harris County zipcodes that found a Lat/Lng match)
harris_cty_zipcode_latlng_pd.shape[0]

135

In [6]:
# Collect the Harris County zipcodes that are absent from the merged dataframe,
# i.e. the ones for which "zipcode_pd" had no Lat/Lng row; renumber the index from 0.
missing_zipcode_pd = (
    harris_cty_zipcode_pd
    .loc[lambda county: ~county["ZIP"].isin(harris_cty_zipcode_latlng_pd["ZIP"])]
    .reset_index(drop=True)
)

# Display "missing_zipcode_pd"
missing_zipcode_pd

Unnamed: 0,ZIP
0,77204
1,77217
2,77249
3,77251
4,77266
5,77268
6,77271
7,77284
8,77289
9,77383


In [7]:
# Attach hard-coded Lat and Lng values to the missing Harris County zipcodes.
# The values are matched to rows by position, so both lists must follow the
# row order of "missing_zipcode_pd" exactly.
# NOTE(review): each list holds 11 values -- confirm this equals the number of
# rows in "missing_zipcode_pd" before re-running.
missing_zipcode_pd = missing_zipcode_pd.assign(
    LAT=[29.7260, 29.6754, 29.8039, 29.7000, 29.7344, 29.9909, 29.7599, 29.7600, 29.5763, 30.0773, 29.9752],
    LNG=[-95.3482, -95.2470, -95.3730, -95.5400, -95.3950, -95.4818, -95.3701, -95.3600, -95.1403, -95.4333, -95.6803],
)

# Display "missing_zipcode_pd" with the new columns
missing_zipcode_pd

Unnamed: 0,ZIP,LAT,LNG
0,77204,29.726,-95.3482
1,77217,29.6754,-95.247
2,77249,29.8039,-95.373
3,77251,29.7,-95.54
4,77266,29.7344,-95.395
5,77268,29.9909,-95.4818
6,77271,29.7599,-95.3701
7,77284,29.76,-95.36
8,77289,29.5763,-95.1403
9,77383,30.0773,-95.4333


In [8]:
# Add missing zipcodes to "zipcode_pd" and save as "zipcode_pd_plus_missing".
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; ignore_index=True renumbers the rows
# so the appended zipcodes continue the 0..n-1 index.
zipcode_pd_plus_missing = pd.concat([zipcode_pd, missing_zipcode_pd], ignore_index=True)

# Check the length of "zipcode_pd_plus_missing"
len(zipcode_pd_plus_missing)

31470

In [9]:
# Save "zipcode_pd_plus_missing" as a csv file: column names are written as the
# header row and the dataframe index is left out.
zipcode_pd_plus_missing.to_csv(
    "../data/cleandata/zipcode_lat_lngs_v2.csv",
    header=True,
    index=False,
)

In [10]:
# Count the rows of the original "zipcode_pd" for comparison with "zipcode_pd_plus_missing"
zipcode_pd.shape[0]

31459