# Locations and distance to landmarks in Ames

For the descriptive section of our project we are interested in how close the houses are to areas of potential interest to tourists. These areas include Iowa State University's stadium (Jack Trice Stadium), the downtown entertainment and restaurant district, and the local airport.

To obtain the location of each house, we need the Ames real estate data, which contains each property's street address (`Prop_Addr`).

In [1]:
import pandas as pd
import numpy as np


# Load the Ames real-estate records; the first CSV column becomes the index.
# low_memory=False makes pandas parse the file in a single pass rather than in
# chunks, which avoids mixed-dtype inference on this wide file.
real_estate_csv = 'data/Ames_Real_Estate_Data.csv'
realE_df = pd.read_csv(real_estate_csv, low_memory=False, index_col=0)
# List the available columns.
realE_df.columns

Index(['GeoRefNo', 'Tier', 'Range', 'Prop_Addr', 'ZngCdPr', 'ZngCdSc',
       'ZngOLPr', 'ZngOLSc', 'ClassPr_S', 'ClassSc_S', 'Legal_Pr', 'SchD_S',
       'TxD_S', 'MA_Ownr1', 'MA_Ownr2', 'MA_Line1', 'MA_Line2', 'MA_City',
       'MA_State', 'MA_Zip1', 'MA_Zip2', 'Rcrd_Yr', 'Rcrd_Mo', 'Inst1_No',
       'Inst1_Yr', 'Inst1_Mo', 'Inst1TPr', 'LndAc_S', 'ImpAc_S', 'OthAc_S',
       'TtlVal_AsrYr', 'ValType', 'X1TPr_D', 'X1TSc_D', 'X2TPr_D', 'X2TSc_D',
       'X1TPr_S', 'X1TSc_S', 'X2TPr_S', 'X2TSc_S', 'LndAcX1S', 'ImpAcX1S',
       'ImpAcX2S', 'HSTtl_D', 'MilVal_D', 'HSTtl_S', 'MilVal_S', 'AcreX_S1',
       'AcreGr', 'AcreNt_S', 'Neighborhood', 'LotArea', 'ParType', 'BldgNo_S',
       'DwlgNo_S', 'BldgType', 'YrBuilt', 'HouseStyle', 'Foundation',
       'RoofMatl', 'Ext1', 'Ext2', 'MasVnrType', 'Heating', 'Central Air',
       'GLA', 'TtlBsmtSF', 'TotRmsAbvGrd', 'Fireplaces', 'PoolArea',
       'GarageType', 'GarYrBlt', 'Cars', 'GarageArea', 'YrSold_YYYY',
       'MoSold_MM', 'SalePrice', 

In [3]:
# Load the housing price data; the first CSV column becomes the index.
housing_price_csv = 'data/Ames_Housing_Price_Data.csv'
raw_data = pd.read_csv(housing_price_csv, index_col=0)

We merge the two data sets, matching the `PID` column of the housing price data against the `MapRefNo` column of the real estate data.

In [4]:
# Left-join the real-estate attributes onto the housing price rows, matching
# raw_data's PID against realE_df's MapRefNo. Columns that exist in both frames
# receive the "_x" (price data) or "_y" (real-estate data) suffix.
full_df = raw_data.merge(
    realE_df,
    left_on='PID',
    right_on='MapRefNo',
    how="left",
    suffixes=("_x", "_y"),
    sort=True,
    copy=False,
)

# Preview ten randomly chosen rows of the merged frame.
full_df.sample(10)

Unnamed: 0,PID,GrLivArea,SalePrice_x,MSSubClass,MSZoning,LotFrontage,LotArea_x,Street,Alley,LotShape,...,PA-Nmbr,PA-PreD,PA-Strt,PA-StSfx,PA-PostD,PA-UnTyp,PA-UntNo,Date,Source,NmbrBRs
1838,905478220,1480,136500,50,RL,60.0,11100,Pave,,Reg,...,3123,,STORY,ST,,,,10-Jul-20,Ames City Assessor,4.0
406,528181060,1554,209500,120,RL,34.0,4590,Pave,,Reg,...,2908,,ROXBORO,DR,,,,10-Jul-20,Ames City Assessor,2.0
2375,910200080,919,126000,30,RM,50.0,7000,Pave,,Reg,...,121,S,RUSSELL,AVE,,,,10-Jul-20,Ames City Assessor,2.0
2407,911104060,572,75000,30,C (all),60.0,10200,Pave,Grvl,Reg,...,317,,WASHINGTON,AVE,,,,10-Jul-20,Ames City Assessor,
1979,907192030,1660,195000,60,RL,68.0,8935,Pave,,IR1,...,5313,,CLEMENS,BLVD,,,,10-Jul-20,Ames City Assessor,3.0
889,534126090,1682,174000,20,RL,80.0,10400,Pave,,Reg,...,2617,,CLEVELAND,DR,,,,10-Jul-20,Ames City Assessor,3.0
0,526301100,1656,215000,20,RL,141.0,31770,Pave,,IR1,...,3126,,NORTHWOOD,DR,,,,10-Jul-20,Ames City Assessor,3.0
2538,923225240,1475,127000,160,RM,41.0,2665,Pave,,Reg,...,3472,,SOUTHDALE,DR,,,,10-Jul-20,Ames City Assessor,4.0
1874,906225210,996,147000,85,RL,73.0,9069,Pave,,Reg,...,913,,DELAWARE,AVE,,,,10-Jul-20,Ames City Assessor,2.0
1552,903231080,1136,123000,50,RM,52.0,6240,Pave,,Reg,...,1023,,MARSTON,AVE,,,,10-Jul-20,Ames City Assessor,2.0


Use geopy to:
            1) Obtain the coordinates of each house
            2) Compute the geodesic distance from each house to each landmark

In [6]:
import geopy
from geopy import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Nominatim (OpenStreetMap) geocoder, identified by our user-agent string.
locator = Nominatim(
    user_agent="myGeocoder",
)

# Wrap the lookup so consecutive requests are spaced at least 1.5 seconds apart.
geocode = RateLimiter(
    locator.geocode,
    min_delay_seconds=1.5,
)

In [None]:
# WARNING: don't run this unless you have a couple of hours -- every address is
# sent to the rate-limited geocoder one request at a time.
# Rows whose Prop_Addr is not a string (e.g. NaN) are mapped to None instead of
# being geocoded.
full_geocodes = [
    geocode(addr + ', Ames, IA , USA') if isinstance(addr, str) else None
    for addr in full_df['Prop_Addr']
]

In [110]:
# Show only the first few results; the full list has one entry per house and
# would flood the notebook output.
full_geocodes[:10]

[Location(3126, Northwood Drive, Overland Heights, Ames, Story County, Iowa, 50010, United States, (42.053808, -93.618344, 0.0)),
 Location(3115, Northwood Drive, Overland Heights, Ames, Story County, Iowa, 50010, United States, (42.053668, -93.618459, 0.0)),
 Location(3113, Northwood Drive, Overland Heights, Ames, Story County, Iowa, 50010, United States, (42.05357834520508, -93.61827582597934, 0.0)),
 Location(623, River Oak Drive, Overland Heights, Ames, Story County, Iowa, 50010, United States, (42.05284062808919, -93.61863289727206, 0.0)),
 Location(627, River Oak Drive, Overland Heights, Ames, Story County, Iowa, 50010, United States, (42.05285252734815, -93.61867770937495, 0.0)),
 Location(3109, Grove Avenue, Overland Heights, Ames, Story County, Iowa, 50010, United States, (42.052806, -93.619783, 0.0)),
 Location(630, River Oak Drive, Overland Heights, Ames, Story County, Iowa, 50010, United States, (42.05269954061322, -93.61883290722689, 0.0)),
 Location(620, River Oak Drive, 

In [111]:
# Pull the (latitude, longitude) pair out of every geocoding result.
# Indexing a result with [1] yields its coordinate pair. Entries that are None
# (address missing or not found by the geocoder) stay None; this is checked
# explicitly rather than with a bare `except`, so unexpected errors are not
# silently hidden.
full_coord = [
    location[1] if location is not None else None
    for location in full_geocodes
]

In [113]:
# Store each house's coordinates (None where no coordinates were obtained) as
# a new column; full_coord has one entry per row of full_df, in row order.
full_df["coord"] = full_coord

In [168]:
# Preview the first rows of the merged frame.
# NOTE(review): the output captured below this cell comes from a later
# execution (after the distance and price-per-square-foot columns were added),
# so on a fresh top-to-bottom run those columns will not be present here yet.
full_df.head()

Unnamed: 0,PID,GrLivArea,SalePrice,MSSubClass,MSZoning,LotFrontage,LotArea_x,Street,Alley,LotShape,...,PA-UnTyp,PA-UntNo,Date,Source,NmbrBRs,coord,dist_stadium,dist_downtown,dist_airport,PricePerSqft
0,526301100,1656,215000,20,RL,141.0,31770,Pave,,IR1,...,,,10-Jul-20,Ames City Assessor,3.0,"(42.053808, -93.618344)",4.417461,3.210441,6.102121,129.830918
1,526302030,1293,149900,20,RL,,11027,Pave,,IR1,...,,,10-Jul-20,Ames City Assessor,2.0,"(42.053668, -93.618459)",4.39966,3.196132,6.08608,115.931941
2,526302040,1024,157500,20,RL,85.0,10533,Pave,,IR1,...,,,10-Jul-20,Ames City Assessor,2.0,"(42.05357834520508, -93.61827582597934)",4.3952,3.184448,6.076963,153.808594
3,526302110,882,124500,20,RL,74.0,13008,Pave,,IR1,...,,,10-Jul-20,Ames City Assessor,2.0,"(42.05284062808919, -93.61863289727206)",4.308096,3.10674,5.993553,141.156463
4,526302120,1797,169000,20,RL,,11765,Pave,,IR1,...,,,10-Jul-20,Ames City Assessor,3.0,"(42.05285252734815, -93.61867770937495)",4.308129,3.10853,5.994682,94.045632


In [121]:

# Count the houses for which no coordinates were obtained.
full_df['coord'].isna().sum()

88

In [135]:

# Collect the lower-cased addresses of the houses that have no coordinates.
# Selecting with .loc and taking an explicit .copy() gives an independent frame,
# so the column assignment below does not trigger SettingWithCopyWarning.
missing_coord = full_df['coord'].isnull()
problem_addr = full_df.loc[missing_coord, ['Prop_Addr']].copy()
problem_addr['Prop_Addr'] = problem_addr['Prop_Addr'].str.lower()
problem_addr['Prop_Addr']  # I tried to get these addresses to work but could not

127                      1309 kinyon cir
128                      1303 kinyon cir
129                      1210 kinyon cir
130                      1316 kinyon cir
415                     3703 chilton ave
416                     3629 chilton ave
417                     3623 chilton ave
418                     3712 chilton ave
419                       3717 ashton dr
420                       3716 ashton dr
421                      3715 farnham dr
422                     3702 chilton ave
423                       3705 ashton dr
424                       3709 ashton dr
425                      3602 farnham dr
584                      2425 dalton cir
585                      2421 dalton cir
586                      2416 dalton cir
587                      2422 dalton cir
723                                  NaN
782                  2713 northridge cir
783                  2700 northridge cir
1151                                 NaN
1248                     201 e o neil dr
1249            

In [138]:
# Spot-check one of the problem addresses directly. print() is used so that a
# None result is actually shown (a bare None expression displays nothing).
probe_query = '1309 kinyon cir, Ames, IA , USA'
probe_result = geocode(probe_query)
print(probe_result)

None


Now determine the distance (in kilometres) between each house and each of the areas of interest.

In [93]:
# Landmark positions as "latitude, longitude" strings, in the order:
# Jack Trice Stadium, downtown Ames, Ames airport.
JackTriceStadium, Downtown, Airport = (
    '42.0162027, -93.6357060',
    '42.0251029, -93.6138075',
    '41.9989524, -93.6223669',
)

In [149]:
from geopy import distance

# Distance in kilometres from every house to Jack Trice Stadium.
# Houses without coordinates (coord is None) get None. The check is explicit
# instead of a bare `except`, so genuine errors are no longer swallowed.
# Iterating over the `coord` column directly avoids a slow per-row .iloc lookup
# and removes the dependency on the long-running `full_geocodes` cell.
dist_stadium = [
    distance.distance(coord, JackTriceStadium).km if coord is not None else None
    for coord in full_df['coord']
]

In [152]:
# Distance in kilometres from every house to downtown Ames.
# Houses without coordinates (coord is None) get None. The check is explicit
# instead of a bare `except`, so genuine errors are no longer swallowed.
# Iterating over the `coord` column directly avoids a slow per-row .iloc lookup
# and removes the dependency on the long-running `full_geocodes` cell.
dist_downtown = [
    distance.distance(coord, Downtown).km if coord is not None else None
    for coord in full_df['coord']
]

In [153]:
# Distance in kilometres from every house to the Ames airport.
# Houses without coordinates (coord is None) get None. The check is explicit
# instead of a bare `except`, so genuine errors are no longer swallowed.
# Iterating over the `coord` column directly avoids a slow per-row .iloc lookup
# and removes the dependency on the long-running `full_geocodes` cell.
dist_airport = [
    distance.distance(coord, Airport).km if coord is not None else None
    for coord in full_df['coord']
]

In [154]:
# Add the distance to Jack Trice Stadium (km; None where it could not be computed).
full_df['dist_stadium']= dist_stadium

In [155]:
# Add the distance to downtown (km; None where it could not be computed).
full_df['dist_downtown']= dist_downtown

In [156]:
# Add the distance to the airport (km; None where it could not be computed).
full_df['dist_airport']= dist_airport

In [159]:
# Drop the merge suffix so the sale price taken from the housing price data is
# simply "SalePrice". Reassigning (rather than inplace=True) keeps the cell
# explicit about which frame it produces.
full_df = full_df.rename(columns={"SalePrice_x": "SalePrice"})

In [164]:
# Sale price divided by above-ground living area (GrLivArea), row by row.
full_df['PricePerSqft'] = full_df['SalePrice'].div(full_df['GrLivArea'])

In [169]:
# Persist the enriched frame (coordinates, distances, price per square foot)
# without writing the row index.
output_csv = r'./data/full_wCoord.csv'
full_df.to_csv(output_csv, index=False)