Cleaning the dataset (separating coordinates)



In [1]:
import pandas as pd
import numpy as np

In [2]:
from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    # Radius of earth in kilometers is 6371
    km = 6371* c
    return km

In [3]:
vacancies = pd.read_csv('Retail_Vacant_Storefronts_December_2020.csv')
vacancies.head()

Unnamed: 0,Commercial District,Address,City,State,Zip Code,Square Footage,Length of Vacancy,Ownership Type,Former Tenant/Current Business,Business for Sale,Leasing Activity,Recorded Owner,Leasing Contact,Coordinates for Mapping
0,Smaller Districts,123 River Street,Cambridge,Massachusetts,2139,900.0,5 years or more,Individual,River Gods,,Leased pending occupancy,"Arthur N. Dobelis 263 Hicks St. #4 Brooklyn, N...","Arthur Dobelis, (646) 722-0824",POINT (-71.10871620999995 42.36404486100003)
1,Kendall Square,One Kendall Square,Cambridge,Massachusetts,2139,,6-10 months,Real Estate Investment Trust (REIT),Flat Top Johnny’s,,Unknown,"ARE-MA Region No. 59 LLC, 385 East Colorado Bo...",,POINT (-71.09011699999996 42.36743900000005)
2,Fresh Pond-Alewife,93 Blanchard Rd,Cambridge,Massachusetts,2138,,6-10 months,Trust,Knittin Kitten,,Unknown,"Hillside Garden Supply Co., 280 Blanchard Rd.,...",,POINT (-71.15799258099997 42.39426321500008)
3,Inman Square,168 Hampshire Street,Cambridge,Massachusetts,2139,840.0,5 years or more,Real Estate Investment Trust (REIT),Lapel's Dry Cleaning,,Unknown,"Berkmar LLC c/o NCP Management Company, P.O. B...","NCP Management, Inc. (617) 630-1868",POINT (-71.09891659199997 42.37186069200004)
4,East Cambridge,149 Cambridge Street,Cambridge,Massachusetts,2141,,6-10 months,,Law Offices,,Actively leasing,"Anthony Gargano, trustee of Gargano Family Tru...",(617) 876-6780,POINT (-71.07825855299996 42.37099991300005)


In [4]:
# Create new column with numeric x and y coordinates

vacancies['Coordinates for Mapping'] = vacancies['Coordinates for Mapping'].map(lambda x: x.lstrip('POINT (').rstrip(')'))
vacancies[['long','lat']] = vacancies['Coordinates for Mapping'].str.split(expand=True)
vacancies = vacancies.drop("Coordinates for Mapping", axis = 1)
vacancies[["long", "lat"]] = vacancies[["long", "lat"]].apply(pd.to_numeric)
vacancies.head()

Unnamed: 0,Commercial District,Address,City,State,Zip Code,Square Footage,Length of Vacancy,Ownership Type,Former Tenant/Current Business,Business for Sale,Leasing Activity,Recorded Owner,Leasing Contact,long,lat
0,Smaller Districts,123 River Street,Cambridge,Massachusetts,2139,900.0,5 years or more,Individual,River Gods,,Leased pending occupancy,"Arthur N. Dobelis 263 Hicks St. #4 Brooklyn, N...","Arthur Dobelis, (646) 722-0824",-71.108716,42.364045
1,Kendall Square,One Kendall Square,Cambridge,Massachusetts,2139,,6-10 months,Real Estate Investment Trust (REIT),Flat Top Johnny’s,,Unknown,"ARE-MA Region No. 59 LLC, 385 East Colorado Bo...",,-71.090117,42.367439
2,Fresh Pond-Alewife,93 Blanchard Rd,Cambridge,Massachusetts,2138,,6-10 months,Trust,Knittin Kitten,,Unknown,"Hillside Garden Supply Co., 280 Blanchard Rd.,...",,-71.157993,42.394263
3,Inman Square,168 Hampshire Street,Cambridge,Massachusetts,2139,840.0,5 years or more,Real Estate Investment Trust (REIT),Lapel's Dry Cleaning,,Unknown,"Berkmar LLC c/o NCP Management Company, P.O. B...","NCP Management, Inc. (617) 630-1868",-71.098917,42.371861
4,East Cambridge,149 Cambridge Street,Cambridge,Massachusetts,2141,,6-10 months,,Law Offices,,Actively leasing,"Anthony Gargano, trustee of Gargano Family Tru...",(617) 876-6780,-71.078259,42.371


In [5]:
# Calculate distance from MIT and Harvard

harv_lat = 42.3744
harv_long = -71.1171
mit_lat = 42.3598
mit_long = -71.0921

vacancies["harv_dist"] = ""
vacancies["mit_dist"] = ""
vacancies["min_dist"] = ""

for i in range(len(vacancies.index)):
    vacancies["harv_dist"][i] = haversine(harv_long, harv_lat, vacancies["long"][i], vacancies["lat"][i])
    vacancies["mit_dist"][i] = haversine(mit_long, mit_lat, vacancies["long"][i], vacancies["lat"][i])
    vacancies["min_dist"][i] = min(vacancies["harv_dist"][i], vacancies["mit_dist"][i])

vacancies.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app


Unnamed: 0,Commercial District,Address,City,State,Zip Code,Square Footage,Length of Vacancy,Ownership Type,Former Tenant/Current Business,Business for Sale,Leasing Activity,Recorded Owner,Leasing Contact,long,lat,harv_dist,mit_dist,min_dist
0,Smaller Districts,123 River Street,Cambridge,Massachusetts,2139,900.0,5 years or more,Individual,River Gods,,Leased pending occupancy,"Arthur N. Dobelis 263 Hicks St. #4 Brooklyn, N...","Arthur Dobelis, (646) 722-0824",-71.108716,42.364045,1.341712,1.444518,1.341712
1,Kendall Square,One Kendall Square,Cambridge,Massachusetts,2139,,6-10 months,Real Estate Investment Trust (REIT),Flat Top Johnny’s,,Unknown,"ARE-MA Region No. 59 LLC, 385 East Colorado Bo...",,-71.090117,42.367439,2.347921,0.864902,0.864902
2,Fresh Pond-Alewife,93 Blanchard Rd,Cambridge,Massachusetts,2138,,6-10 months,Trust,Knittin Kitten,,Unknown,"Hillside Garden Supply Co., 280 Blanchard Rd.,...",,-71.157993,42.394263,4.019789,6.631841,4.019789
3,Inman Square,168 Hampshire Street,Cambridge,Massachusetts,2139,840.0,5 years or more,Real Estate Investment Trust (REIT),Lapel's Dry Cleaning,,Unknown,"Berkmar LLC c/o NCP Management Company, P.O. B...","NCP Management, Inc. (617) 630-1868",-71.098917,42.371861,1.520177,1.453325,1.453325
4,East Cambridge,149 Cambridge Street,Cambridge,Massachusetts,2141,,6-10 months,,Law Offices,,Actively leasing,"Anthony Gargano, trustee of Gargano Family Tru...",(617) 876-6780,-71.078259,42.371,3.213076,1.686457,1.686457


In [6]:
vacancies.to_csv("vacancies_with_distances.csv")

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=57b93841-e07c-4246-b1c8-9c365e3b1114' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>