In [1]:
import pandas as pd
from sklearn.metrics.pairwise import haversine_distances
from math import radians
import sqlite3

In [2]:
# Open the project's SQLite database and load every Arizona listing into a
# DataFrame. The connection object is left open after the query; call
# connection.close() once no further queries are needed.
connection = sqlite3.connect('proj4_grp2.sqlite')
data = pd.read_sql_query(
    "SELECT * FROM realestate_info where state = 'AZ'",
    con=connection,
)
# Preview the first rows as the cell's output.
data.head()

Unnamed: 0,property_id,address,street_name,city,state,latitude,longitude,postcode,price,bedroom_number,bathroom_number,price_per_unit,living_space,land_space,property_type
0,8352055,"854 Citrus St, Bullhead City, AZ 86442",Citrus St,Bullhead City,AZ,35.11426,-114.618385,86442,54900.0,2.0,1.0,79.0,688.0,5000.0,MANUFACTURED
1,104049502,"411 Riverfront Dr LOT 5, Bullhead City, AZ 86442",Riverfront Dr LOT 5,Bullhead City,AZ,35.1137,-114.63446,86442,2200000.0,6.0,8.0,425.0,5173.0,6534.0,SINGLE_FAMILY
2,64954116,"1825 E Shore Villas Dr UNIT 33, Bullhead City,...",E Shore Villas Dr UNIT 33,Bullhead City,AZ,35.1227,-114.58975,86442,379900.0,3.0,3.0,230.0,1649.0,3640.0,SINGLE_FAMILY
3,71677227,"790 Stahlman Dr, Bullhead City, AZ 86442",Stahlman Dr,Bullhead City,AZ,35.1181,-114.62062,86442,549900.0,3.0,4.0,169.0,3250.0,10225.0,SINGLE_FAMILY
4,8351548,"905 Citrus St, Bullhead City, AZ 86442",Citrus St,Bullhead City,AZ,35.114693,-114.61625,86442,169900.0,3.0,2.0,146.0,1157.0,5000.0,MANUFACTURED


In [5]:
def get_similar_nearby_properties(ref_latitude, ref_longitude, ref_bedroom_number,
                                  ref_bathroom_number, ref_price, data, max_distance_km=10,
                                  max_bedroom_number_diff=1, max_bathroom_number_diff=1, max_price_diff=50000):
    """Return listings that are both near to and comparable with a reference property.

    A listing is kept when all of the following hold (every bound is inclusive):
      * its great-circle distance to the reference point is at most
        ``max_distance_km``;
      * its ``bedroom_number`` differs from ``ref_bedroom_number`` by at most
        ``max_bedroom_number_diff``;
      * its ``bathroom_number`` differs from ``ref_bathroom_number`` by at most
        ``max_bathroom_number_diff``;
      * its ``price`` differs from ``ref_price`` by at most ``max_price_diff``.

    Parameters
    ----------
    ref_latitude, ref_longitude : float
        Reference location in decimal degrees.
    ref_bedroom_number, ref_bathroom_number : number
        Bedroom / bathroom counts of the reference property.
    ref_price : number
        Price of the reference property.
    data : pandas.DataFrame
        Listings to search. Must provide the columns ``latitude``,
        ``longitude``, ``bedroom_number``, ``bathroom_number`` and ``price``.
        The frame is NOT modified; all work happens on a copy.
    max_distance_km : number, default 10
        Search radius in kilometres.
    max_bedroom_number_diff, max_bathroom_number_diff : number, default 1
        Largest accepted absolute difference in bedroom / bathroom count.
    max_price_diff : number, default 50000
        Largest accepted absolute price difference.

    Returns
    -------
    pandas.DataFrame
        The matching rows (original index labels preserved) with three extra
        columns -- ``lat_rad``, ``lon_rad`` and ``distance`` (kilometres) --
        sorted by ascending ``distance``.
    """
    # Mean Earth radius used to scale the unit-sphere haversine result to km.
    earth_radius_km = 6371.0

    # Work on a private copy so the helper columns never leak into the
    # caller's frame. Rows without coordinates can never satisfy the distance
    # filter, so they are dropped before any distance is computed.
    candidates = data.dropna(subset=['latitude', 'longitude']).copy()

    # Convert latitude and longitude from degrees to radians.
    candidates['lat_rad'] = candidates['latitude'].map(radians)
    candidates['lon_rad'] = candidates['longitude'].map(radians)

    if candidates.empty:
        # Nothing to measure against: return the empty frame with the same
        # extra columns a non-empty result would carry.
        candidates['distance'] = pd.Series(dtype=float)
        return candidates

    # Reference point in radians, in the same [lat, lon] order as the rows.
    ref_point = [radians(ref_latitude), radians(ref_longitude)]

    # Haversine distance of every listing to the reference point, in km.
    candidates['distance'] = haversine_distances(
        candidates[['lat_rad', 'lon_rad']].values, [ref_point]
    ).reshape(-1) * earth_radius_km

    # Listings within the search radius.
    is_nearby = candidates['distance'] <= max_distance_km

    # Listings with a similar number of bedrooms, bathrooms and a similar price.
    is_similar = (
        (abs(candidates['bedroom_number'] - ref_bedroom_number) <= max_bedroom_number_diff) &
        (abs(candidates['bathroom_number'] - ref_bathroom_number) <= max_bathroom_number_diff) &
        (abs(candidates['price'] - ref_price) <= max_price_diff)
    )

    # Closest matches first.
    return candidates[is_nearby & is_similar].sort_values(by='distance')

# Attributes of the reference listing the search is centred on.
reference_latitude = 35.114260
reference_longitude = -114.618385
reference_bedrooms = 2
reference_bathrooms = 1
reference_price = 54900

# Look for comparable listings: within 10 km, at most one bedroom and one
# bathroom of difference, and a price within 50,000 of the reference.
similar_nearby_properties = get_similar_nearby_properties(
    ref_latitude=reference_latitude,
    ref_longitude=reference_longitude,
    ref_bedroom_number=reference_bedrooms,
    ref_bathroom_number=reference_bathrooms,
    ref_price=reference_price,
    data=data,
    max_distance_km=10,
    max_bedroom_number_diff=1,
    max_bathroom_number_diff=1,
    max_price_diff=50000,
)

# Columns shown in the printed report.
display_columns = [
    'property_id', 'latitude', 'longitude', 'distance',
    'bedroom_number', 'bathroom_number', 'living_space', 'price',
]

print("Similar Nearby Properties:")
print(similar_nearby_properties[display_columns])


Similar Nearby Properties:
    property_id   latitude   longitude  distance  bedroom_number  \
0       8352055  35.114260 -114.618385  0.000000             2.0   
17      8349815  35.113870 -114.607680  0.974675             2.0   
163     8357308  35.105820 -114.621320  0.975721             2.0   
82      8356379  35.106533 -114.629020  1.293857             3.0   
193     8356488  35.105473 -114.631840  1.566083             2.0   
186     8356885  35.103540 -114.630450  1.620295             2.0   
63      8355626  35.100277 -114.628280  1.796585             2.0   
171     8358492  35.103950 -114.636580  2.013355             2.0   
174     8355667  35.097440 -114.628180  2.071701             2.0   
81      8361446  35.110840 -114.576710  3.809789             1.0   
64   2064210915  35.114925 -114.576100  3.846862             1.0   
122   191761042  35.150047 -114.566290  6.186945             2.0   
129     8348586  35.150932 -114.566150  6.260328             1.0   
153     8348570  35.1