In [1]:
# Import packages

import pandas as pd
from math import radians, sin, cos, sqrt, atan2
from datetime import datetime

from sqlalchemy import create_engine, inspect, MetaData, Table, insert
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

In [2]:
# Read data
#
# Loads the full rental and purchase listing tables from the project's SQLite
# database into two DataFrames used by every later cell.

engine = create_engine("sqlite:///../Dataset/real_estate_data.sqlite")

## Rent Data

sql_query_rent = '''
    SELECT *
    FROM propertyRent
'''

## Purchase Data

sql_query_purchase = '''
    SELECT *
    FROM propertyPurchase
'''

# The context manager closes the connection as soon as both tables are loaded,
# so no connection is left open while later cells write to the same database.
with engine.connect() as conn:
    rent_property_df = pd.read_sql(sql_query_rent, conn)
    purchase_property_df = pd.read_sql(sql_query_purchase, conn)

In [3]:
# Create distance definition

def calculate_distance(lat1, lon1, lat2, lon2, radius_km=6371.0):
    """Return the great-circle distance between two points, in kilometres.

    Uses the haversine formula on a sphere.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.
    radius_km : float, optional
        Sphere radius in kilometres. Defaults to 6371, the mean radius of
        the Earth.

    Returns
    -------
    float
        Distance in the same unit as ``radius_km``. NaN if any coordinate
        is NaN.
    """
    # Convert latitude and longitude from degrees to radians
    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2

    # Floating-point rounding can push `a` marginally above 1 for (nearly)
    # antipodal points, which would make sqrt() raise a domain error.
    # Only `1 - a` is floored: NaN inputs still propagate through sqrt(a).
    c = 2 * atan2(sqrt(a), sqrt(max(0.0, 1 - a)))

    return radius_km * c


In [5]:
# Create enriched property DF
#
# For every purchase listing, find the rental listings that are comparable
# (same type, bathrooms, bedrooms and EPC, similar size, close by) and use
# their mean price as the predicted monthly rent. Produces:
#   * enriched_purchase_property_df - one row per purchase listing
#   * purchased_property_comp_df    - one row per (purchase, comparable rental)

## Matching thresholds

SIZE_TOLERANCE_M2 = 5         # comparable rentals must be within +/- this size
MAX_COMP_DISTANCE_KM = 0.5    # comparable rentals must lie within this radius

## Column mappings (source column -> output label)

PREDICTED_RENT_LABEL = "Predicted rent price (LT)"

PURCHASE_COLUMN_LABELS = {
    "propertyCode": "Property Code",
    "propertyType": "Property Type",
    "description": "Description",
    "address": "Address",
    "municipality": "Municipality",
    "country": "Country",
    "latitude": "Latitude",
    "longitude": "Longitude",
    "bathrooms": "# Bathrooms",
    "bedrooms": "# Bedrooms",
    "size": "Size (m2)",
    "status": "Status",
    "floor": "Floor",
    "hasLift": "Has Lift",
    "newDevelopment": "New Development",
    "epc": "EPC",
    "price": "Purchase Price",
    "url": "URL",
    "image": "Image",
}

COMP_COLUMN_LABELS = {
    "propertyCode": "Property Code (Comp rent)",
    "propertyType": "Property Type (Comp rent)",
    "latitude": "Latitude",
    "longitude": "Longitude",
    "bathrooms": "# Bathrooms (Comp rent)",
    "bedrooms": "# Bedrooms (Comp rent)",
    "size": "Size (m2) (Comp rent)",
    "status": "Status (Comp rent)",
    "floor": "Floor (Comp rent)",
    "hasLift": "Has Lift (Comp rent)",
    "epc": "EPC (Comp rent)",
    "price": "Monthly rental price (LT) (Comp rent)",
    "url": "URL (Comp rent)",
    "image": "Image (Comp rent)",
}

## Output column order (the predicted rent sits right after the purchase price)

ENRICHED_COLUMNS = [
    "Property Code", "Property Type", "Description", "Address",
    "Municipality", "Country", "Latitude", "Longitude",
    "# Bathrooms", "# Bedrooms", "Size (m2)", "Status", "Floor",
    "Has Lift", "New Development", "EPC", "Purchase Price",
    PREDICTED_RENT_LABEL, "URL", "Image",
]
COMP_COLUMNS = ["Property Code"] + list(COMP_COLUMN_LABELS.values())

purchase_records = []
comp_records = []

for _, purchase in purchase_property_df.iterrows():

    ## Filter on similar properties

    size = purchase["size"]
    is_similar = (
        (rent_property_df["propertyType"] == purchase["propertyType"])
        & (rent_property_df["bathrooms"] == purchase["bathrooms"])
        & (rent_property_df["bedrooms"] == purchase["bedrooms"])
        & (rent_property_df["epc"] == purchase["epc"])
        # Both bounds must hold. Joining them with `|` would accept every
        # rental that has a size, regardless of how different it is.
        & (rent_property_df["size"] >= size - SIZE_TOLERANCE_M2)
        & (rent_property_df["size"] <= size + SIZE_TOLERANCE_M2)
    )

    # Work on a copy so adding the distance column never writes into a view
    # of rent_property_df.
    similar_rent_df = rent_property_df.loc[is_similar].copy()

    ## Filter on acceptable distance from purchase property

    similar_rent_df["Distance from property (km)"] = [
        calculate_distance(purchase["latitude"], purchase["longitude"], lat_rent, lon_rent)
        for lat_rent, lon_rent in zip(similar_rent_df["latitude"], similar_rent_df["longitude"])
    ]

    nearby_rent_property_df = similar_rent_df.loc[
        similar_rent_df["Distance from property (km)"] <= MAX_COMP_DISTANCE_KM
    ].reset_index(drop=True)

    ## Purchase property details + predicted rent
    ## (the mean of an empty selection is NaN, i.e. "no comparable rentals")

    purchase_record = {
        label: purchase[column] for column, label in PURCHASE_COLUMN_LABELS.items()
    }
    purchase_record[PREDICTED_RENT_LABEL] = nearby_rent_property_df["price"].mean()
    purchase_records.append(purchase_record)

    ## Comparable rental properties

    for rent in nearby_rent_property_df.to_dict("records"):
        comp_record = {"Property Code": purchase["propertyCode"]}
        for column, label in COMP_COLUMN_LABELS.items():
            comp_record[label] = rent[column]
        comp_records.append(comp_record)


# Passing `columns` fixes the column order and keeps the headers present even
# when there are no records at all.
enriched_purchase_property_df = pd.DataFrame(purchase_records, columns=ENRICHED_COLUMNS)
purchased_property_comp_df = pd.DataFrame(comp_records, columns=COMP_COLUMNS)

In [9]:
# Export the comparable-rental table to the project's Dataset folder.
# The path is relative (same folder as the SQLite database read above) so the
# notebook does not depend on one user's absolute directory layout.
purchased_property_comp_df.to_excel("../Dataset/purchased property.xlsx")


In [6]:
# Calculate Historical Data
#
# For every municipality found in the purchase listings, compute today's mean
# price per bedroom count and per size bucket, once for purchase listings and
# once for rental listings. Produces historical_data_df with one row per
# (city, attribute). A bucket without listings has a mean of NaN.

today_date = datetime.today().date()

cities = purchase_property_df["municipality"].unique()
attributes = ["Purchase Price", "Monthly LT Rental"]

# Listing table that supplies the prices for each attribute
source_df_by_attribute = {
    "Purchase Price": purchase_property_df,
    "Monthly LT Rental": rent_property_df,
}

# Position in this list == exact number of bedrooms
EXACT_BEDROOM_LABELS = [
    "0 bedrooms", "1 bedroom", "2 bedrooms",
    "3 bedrooms", "4 bedrooms", "5 bedrooms",
]

# (upper bound in m2, label). Each bucket starts just above the previous upper
# bound, so fractional sizes such as 25.5 m2 are not dropped between buckets.
SIZE_BUCKETS = [
    (25, "0-25 m2"),
    (50, "26-50 m2"),
    (75, "51-75 m2"),
    (100, "76-100 m2"),
    (150, "101-150 m2"),
    (200, "151-200 m2"),
]

HISTORICAL_COLUMNS = (
    ["City", "Attribute", "Date"]
    + EXACT_BEDROOM_LABELS
    + ["> 5 bedrooms"]
    + [label for _, label in SIZE_BUCKETS]
    + ["> 200 m2"]
)

historical_records = []

for city_value in cities:

    for attribute_value in attributes:

        # An unknown attribute raises KeyError here instead of silently
        # reusing the data of the previous iteration.
        source_df = source_df_by_attribute[attribute_value]
        relevant_data_df = source_df.loc[source_df["municipality"] == city_value, :]

        bedrooms = relevant_data_df["bedrooms"]
        sizes = relevant_data_df["size"]
        prices = relevant_data_df["price"]

        record = {"City": city_value, "Attribute": attribute_value, "Date": today_date}

        ## Number of bedrooms

        for bedroom_count, label in enumerate(EXACT_BEDROOM_LABELS):
            record[label] = prices.loc[bedrooms == bedroom_count].mean()
        record["> 5 bedrooms"] = prices.loc[bedrooms > 5].mean()

        ## Size of property

        lower_bound = None
        for upper_bound, label in SIZE_BUCKETS:
            if lower_bound is None:
                # First bucket includes its lower edge (0 m2)
                in_bucket = (sizes >= 0) & (sizes <= upper_bound)
            else:
                in_bucket = (sizes > lower_bound) & (sizes <= upper_bound)
            record[label] = prices.loc[in_bucket].mean()
            lower_bound = upper_bound
        record["> 200 m2"] = prices.loc[sizes > lower_bound].mean()

        historical_records.append(record)


# Passing `columns` fixes the column order and keeps the headers present even
# when there are no cities.
historical_data_df = pd.DataFrame(historical_records, columns=HISTORICAL_COLUMNS)



In [10]:
# Save Data
#
# Replaces the contents of propertyEnriched and propertyComp with the freshly
# computed frames and refreshes the rows of propertyHistorical for the date
# being written. Everything runs in one transaction: on any error the database
# is rolled back to its previous state instead of being left cleared or
# half-filled.

## Column mappings (database column -> DataFrame label)

ENRICHED_DB_COLUMNS = {
    "propertyCode": "Property Code",
    "propertyType": "Property Type",
    "description": "Description",
    "address": "Address",
    "municipality": "Municipality",
    "country": "Country",
    "lat": "Latitude",
    "lon": "Longitude",
    "bathrooms": "# Bathrooms",
    "bedrooms": "# Bedrooms",
    "size": "Size (m2)",
    "status": "Status",
    "floor": "Floor",
    "hasLift": "Has Lift",
    "newDevelopment": "New Development",
    "epc": "EPC",
    "price": "Purchase Price",
    "predictedRentPrice": "Predicted rent price (LT)",
    "url": "URL",
    "image": "Image",
}

COMP_DB_COLUMNS = {
    "propertyCode": "Property Code",
    "propertyCodeComp": "Property Code (Comp rent)",
    "propertyTypeComp": "Property Type (Comp rent)",
    "latComp": "Latitude",
    "lonComp": "Longitude",
    "bathroomsComp": "# Bathrooms (Comp rent)",
    "bedroomsComp": "# Bedrooms (Comp rent)",
    "sizeComp": "Size (m2) (Comp rent)",
    "statusComp": "Status (Comp rent)",
    "floorComp": "Floor (Comp rent)",
    "hasLiftComp": "Has Lift (Comp rent)",
    "epcComp": "EPC (Comp rent)",
    "monthlyRentalPriceLtComp": "Monthly rental price (LT) (Comp rent)",
    "urlComp": "URL (Comp rent)",
    "imageComp": "Image (Comp rent)",
}

HISTORICAL_DB_COLUMNS = {
    "city": "City",
    "attribute": "Attribute",
    "date": "Date",
    "zeroBedroom": "0 bedrooms",
    "oneBedroom": "1 bedroom",
    "twoBedroom": "2 bedrooms",
    "threeBedroom": "3 bedrooms",
    "fourBedroom": "4 bedrooms",
    "fiveBedroom": "5 bedrooms",
    "moreThanFiveBedroom": "> 5 bedrooms",
    "twentyFive": "0-25 m2",
    "fifty": "26-50 m2",
    "seventyFive": "51-75 m2",
    "hundred": "76-100 m2",
    "hundredFifty": "101-150 m2",
    "twoHundred": "151-200 m2",
    "moreThanTwoHundred": "> 200 m2",
}


def _to_native(value):
    """Return `value` as a plain Python object suitable as a SQL parameter.

    Missing values (None / NaN) become None, i.e. SQL NULL. Numpy scalars are
    unwrapped with `.item()`.
    NOTE(review): the SQLite driver is not guaranteed to bind numpy integer
    scalars as integers - confirm against the stored data if in doubt.
    """
    if pd.isna(value):
        return None
    if hasattr(value, "item"):
        return value.item()
    return value


def _to_db_rows(frame, db_columns):
    """Convert `frame` into a list of insertable dicts keyed by database column.

    `db_columns` maps each database column name to the DataFrame label that
    holds its values. Returns an empty list for an empty frame.
    """
    return [
        {db_column: _to_native(record[label]) for db_column, label in db_columns.items()}
        for record in frame.to_dict("records")
    ]


## Connect to db

engine = create_engine("sqlite:///../Dataset/real_estate_data.sqlite")
metadata = MetaData()
metadata.reflect(bind = engine)

enriched_property_table = metadata.tables["propertyEnriched"]
property_comp_table = metadata.tables["propertyComp"]
historical_data_table = metadata.tables["propertyHistorical"]

## Prepare rows

enriched_rows = _to_db_rows(enriched_purchase_property_df, ENRICHED_DB_COLUMNS)
comp_rows = _to_db_rows(purchased_property_comp_df, COMP_DB_COLUMNS)
historical_rows = _to_db_rows(historical_data_df, HISTORICAL_DB_COLUMNS)

## Clear db and add new data (single transaction)

Session = sessionmaker(bind=engine)
session = Session()

try:
    ### Enriched Property File

    session.execute(enriched_property_table.delete())
    if enriched_rows:
        # A list of dicts makes this one executemany instead of one
        # statement + commit per row.
        session.execute(insert(enriched_property_table), enriched_rows)

    ### Property Comp File

    session.execute(property_comp_table.delete())
    if comp_rows:
        session.execute(insert(property_comp_table), comp_rows)

    ### Historical Property Data
    ### (history is kept; only rows for the date being written are replaced)

    if historical_rows:
        date_to_check = historical_rows[0]["date"]
        delete_query = historical_data_table.delete().where(historical_data_table.c.date == date_to_check)
        session.execute(delete_query)
        session.execute(insert(historical_data_table), historical_rows)

    session.commit()
except Exception:
    # Undo the deletes and any partial inserts before surfacing the error
    session.rollback()
    raise
finally:
    session.close()
