In [65]:
# Import packages

import pandas as pd
from math import radians, sin, cos, sqrt, atan2
from datetime import datetime

from sqlalchemy import create_engine, inspect, MetaData, Table, insert
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import IntegrityError

In [66]:
# Read data

engine = create_engine("sqlite:///../Dataset/real_estate_data.sqlite")
metadata = MetaData()
metadata.reflect(bind = engine)

Session = sessionmaker(bind=engine)
session = Session()

# Define the Table objects
rawPropertyRentData_table = Table('rawPropertyRentData', metadata, autoload_with=engine)
rawPropertyPurchaseData_table = Table('rawPropertyPurchaseData', metadata, autoload_with=engine)

# Read data using SQLAlchemy
rawPropertyRentData_df = pd.read_sql_table(rawPropertyRentData_table.name, engine)
rawPropertyPurchaseData_df = pd.read_sql_table(rawPropertyPurchaseData_table.name, engine)

session.close()

In [67]:
# Clean Database
## Connect to db

engine = create_engine("sqlite:///../Dataset/real_estate_data.sqlite")
metadata = MetaData()
metadata.reflect(bind = engine)

enriched_property_table = metadata.tables["enrichedPropertyData"]
comparison_property_table = metadata.tables["comparisonPropertyData"]
historical_purchase_table = metadata.tables["historicalPurchaseData"]
historical_rent_table = metadata.tables["historicalRentData"]


## Clear db

Session = sessionmaker(bind=engine)
session = Session()
session.execute(enriched_property_table.delete())
session.commit()
session.execute(comparison_property_table.delete())
session.commit()
session.close()

In [68]:
# Create distance definition

def calculate_distance(lat1, lon1, lat2, lon2):
    # Radius of the Earth in meters
    R = 6371
    
    # Convert latitude and longitude from degrees to radians
    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)
    
    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    distance = R * c
    
    return distance

In [69]:
# Calculate Predicted Monthly Rent

## Open connection with DB

Session = sessionmaker(bind=engine)
session = Session()


## Calculate predicted montly rent

for index, row in rawPropertyPurchaseData_df.iterrows():
    
    # Retrieve values purchase property

    property_code = row["propertyCode"]
    property_type = row["propertyType"]
    description = row["description"]
    address = row["address"]
    municipality = row["municipality"]
    country = row["country"]
    lat_purchase = row["latitude"]
    lon_purchase = row["longitude"]
    bathrooms = row["bathrooms"]
    bedrooms = row["bedrooms"]
    size =row["size"]
    status = row["status"]
    floor = row["floor"]
    has_lift = row["hasLift"]
    new_development = row["newDevelopment"]
    new_property = row["newProperty"]
    epc = row["epc"]
    purchase_price = row["purchasePrice"]
    url = row["url"]
    image = row["image"]


    # Run through rent properties
    ## Filter on similar properties
    lower_limit_seize = size - 5
    upper_limit_seize = size + 5

    filtered_rent_property_df = rawPropertyRentData_df.loc[(rawPropertyRentData_df["propertyType"] == property_type) & 
                                                           (rawPropertyRentData_df["bathrooms"] == bathrooms) &
                                                           (rawPropertyRentData_df["bedrooms"] == bedrooms) & 
                                                           (rawPropertyRentData_df["epc"] == epc) & 
                                                           (rawPropertyRentData_df["size"] >= lower_limit_seize) & 
                                                           (rawPropertyRentData_df["size"] <= upper_limit_seize), :]
    filtered_rent_property_df.reset_index(drop=True, inplace = True)


    ## Filter on acceptable distance from purchase property
    ### Calculate distance

    distance_list = []

    for index, row in filtered_rent_property_df.iterrows():

        lat_rent = row["latitude"]
        lon_rent = row["longitude"]

        distance = calculate_distance(lat_purchase, lon_purchase, lat_rent, lon_rent)
        distance_list.append(distance)
    
    filtered_rent_property_df["Distance from property (km)"] = distance_list
    

    ### Filter on acceptable radius

    nearby_rent_property_df = filtered_rent_property_df.loc[filtered_rent_property_df["Distance from property (km)"] <= 1, :]

    if len(nearby_rent_property_df["propertyCode"]) <= 2:
        nearby_rent_property_df = filtered_rent_property_df.loc[filtered_rent_property_df["Distance from property (km)"] <= 1.5, :]

        if len(nearby_rent_property_df["propertyCode"]) <= 2:
            nearby_rent_property_df = filtered_rent_property_df.loc[filtered_rent_property_df["Distance from property (km)"] <= 2, :]

            if len(nearby_rent_property_df["propertyCode"]) <= 2:
                nearby_rent_property_df = filtered_rent_property_df.loc[filtered_rent_property_df["municipality"] == municipality, :]

                if len(nearby_rent_property_df["propertyCode"]) <= 2:
                    predicted_monthly_rent = 0
                else:
                    predicted_monthly_rent = nearby_rent_property_df["monthlyRent"].mean() 
            else:
                predicted_monthly_rent = nearby_rent_property_df["monthlyRent"].mean()  
        else:
            predicted_monthly_rent = nearby_rent_property_df["monthlyRent"].mean()
    else:
        predicted_monthly_rent = nearby_rent_property_df["monthlyRent"].mean()

    nearby_rent_property_df.reset_index(drop=True, inplace = True)


    # Save Data to enrichedPropertyData

    try:
        new_property_data = {
            "propertyCode": property_code,
            "propertyType": property_type,
            "description": description,
            "address": address,
            "municipality": municipality,
            "country": country,
            "latitude": lat_purchase,
            "longitude": lon_purchase,
            "bathrooms": bathrooms,
            "bedrooms": bedrooms,
            "size": size,
            "status": status,
            "floor": floor,
            "hasLift": int(has_lift),
            "newDevelopment": int(new_development),
            "newProperty": int(new_property),
            "epc": epc,
            "purchasePrice": purchase_price,
            "predictedMonthlyRent": predicted_monthly_rent,
            "url": url,
            "image": image
        }
        insert_query = insert(enriched_property_table).values(**new_property_data)
        session.execute(insert_query)
        session.commit()
    except IntegrityError as e:
        pass


    # Save Data to comparisonPropertyData

    if len(nearby_rent_property_df["propertyCode"]) > 0:

        for index, row in nearby_rent_property_df.iterrows():

            try:
                new_property_comp_data = {
                    "propertyCodeMain": property_code,
                    "propertyCodeComp": row["propertyCode"],
                    "propertyType": row["propertyType"],
                    "address": row["address"],
                    "municipality": row["municipality"],
                    "country": row["country"],
                    "latitude": row["latitude"],
                    "longitude": row["longitude"],
                    "bathrooms": row["bathrooms"],
                    "bedrooms": row["bedrooms"],
                    "size": row["size"],
                    "status": row["status"],
                    "floor": row["floor"],
                    "hasLift": int(row["hasLift"]),
                    "newDevelopment": int(row["newDevelopment"]),
                    "newProperty": int(row["newProperty"]),
                    "epc": row["epc"],
                    "monthlyRent": row["monthlyRent"],
                    "url": row["url"],
                    "image": row["image"]
                }
                insert_query = insert(comparison_property_table).values(**new_property_comp_data)
                session.execute(insert_query)
                session.commit()
            except IntegrityError as e:
                pass

    else:
        pass


## Close connection with DB

session.close()    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_rent_property_df["Distance from property (km)"] = distance_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_rent_property_df["Distance from property (km)"] = distance_list
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_rent_property_df["Distance from property (km)"] = 

In [70]:
# Save Historical Rent Data

## Open connection with DB

Session = sessionmaker(bind=engine)
session = Session()


## Calculate historical values

today_date = datetime.today().date()
cities = rawPropertyRentData_df["municipality"].unique()

for x in range(len(cities)):

    city = cities[x]
    attribute = "Monthly LT Rent"

    relevant_data_df = rawPropertyRentData_df.loc[rawPropertyRentData_df["municipality"] == city, :]
    relevant_data_df.reset_index(drop=True, inplace = True)

    ## Number of bedrooms 

    zero_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 0, "monthlyRent"].mean()
    one_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 1, "monthlyRent"].mean() 
    two_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 2, "monthlyRent"].mean()
    three_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 3, "monthlyRent"].mean()
    four_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 4, "monthlyRent"].mean()
    five_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 5, "monthlyRent"].mean()
    more_than_five_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] > 5, "monthlyRent"].mean()


    ## Size of property

    size_one = relevant_data_df.loc[(relevant_data_df["size"] >= 0) & (relevant_data_df["size"] <= 25), "monthlyRent"].mean()
    size_two = relevant_data_df.loc[(relevant_data_df["size"] >= 26) & (relevant_data_df["size"] <= 50), "monthlyRent"].mean()
    size_three = relevant_data_df.loc[(relevant_data_df["size"] >= 51) & (relevant_data_df["size"] <= 75), "monthlyRent"].mean()
    size_four = relevant_data_df.loc[(relevant_data_df["size"] >= 76) & (relevant_data_df["size"] <= 100), "monthlyRent"].mean()
    size_five = relevant_data_df.loc[(relevant_data_df["size"] >= 101) & (relevant_data_df["size"] <= 150), "monthlyRent"].mean()
    size_six = relevant_data_df.loc[(relevant_data_df["size"] >= 151) & (relevant_data_df["size"] <= 200), "monthlyRent"].mean()
    size_seven = relevant_data_df.loc[relevant_data_df["size"] > 200, "monthlyRent"].mean()

    # Clean data

    attribute_values = [zero_bedroom, one_bedroom, two_bedroom, three_bedroom, four_bedroom, five_bedroom, more_than_five_bedroom,
                        size_one, size_two, size_three, size_four, size_five, size_six, size_seven]
    
    attribute_values_cleaned = [0 if pd.isna(x) else x for x in attribute_values]

    # Save Data to DB

    try:
        new_property_data = {
            "city": city,
            "attribute": attribute,
            "date": today_date,
            "zeroBedroom": attribute_values_cleaned[0],
            "oneBedroom": attribute_values_cleaned[1],
            "twoBedroom": attribute_values_cleaned[2],
            "threeBedroom": attribute_values_cleaned[3],
            "fourBedroom": attribute_values_cleaned[4],
            "fiveBedroom": attribute_values_cleaned[5],
            "moreThanFiveBedroom": attribute_values_cleaned[6],
            "twentyFive": attribute_values_cleaned[7],
            "fifty": attribute_values_cleaned[8],
            "seventyFive": attribute_values_cleaned[9],
            "hundred": attribute_values_cleaned[10],
            "hundredFifty": attribute_values_cleaned[11],
            "twoHundred": attribute_values_cleaned[12],
            "moreThanTwoHundred": attribute_values_cleaned[13]
        }
        insert_query = insert(historical_rent_table).values(**new_property_data)
        session.execute(insert_query)
        session.commit()
    except IntegrityError as e:
        pass


## Close connection with DB

session.close()    

In [71]:
# Save Historical Purchase Data

## Open connection with DB

Session = sessionmaker(bind=engine)
session = Session()


## Calculate historical values

today_date = datetime.today().date()
cities = rawPropertyPurchaseData_df["municipality"].unique()

for x in range(len(cities)):

    city = cities[x]
    attribute = "Purchase Price"

    relevant_data_df = rawPropertyPurchaseData_df.loc[rawPropertyPurchaseData_df["municipality"] == city, :]
    relevant_data_df.reset_index(drop=True, inplace = True)

    ## Number of bedrooms 

    zero_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 0, "purchasePrice"].mean()
    one_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 1, "purchasePrice"].mean() 
    two_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 2, "purchasePrice"].mean()
    three_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 3, "purchasePrice"].mean()
    four_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 4, "purchasePrice"].mean()
    five_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] == 5, "purchasePrice"].mean()
    more_than_five_bedroom = relevant_data_df.loc[relevant_data_df["bedrooms"] > 5, "purchasePrice"].mean()


    ## Size of property

    size_one = relevant_data_df.loc[(relevant_data_df["size"] >= 0) & (relevant_data_df["size"] <= 25), "purchasePrice"].mean()
    size_two = relevant_data_df.loc[(relevant_data_df["size"] >= 26) & (relevant_data_df["size"] <= 50), "purchasePrice"].mean()
    size_three = relevant_data_df.loc[(relevant_data_df["size"] >= 51) & (relevant_data_df["size"] <= 75), "purchasePrice"].mean()
    size_four = relevant_data_df.loc[(relevant_data_df["size"] >= 76) & (relevant_data_df["size"] <= 100), "purchasePrice"].mean()
    size_five = relevant_data_df.loc[(relevant_data_df["size"] >= 101) & (relevant_data_df["size"] <= 150), "purchasePrice"].mean()
    size_six = relevant_data_df.loc[(relevant_data_df["size"] >= 151) & (relevant_data_df["size"] <= 200), "purchasePrice"].mean()
    size_seven = relevant_data_df.loc[relevant_data_df["size"] > 200, "purchasePrice"].mean()

    # Clean data

    attribute_values = [zero_bedroom, one_bedroom, two_bedroom, three_bedroom, four_bedroom, five_bedroom, more_than_five_bedroom,
                        size_one, size_two, size_three, size_four, size_five, size_six, size_seven]
    
    attribute_values_cleaned = [0 if pd.isna(x) else x for x in attribute_values]


    # Save Data to DB

    try:
        new_property_data = {
            "city": city,
            "attribute": attribute,
            "date": today_date,
            "zeroBedroom": attribute_values_cleaned[0],
            "oneBedroom": attribute_values_cleaned[1],
            "twoBedroom": attribute_values_cleaned[2],
            "threeBedroom": attribute_values_cleaned[3],
            "fourBedroom": attribute_values_cleaned[4],
            "fiveBedroom": attribute_values_cleaned[5],
            "moreThanFiveBedroom": attribute_values_cleaned[6],
            "twentyFive": attribute_values_cleaned[7],
            "fifty": attribute_values_cleaned[8],
            "seventyFive": attribute_values_cleaned[9],
            "hundred": attribute_values_cleaned[10],
            "hundredFifty": attribute_values_cleaned[11],
            "twoHundred": attribute_values_cleaned[12],
            "moreThanTwoHundred": attribute_values_cleaned[13]
        }
        insert_query = insert(historical_purchase_table).values(**new_property_data)
        session.execute(insert_query)
        session.commit()
    except IntegrityError as e:
        pass


## Close connection with DB

session.close()    