In [226]:
import pandas as pd
pd.set_option('display.max_columns', None)  # Show all columns in DataFrame display
pd.set_option('display.width', None)        # Don't truncate wide rows


In [227]:
import pandas as pd
import numpy as np
import asyncio
from KNN_faiss import GeoKNNSearch
from property_scraper import PropertyScraper
import re  # Import the scraper class
from property_analyzer import PropertyAnalyzer

In [228]:
def load_data():
    """Loads property dataset and splits into recent (2025) and historical records."""
    data_enrich_train = pd.read_excel("Data/final_merged_output.xlsx")

    return data_enrich_train

In [262]:
def initialize_knn(data_enrich_train):
    """Initializes the KNN model with historical property data."""
    return GeoKNNSearch(
        data=data_enrich_train[['Property_Index', 'position.lat', 'position.long']],
        lat_col='position.lat', lon_col='position.long', id_col='Property_Index',
        use_exact_distance=True
    )

In [263]:
df=load_data()
new_df = df.iloc[1:].reset_index(drop=True)

In [265]:
columns_to_keep = [
    "Matched_PID",
    "Input_PAON",
    "Input_SAON",
    "Input_Street",
    "Input_Postcode",
    "Property_Index",
    "property.type",
    "property.tenure",
    "property.age",
    "position.lat",
    "position.long",
    "property.highestPricePaid.value",
    "property.highestPricePaid.amount",
    "property.lowestPricePaid.value",
    "property.lowestPricePaid.amount",
    "property.estimatedCurrentValue.value",
    "property.estimatedCurrentValue.amount",
    "property.estimatedCurrentValue.floorArea",
    "property.estimatedCurrentValue.pricePerSqm",
    "property.estimatedCurrentValue.confidence"
]
new_df = new_df[columns_to_keep]


In [266]:
sample = new_df.iloc[3]
latitude=[sample["position.lat"]]
longitude=[sample["position.long"]]
latitude_np = np.float64(latitude[0])
longitude_np=np.float64(longitude[0])

In [267]:
subject_property_coordinates = (latitude_np, longitude_np)
knn = initialize_knn(new_df)
property_indices, distances = knn.knearest(subject_property_coordinates, 500, return_distances=True)
# Convert to a dictionary to get unique indices and their minimum distances
unique_distances = {}
for index, distance in zip(property_indices, distances):
    if index in unique_distances:
        if unique_distances[index] != distance:
            print(f"Conflict for index {index}: existing distance {unique_distances[index]}, new distance {distance}")
    else:
        unique_distances[index] = distance

potential_comparables = np.array(list(unique_distances.items()))

In [268]:
sample

Matched_PID                                   edd93a5663abe8b8b7bb930acf6c053e
Input_PAON                                                                  35
Input_SAON                                                                 NaN
Input_Street                                                 westmoreland road
Input_Postcode                                                        sw13 9rz
Property_Index                                                     SW13 9RZ 35
property.type                                                    Semi Detached
property.tenure                                                       Freehold
property.age                                                         1930-1949
position.lat                                                         51.476907
position.long                                                        -0.242901
property.highestPricePaid.value                                      2350000.0
property.highestPricePaid.amount                    

In [269]:
new_potential_comparables = pd.DataFrame(potential_comparables, columns=['Property_Index', 'Physical Distance'])

# Convert 'Physical Distance' to float
new_potential_comparables['Physical Distance'] = new_potential_comparables['Physical Distance'].astype(np.float64)

# Now merge with df on Property_Index
pc_df = new_df.merge(new_potential_comparables, on='Property_Index', how='inner')

In [270]:
new_potential_comparables = pd.read_excel("Data/premium_property_enrich_individual_properties.xlsx")

# Step 2: Ensure 'Property_Index' exists in both DataFrames
if 'Property_Index' not in new_potential_comparables.columns:
    raise ValueError("The new dataframe does not have a 'Property_Index' column.")

# Step 3: Group by 'Property_Index' and sum the 'Price' column
price_df = new_potential_comparables.groupby('Property_Index', as_index=False)['Price'].sum()

# Step 4: Merge with pc_df
pc_with_price = pc_df.merge(price_df, on='Property_Index', how='left')

# Step 5: Identify and report unmatched rows
unmatched = pc_with_price[pc_with_price['Price'].isna()]
if not unmatched.empty:
    print("Unmatched Property_Index rows:")
    print(unmatched['Property_Index'].tolist())
    print(f"Total unmatched rows: {len(unmatched)}")

# Drop unmatched rows where 'Price' is NaN
pc_with_price = pc_with_price.dropna(subset=['Price']).reset_index(drop=True)


Unmatched Property_Index rows:
['SW15 1AL EGLISTON LAWNS 13 3', 'W6 9PF QUEENS WHARF 2 FLAT 141', 'W6 9PF QUEENS WHARF 2 FLAT 140', 'W6 9NE QUEENS WHARF 2 FLAT 24', 'W6 9NE QUEENS WHARF 2 FLAT 44', 'W6 9NE QUEENS WHARF 2 FLAT 41', 'SW13 8AH HANDEL MANSIONS 94 FLAT 20', 'W6 9PF QUEENS WHARF 2 FLAT 147', 'W6 9NE QUEENS WHARF 2 FLAT 8', 'SW13 8AH HANDEL MANSIONS 94 FLAT 20', 'SW13 8AH HANDEL MANSIONS 94 FLAT 20', 'W6 9PF QUEENS WHARF 2 FLAT 163', 'SW6 6NZ ELM LODGE 75 FLAT 9', 'SW13 8HT WILLIAM HUNT MANSIONS 4 FLAT 60', 'SW13 8AH HANDEL MANSIONS 94 FLAT 20']
Total unmatched rows: 15


In [271]:
import pandas as pd

# Step 1: Load the Excel file
df = pd.read_excel("Data/premium_property_enrich_individual_properties.xlsx")

# Step 2: Get the Property_Index from the sample Series
property_index = sample['Property_Index']

# Step 3: Filter the dataframe to match the Property_Index
matching_rows = df[df['Property_Index'] == property_index]

if not matching_rows.empty:
    # Step 4: Sum the Price values if there are duplicates
    total_price = matching_rows['Price'].sum()

    # Step 5: Add Price to the sample Series
    sample['Price'] = total_price
else:
    print(f"No match found for Property_Index: {property_index}")


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['Price'] = total_price
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['Price'] = total_price


In [272]:
# Step 1: Sort by physical distance
nearest_5 = pc_with_price.sort_values(by='Physical Distance').head(10).copy()

# Step 2: Avoid division by zero (if distance = 0, add a tiny number)
nearest_5['Weight'] = 1 / (nearest_5['Physical Distance'] + 1e-6)

# Step 3: Calculate weighted average price
weighted_mean_price = (nearest_5['Price'] * nearest_5['Weight']).sum() / nearest_5['Weight'].sum()

# Step 4: Define 50% price bounds
lower_bound = weighted_mean_price * 0.5
upper_bound = weighted_mean_price * 1.5

# Step 5: Filter original dataframe based on those bounds
final_potential_comparables = pc_with_price[
    (pc_with_price['Price'] >= lower_bound) &
    (pc_with_price['Price'] <= upper_bound)
]

# Optional: print weighted mean and bounds
print(f"Weighted Mean Price: {weighted_mean_price:.2f}")
print(f"Allowed Price Range: {lower_bound:.2f} - {upper_bound:.2f}")


Weighted Mean Price: 2358037.06
Allowed Price Range: 1179018.53 - 3537055.59


In [273]:
# Step 6: Find additional rows matching Street, excluding same Property_Index and already-included ones
additional_rows = pc_with_price[
    (pc_with_price['Input_Street'] == sample['Input_Street']) &
    (pc_with_price['Property_Index'] != sample['Property_Index']) &
    (~pc_with_price['Property_Index'].isin(final_potential_comparables['Property_Index']))
]

# Step 7: Add those rows to final_potential_comparables
final_potential_comparables = pd.concat([final_potential_comparables, additional_rows], ignore_index=True)


In [274]:
import pandas as pd
import numpy as np

# Define columns to check
columns_to_check = [
    'property.type', 'property.tenure', 'property.age', 'position.lat', 'position.long',
    'property.highestPricePaid.value', 'property.highestPricePaid.amount',
    'property.lowestPricePaid.value', 'property.lowestPricePaid.amount',
    'property.estimatedCurrentValue.value', 'property.estimatedCurrentValue.amount',
    'property.estimatedCurrentValue.floorArea', 'property.estimatedCurrentValue.pricePerSqm',
    'property.estimatedCurrentValue.confidence', 'Physical Distance', 'Price'
]

# Step 1: Replace blank strings with NaN
df_cleaned = final_potential_comparables.copy()
df_cleaned[columns_to_check] = df_cleaned[columns_to_check].replace(r'^\s*$', np.nan, regex=True)

# Step 2: Count rows before dropping
initial_rows = df_cleaned.shape[0]

# Step 3: Drop rows with any NaN in the selected columns
df_cleaned = df_cleaned.dropna(subset=columns_to_check).reset_index(drop=True)

# Step 4: Count rows after dropping
final_rows = df_cleaned.shape[0]
dropped_rows = initial_rows - final_rows

print(f"Removed {dropped_rows} rows due to missing or invalid values in critical columns.")

# Optional: Assign back to your original variable
final_potential_comparables = df_cleaned


Removed 9 rows due to missing or invalid values in critical columns.


  df_cleaned[columns_to_check] = df_cleaned[columns_to_check].replace(r'^\s*$', np.nan, regex=True)


In [276]:
final_potential_comparables.columns

Index(['Matched_PID', 'Input_PAON', 'Input_SAON', 'Input_Street',
       'Input_Postcode', 'Property_Index', 'property.type', 'property.tenure',
       'property.age', 'position.lat', 'position.long',
       'property.highestPricePaid.value', 'property.highestPricePaid.amount',
       'property.lowestPricePaid.value', 'property.lowestPricePaid.amount',
       'property.estimatedCurrentValue.value',
       'property.estimatedCurrentValue.amount',
       'property.estimatedCurrentValue.floorArea',
       'property.estimatedCurrentValue.pricePerSqm',
       'property.estimatedCurrentValue.confidence', 'Physical Distance',
       'Price'],
      dtype='object')

In [277]:
final_potential_comparables

Unnamed: 0,Matched_PID,Input_PAON,Input_SAON,Input_Street,Input_Postcode,Property_Index,property.type,property.tenure,property.age,position.lat,position.long,property.highestPricePaid.value,property.highestPricePaid.amount,property.lowestPricePaid.value,property.lowestPricePaid.amount,property.estimatedCurrentValue.value,property.estimatedCurrentValue.amount,property.estimatedCurrentValue.floorArea,property.estimatedCurrentValue.pricePerSqm,property.estimatedCurrentValue.confidence,Physical Distance,Price
0,edd93a5663abe8b8b7bb930acf6c053e,35,,westmoreland road,sw13 9rz,SW13 9RZ 35,Semi Detached,Freehold,1930-1949,51.476907,-0.242901,2350000.0,"£2,350,000",840000.0,"£840,000",2703774.0,"£2,703,774",255,10603.035294,High,0.000142,2350000.0
1,57974246df65cb387ccd9dacbe0f13ef,4,,bellevue road,sw13 0bj,SW13 0BJ 4,Mid Terrace,Freehold,1900-1929,51.471997,-0.240512,2150000.0,"£2,150,000",2150000.0,"£2,150,000",2502930.0,"£2,502,930",208,12033.317308,High,0.570571,2150000.0
2,b1b987e09288d235cd7fb30b6c86b3e0,millers court,18,chiswick mall,w4 2pf,W4 2PF MILLERS COURT 18,End Terrace,Freehold,1976-1982,51.489039,-0.244783,2725000.0,"£2,725,000",690000.0,"£690,000",3162674.0,"£3,162,674",176,17969.738636,High,1.355436,2725000.0
3,a747715f3e9a6cb85bfd6730ea7f67f2,29,,palladian gardens,w4 2er,W4 2ER 29,Mid Terrace,Leasehold,2018,51.486462,-0.255222,2225000.0,"£2,225,000",2000000.0,"£2,000,000",2569519.0,"£2,569,519",224,11471.066964,High,1.362600,2225000.0
4,efffa5f2307641abf0df95ab7f62a122,20,,beverley road,sw13 0lx,SW13 0LX 20,Semi Detached,Freehold,before 1900,51.469128,-0.246497,2500000.0,"£2,500,000",2500000.0,"£2,500,000",2925455.0,"£2,925,455",201,14554.502488,High,0.900175,2500000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,69a319ebc9f49db696ae0c7a6250591d,347a,,upper richmond road,sw15 6xp,SW15 6XP 347A,Semi Detached,Freehold,2014,51.464225,-0.236161,2200000.0,"£2,200,000",1600000.0,"£1,600,000",2278902.0,"£2,278,902",273,8347.626374,High,1.485373,2200000.0
354,f59467c254f31024fe380d4c75119b5a,14,,bellevue road,sw13 0bj,SW13 0BJ 14,Mid Terrace,Freehold,1900-1929,51.472233,-0.240662,2555000.0,"£2,555,000",740000.0,"£740,000",2564496.0,"£2,564,496",199,12886.914573,High,0.542406,2555000.0
355,1239fd348f8e2d9972017ab706f22c30,24,,parke road,sw13 9ng,SW13 9NG 24,Detached,Freehold,1950-1966,51.479960,-0.243201,2620000.0,"£2,620,000",390000.0,"£390,000",2613199.0,"£2,613,199",289,9042.211073,High,0.340259,2620000.0
356,8040ac6220e752abfafa2d77103ae3d6,10,,the terrace,sw13 0np,SW13 0NP 10,End Terrace,Freehold,before 1900,51.472747,-0.252161,2740000.0,"£2,740,000",2740000.0,"£2,740,000",2841014.0,"£2,841,014",400,7102.535000,High,0.790709,2740000.0


In [278]:
sample['Physical Distance']=0.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['Physical Distance']=0.0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sample['Physical Distance']=0.0


In [279]:
import pandas as pd
import numpy as np

# Columns
categorical_cols = ['property.type', 'property.tenure', 'property.age']
amount_cols = [
    'property.highestPricePaid.value', 'property.highestPricePaid.amount',
    'property.lowestPricePaid.value', 'property.lowestPricePaid.amount',
    'property.estimatedCurrentValue.value', 'property.estimatedCurrentValue.amount',
    'property.estimatedCurrentValue.floorArea', 'property.estimatedCurrentValue.pricePerSqm',
    'Physical Distance'
]

# Make a copy
df = final_potential_comparables.copy()

# === Step 1: Clean and convert amount columns ===
def clean_currency(val):
    if isinstance(val, str):
        val = val.replace("£", "").replace(",", "").strip()
    try:
        return float(val)
    except:
        return np.nan

for col in amount_cols:
    df[col] = df[col].apply(clean_currency)

# === Step 2: Encode categorical columns ===
from sklearn.preprocessing import OrdinalEncoder

encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
df[categorical_cols] = encoder.fit_transform(df[categorical_cols])

# === Step 3: Apply same transformation to sample (Series) ===
def preprocess_sample(sample, encoder, categorical_cols, amount_cols):
    sample = sample.copy()

    # Clean amounts
    for col in amount_cols:
        val = sample[col]
        if isinstance(val, str):
            val = val.replace("£", "").replace(",", "").strip()
        try:
            sample[col] = float(val)
        except:
            sample[col] = np.nan

    # Encode categoricals
    cat_values = sample[categorical_cols].values.reshape(1, -1)
    sample[categorical_cols] = encoder.transform(cat_values)[0]

    return sample

# Apply to sample
sample = preprocess_sample(sample, encoder, categorical_cols, amount_cols)




In [280]:
final_df=df.copy()

In [281]:
final_df.columns

Index(['Matched_PID', 'Input_PAON', 'Input_SAON', 'Input_Street',
       'Input_Postcode', 'Property_Index', 'property.type', 'property.tenure',
       'property.age', 'position.lat', 'position.long',
       'property.highestPricePaid.value', 'property.highestPricePaid.amount',
       'property.lowestPricePaid.value', 'property.lowestPricePaid.amount',
       'property.estimatedCurrentValue.value',
       'property.estimatedCurrentValue.amount',
       'property.estimatedCurrentValue.floorArea',
       'property.estimatedCurrentValue.pricePerSqm',
       'property.estimatedCurrentValue.confidence', 'Physical Distance',
       'Price'],
      dtype='object')

In [282]:
columns_to_drop = [
    'Matched_PID',
    'Input_PAON',
    'Input_Postcode',
    'Input_SAON',
    'Input_Street',
    'property.tenure',
    'Property_Index',
    'position.lat',
    'position.long',
    'property.estimatedCurrentValue.confidence',
    'Price'
]

final_df = final_df.drop(columns=columns_to_drop)

In [283]:
final_df

Unnamed: 0,property.type,property.age,property.highestPricePaid.value,property.highestPricePaid.amount,property.lowestPricePaid.value,property.lowestPricePaid.amount,property.estimatedCurrentValue.value,property.estimatedCurrentValue.amount,property.estimatedCurrentValue.floorArea,property.estimatedCurrentValue.pricePerSqm,Physical Distance
0,5.0,1.0,2350000.0,2350000.0,840000.0,840000.0,2703774.0,2703774.0,255.0,10603.035294,0.000142
1,3.0,0.0,2150000.0,2150000.0,2150000.0,2150000.0,2502930.0,2502930.0,208.0,12033.317308,0.570571
2,1.0,4.0,2725000.0,2725000.0,690000.0,690000.0,3162674.0,3162674.0,176.0,17969.738636,1.355436
3,3.0,18.0,2225000.0,2225000.0,2000000.0,2000000.0,2569519.0,2569519.0,224.0,11471.066964,1.362600
4,5.0,26.0,2500000.0,2500000.0,2500000.0,2500000.0,2925455.0,2925455.0,201.0,14554.502488,0.900175
...,...,...,...,...,...,...,...,...,...,...,...
353,5.0,14.0,2200000.0,2200000.0,1600000.0,1600000.0,2278902.0,2278902.0,273.0,8347.626374,1.485373
354,3.0,0.0,2555000.0,2555000.0,740000.0,740000.0,2564496.0,2564496.0,199.0,12886.914573,0.542406
355,0.0,2.0,2620000.0,2620000.0,390000.0,390000.0,2613199.0,2613199.0,289.0,9042.211073,0.340259
356,1.0,26.0,2740000.0,2740000.0,2740000.0,2740000.0,2841014.0,2841014.0,400.0,7102.535000,0.790709


In [284]:
sample1=sample.copy()

In [285]:
columns_to_drop = [
    'Matched_PID',
    'Input_PAON',
    'Input_Postcode',
    'Input_SAON',
    'Input_Street',
    'Property_Index',
    'property.tenure',
    'position.lat',
    'position.long',
    'property.estimatedCurrentValue.confidence',
    'Price'
]

sample1 = sample1.drop(labels=columns_to_drop, errors='ignore')

In [286]:
columns_to_scale = [
    'property.age',
    'property.highestPricePaid.value',
    'property.highestPricePaid.amount',
    'property.lowestPricePaid.value',
    'property.lowestPricePaid.amount',
    'property.estimatedCurrentValue.value',
    'property.estimatedCurrentValue.amount',
    'property.estimatedCurrentValue.floorArea',
    'property.estimatedCurrentValue.pricePerSqm'
]


In [287]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
final_df[columns_to_scale] = scaler.fit_transform(final_df[columns_to_scale])


In [288]:
# Ensure columns in sample are in same order
sample_scaled = sample1.copy()

# Convert to DataFrame and transpose to a row
sample_df = sample_scaled[columns_to_scale].to_frame().T

# Apply same scaler
scaled_values = scaler.transform(sample_df)

# Assign back to sample
for i, col in enumerate(columns_to_scale):
    sample_scaled[col] = scaled_values[0, i]


In [289]:
final_df

Unnamed: 0,property.type,property.age,property.highestPricePaid.value,property.highestPricePaid.amount,property.lowestPricePaid.value,property.lowestPricePaid.amount,property.estimatedCurrentValue.value,property.estimatedCurrentValue.amount,property.estimatedCurrentValue.floorArea,property.estimatedCurrentValue.pricePerSqm,Physical Distance
0,5.0,0.038462,0.136546,0.136546,0.201179,0.201179,0.348262,0.348262,0.402235,0.219883,0.000142
1,3.0,0.000000,0.056225,0.056225,0.587325,0.587325,0.254515,0.254515,0.270950,0.301789,0.570571
2,1.0,0.153846,0.287149,0.287149,0.156964,0.156964,0.562460,0.562460,0.181564,0.641742,1.355436
3,3.0,0.692308,0.086345,0.086345,0.543110,0.543110,0.285596,0.285596,0.315642,0.269591,1.362600
4,5.0,1.000000,0.196787,0.196787,0.690494,0.690494,0.451735,0.451735,0.251397,0.446166,0.900175
...,...,...,...,...,...,...,...,...,...,...,...
353,5.0,0.538462,0.076305,0.076305,0.425203,0.425203,0.149947,0.149947,0.452514,0.090725,1.485373
354,3.0,0.000000,0.218876,0.218876,0.171702,0.171702,0.283252,0.283252,0.245810,0.350670,0.542406
355,0.0,0.076923,0.244980,0.244980,0.068534,0.068534,0.305985,0.305985,0.497207,0.130501,0.340259
356,1.0,1.000000,0.293173,0.293173,0.761238,0.761238,0.412321,0.412321,0.807263,0.019424,0.790709


In [255]:
sample_scaled

property.type                                      5.0
property.age                                  0.038462
property.highestPricePaid.value               0.136546
property.highestPricePaid.amount              0.136546
property.lowestPricePaid.value                0.201179
property.lowestPricePaid.amount               0.201179
property.estimatedCurrentValue.value          0.348262
property.estimatedCurrentValue.amount         0.348262
property.estimatedCurrentValue.floorArea      0.402235
property.estimatedCurrentValue.pricePerSqm    0.219883
Physical Distance                                  0.0
Name: 3, dtype: object

In [290]:
from sklearn.neighbors import NearestNeighbors
# Ensure both dataframes have the same columns
X_subject = sample_scaled.values
X_comparables = final_df

# Initialize the KNN model with Euclidean distance
knn_euclidean = NearestNeighbors(n_neighbors=10, metric='euclidean')

# Fit the model on the potential comparables data
knn_euclidean.fit(X_comparables)

# Find the nearest 10 neighbors
distances_euclidean, indices_euclidean = knn_euclidean.kneighbors(sample_scaled.to_frame().T)



In [291]:
# Retrieve the nearest 10 Potential_Comparable_IDs using the indices
nearest_properties_df = df.iloc[indices_euclidean[0]]

# Get the corresponding distances for the nearest properties
nearest_distances = distances_euclidean[0]

# Add the distances as a new column to the dataframe
nearest_properties_df['KNN_Distance'] = nearest_distances

# Sort the rows by the distance column, from closest to furthest
nearest_properties_df_sorted = nearest_properties_df.sort_values(by='KNN_Distance')

# Display the sorted rows
print("Rows from potential comparables matching the nearest 10 Property IDs, sorted by distance:")
nearest_properties_df_sorted

Rows from potential comparables matching the nearest 10 Property IDs, sorted by distance:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nearest_properties_df['KNN_Distance'] = nearest_distances


Unnamed: 0,Matched_PID,Input_PAON,Input_SAON,Input_Street,Input_Postcode,Property_Index,property.type,property.tenure,property.age,position.lat,position.long,property.highestPricePaid.value,property.highestPricePaid.amount,property.lowestPricePaid.value,property.lowestPricePaid.amount,property.estimatedCurrentValue.value,property.estimatedCurrentValue.amount,property.estimatedCurrentValue.floorArea,property.estimatedCurrentValue.pricePerSqm,property.estimatedCurrentValue.confidence,Physical Distance,Price,KNN_Distance
0,edd93a5663abe8b8b7bb930acf6c053e,35,,westmoreland road,sw13 9rz,SW13 9RZ 35,5.0,0.0,1.0,51.476907,-0.242901,2350000.0,2350000.0,840000.0,840000.0,2703774.0,2703774.0,255.0,10603.035294,High,0.000142,2350000.0,0.000142
170,d0ef4e4342f43877e1693ef2ca8f0c91,13,,lowther road,sw13 9nx,SW13 9NX 13,5.0,0.0,1.0,51.477052,-0.246553,2575000.0,2575000.0,775000.0,775000.0,2820467.0,2820467.0,232.0,12157.185345,High,0.253439,2575000.0,0.315084
15,149311fe9f07faab6c40a7ebb3db8dc3,26,,gerard road,sw13 9rg,SW13 9RG 26,5.0,0.0,0.0,51.475574,-0.245775,2245000.0,2245000.0,402000.0,402000.0,2619642.0,2619642.0,211.0,12415.364929,High,0.248122,2245000.0,0.359025
191,2f27b073522bb6300aca1e7866940094,15,,melville road,sw13 9rh,SW13 9RH 15,5.0,0.0,0.0,51.476179,-0.242324,2925000.0,2925000.0,1020000.0,1020000.0,2919267.0,2919267.0,240.0,12163.6125,High,0.090458,2925000.0,0.38977
285,6cf936db515d393cfee850cb09f08160,6,,melville road,sw13 9rj,SW13 9RJ 6,5.0,0.0,0.0,51.47589,-0.241486,2820000.0,2820000.0,375000.0,375000.0,2907615.0,2907615.0,241.0,12064.792531,High,0.149577,2820000.0,0.399157
155,f9fb8ad34237a774d6eeaf11e9f61ba2,35,,byfeld gardens,sw13 9hp,SW13 9HP 35,5.0,0.0,0.0,51.476475,-0.241185,2137500.0,2137500.0,292000.0,292000.0,2401294.0,2401294.0,189.0,12705.259259,High,0.128192,2137500.0,0.415947
142,02103dfa2bfea69ed6587a66e1299767,70,,gerard road,sw13 9qq,SW13 9QQ 70,5.0,0.0,1.0,51.476454,-0.248664,2255000.0,2255000.0,900000.0,900000.0,2514051.0,2514051.0,253.0,9936.960474,High,0.402333,2255000.0,0.427293
338,2e0d21a7525d54171a6b3d7af5cead8c,33,,byfeld gardens,sw13 9hp,SW13 9HP 33,5.0,0.0,0.0,51.476411,-0.241162,2225000.0,2225000.0,640000.0,640000.0,2237148.0,2237148.0,173.0,12931.491329,High,0.132513,2225000.0,0.44289
211,e48856806833c33ed33ebf4915f684d1,27,,suffolk road,sw13 9na,SW13 9NA 27,5.0,0.0,1.0,51.478961,-0.243125,2750000.0,2750000.0,1401000.0,1401000.0,2815970.0,2815970.0,214.0,13158.738318,High,0.229012,2750000.0,0.445836
161,7a62547e8bc6cf2106400dc59be407f4,15,,westmoreland road,sw13 9rz,SW13 9RZ 15,5.0,0.0,0.0,51.477052,-0.241744,2875000.0,2875000.0,610000.0,610000.0,3205276.0,3205276.0,268.0,11959.985075,High,0.081746,2875000.0,0.472452


In [292]:
sample

Matched_PID                                   edd93a5663abe8b8b7bb930acf6c053e
Input_PAON                                                                  35
Input_SAON                                                                 NaN
Input_Street                                                 westmoreland road
Input_Postcode                                                        sw13 9rz
Property_Index                                                     SW13 9RZ 35
property.type                                                              5.0
property.tenure                                                            0.0
property.age                                                               1.0
position.lat                                                         51.476907
position.long                                                        -0.242901
property.highestPricePaid.value                                      2350000.0
property.highestPricePaid.amount                    

In [293]:
subject_price=sample['Price']
subject_price

2350000

In [294]:
comparables_mean_price=nearest_properties_df_sorted['Price'].mean()     
comparables_mean_price

np.float64(2515750.0)

In [296]:
import numpy as np

comparables_mean_price = np.float64(2547800.0)
subject_price = 2765000

accuracy = (1 - abs(subject_price - comparables_mean_price) / subject_price) * 100

print(f"Accuracy: {accuracy:.2f}%")


Accuracy: 92.14%
