In [155]:
import pandas as pd
pd.set_option('display.max_columns', None)  # Show all columns in DataFrame display
pd.set_option('display.width', None)        # Don't truncate wide rows


In [197]:
import pandas as pd
import numpy as np
import asyncio
from KNN_faiss import GeoKNNSearch
from property_scraper import PropertyScraper
import re  # Import the scraper class
from property_analyzer import PropertyAnalyzer

In [198]:
def load_data():
    """Loads property dataset and splits into recent (2025) and historical records."""
    data_enrich_train = pd.read_excel("Data/final_merged_output.xlsx")

    return data_enrich_train

In [199]:
def initialize_knn(data_enrich_train):
    """Initializes the KNN model with historical property data."""
    return GeoKNNSearch(
        data=data_enrich_train[['Property_Index', 'position.lat', 'position.long']],
        lat_col='position.lat', lon_col='position.long', id_col='Property_Index',
        use_exact_distance=True
    )

In [None]:
df=load_data()
new_df = df.iloc[1:].reset_index(drop=True)

In [None]:
columns_to_keep = [
    "Matched_PID",
    "Input_PAON",
    "Input_SAON",
    "Input_Street",
    "Input_Postcode",
    "Property_Index",
    "property.type",
    "property.tenure",
    "property.age",
    "position.lat",
    "position.long",
    "property.highestPricePaid.value",
    "property.highestPricePaid.amount",
    "property.lowestPricePaid.value",
    "property.lowestPricePaid.amount",
    "property.estimatedCurrentValue.value",
    "property.estimatedCurrentValue.amount",
    "property.estimatedCurrentValue.floorArea",
    "property.estimatedCurrentValue.pricePerSqm",
    "property.estimatedCurrentValue.confidence"
]
new_df = new_df[columns_to_keep]


In [None]:
sample = new_df.iloc[0]
latitude=[sample["position.lat"]]
longitude=[sample["position.long"]]
latitude_np = np.float64(latitude[0])
longitude_np=np.float64(longitude[0])

In [None]:
subject_property_coordinates = (latitude_np, longitude_np)
knn = initialize_knn(new_df)
property_indices, distances = knn.knearest(subject_property_coordinates, 500, return_distances=True)
# Convert to a dictionary to get unique indices and their minimum distances
unique_distances = {}
for index, distance in zip(property_indices, distances):
    if index in unique_distances:
        if unique_distances[index] != distance:
            print(f"Conflict for index {index}: existing distance {unique_distances[index]}, new distance {distance}")
    else:
        unique_distances[index] = distance

potential_comparables = np.array(list(unique_distances.items()))

In [None]:
sample

Matched_PID                                   c95444463147530ca2ace31548b14658
Input_PAON                                                                  57
Input_SAON                                                                 NaN
Input_Street                                                   st johns avenue
Input_Postcode                                                        sw15 6al
Property_Index                                                     SW15 6AL 57
property.type                                                              0.0
property.tenure                                                            0.0
property.age                                                              10.0
position.lat                                                          51.45954
position.long                                                         -0.22164
property.highestPricePaid.value                                      2765000.0
property.highestPricePaid.amount                    

In [None]:
new_potential_comparables = pd.DataFrame(potential_comparables, columns=['Property_Index', 'Physical Distance'])

# Convert 'Physical Distance' to float
new_potential_comparables['Physical Distance'] = new_potential_comparables['Physical Distance'].astype(np.float64)

# Now merge with df on Property_Index
pc_df = df.merge(new_potential_comparables, on='Property_Index', how='inner')

In [None]:
new_potential_comparables = pd.read_excel("Data/premium_property_enrich_individual_properties.xlsx")

# Step 2: Ensure 'Property_Index' exists in both DataFrames
if 'Property_Index' not in new_potential_comparables.columns:
    raise ValueError("The new dataframe does not have a 'Property_Index' column.")

# Step 3: Group by 'Property_Index' and sum the 'Price' column
price_df = new_potential_comparables.groupby('Property_Index', as_index=False)['Price'].sum()

# Step 4: Merge with pc_df
pc_with_price = pc_df.merge(price_df, on='Property_Index', how='left')

# Step 5: Identify and report unmatched rows
unmatched = pc_with_price[pc_with_price['Price'].isna()]
if not unmatched.empty:
    print("Unmatched Property_Index rows:")
    print(unmatched['Property_Index'].tolist())
    print(f"Total unmatched rows: {len(unmatched)}")

# Drop unmatched rows where 'Price' is NaN
pc_with_price = pc_with_price.dropna(subset=['Price']).reset_index(drop=True)


Unmatched Property_Index rows:
['SW15 1AL EGLISTON LAWNS 13 3', 'SW6 3LF CAWTHORN APARTMENTS 86 42', 'SW18 1LL CHARTFIELD HOUSE 27', 'SW18 1UJ COOPERS LOFTS 5 APARTMENT 9', 'SW18 1UJ COOPERS LOFTS 5 APARTMENT 9', 'SW6 6NZ ELM LODGE 75 FLAT 9', 'SW15 2JJ CASTLE COURT 1 FLAT 49']
Total unmatched rows: 7


In [200]:
import pandas as pd

# Step 1: Load the Excel file
df = pd.read_excel("Data/premium_property_enrich_individual_properties.xlsx")

# Step 2: Get the Property_Index from the sample Series
property_index = sample['Property_Index']

# Step 3: Filter the dataframe to match the Property_Index
matching_rows = df[df['Property_Index'] == property_index]

if not matching_rows.empty:
    # Step 4: Sum the Price values if there are duplicates
    total_price = matching_rows['Price'].sum()

    # Step 5: Add Price to the sample Series
    sample['Price'] = total_price
else:
    print(f"No match found for Property_Index: {property_index}")


In [201]:
# Step 1: Sort by physical distance
nearest_5 = pc_with_price.sort_values(by='Physical Distance').head(10).copy()

# Step 2: Avoid division by zero (if distance = 0, add a tiny number)
nearest_5['Weight'] = 1 / (nearest_5['Physical Distance'] + 1e-6)

# Step 3: Calculate weighted average price
weighted_mean_price = (nearest_5['Price'] * nearest_5['Weight']).sum() / nearest_5['Weight'].sum()

# Step 4: Define 50% price bounds
lower_bound = weighted_mean_price * 0.5
upper_bound = weighted_mean_price * 1.5

# Step 5: Filter original dataframe based on those bounds
final_potential_comparables = pc_with_price[
    (pc_with_price['Price'] >= lower_bound) &
    (pc_with_price['Price'] <= upper_bound)
]

# Optional: print weighted mean and bounds
print(f"Weighted Mean Price: {weighted_mean_price:.2f}")
print(f"Allowed Price Range: {lower_bound:.2f} - {upper_bound:.2f}")


Weighted Mean Price: 2775726.61
Allowed Price Range: 1387863.31 - 4163589.92


In [202]:
# Step 6: Find additional rows matching Street, excluding same Property_Index and already-included ones
additional_rows = pc_with_price[
    (pc_with_price['Input_Street'] == sample['Input_Street']) &
    (pc_with_price['Property_Index'] != sample['Property_Index']) &
    (~pc_with_price['Property_Index'].isin(final_potential_comparables['Property_Index']))
]

# Step 7: Add those rows to final_potential_comparables
final_potential_comparables = pd.concat([final_potential_comparables, additional_rows], ignore_index=True)


In [203]:
import pandas as pd
import numpy as np

# Define columns to check
columns_to_check = [
    'property.type', 'property.tenure', 'property.age', 'position.lat', 'position.long',
    'property.highestPricePaid.value', 'property.highestPricePaid.amount',
    'property.lowestPricePaid.value', 'property.lowestPricePaid.amount',
    'property.estimatedCurrentValue.value', 'property.estimatedCurrentValue.amount',
    'property.estimatedCurrentValue.floorArea', 'property.estimatedCurrentValue.pricePerSqm',
    'property.estimatedCurrentValue.confidence', 'Physical Distance', 'Price'
]

# Step 1: Replace blank strings with NaN
df_cleaned = final_potential_comparables.copy()
df_cleaned[columns_to_check] = df_cleaned[columns_to_check].replace(r'^\s*$', np.nan, regex=True)

# Step 2: Count rows before dropping
initial_rows = df_cleaned.shape[0]

# Step 3: Drop rows with any NaN in the selected columns
df_cleaned = df_cleaned.dropna(subset=columns_to_check).reset_index(drop=True)

# Step 4: Count rows after dropping
final_rows = df_cleaned.shape[0]
dropped_rows = initial_rows - final_rows

print(f"Removed {dropped_rows} rows due to missing or invalid values in critical columns.")

# Optional: Assign back to your original variable
final_potential_comparables = df_cleaned


Removed 9 rows due to missing or invalid values in critical columns.


  df_cleaned[columns_to_check] = df_cleaned[columns_to_check].replace(r'^\s*$', np.nan, regex=True)


In [204]:
final_potential_comparables.columns

Index(['Matched_PID', 'Input_PAON', 'Input_SAON', 'Input_Street',
       'Input_Postcode', 'Property_Index', 'property.type', 'property.tenure',
       'property.age', 'position.lat', 'position.long',
       'property.highestPricePaid.value', 'property.highestPricePaid.amount',
       'property.lowestPricePaid.value', 'property.lowestPricePaid.amount',
       'property.estimatedCurrentValue.value',
       'property.estimatedCurrentValue.amount',
       'property.estimatedCurrentValue.floorArea',
       'property.estimatedCurrentValue.pricePerSqm',
       'property.estimatedCurrentValue.confidence', 'Physical Distance',
       'Price'],
      dtype='object')

In [205]:
final_potential_comparables

Unnamed: 0,Matched_PID,Input_PAON,Input_SAON,Input_Street,Input_Postcode,Property_Index,property.type,property.tenure,property.age,position.lat,position.long,property.highestPricePaid.value,property.highestPricePaid.amount,property.lowestPricePaid.value,property.lowestPricePaid.amount,property.estimatedCurrentValue.value,property.estimatedCurrentValue.amount,property.estimatedCurrentValue.floorArea,property.estimatedCurrentValue.pricePerSqm,property.estimatedCurrentValue.confidence,Physical Distance,Price
0,c95444463147530ca2ace31548b14658,57,,st johns avenue,sw15 6al,SW15 6AL 57,Detached,Freehold,2007 onwards,51.459540,-0.221640,2765000.0,"£2,765,000",1700000.0,"£1,700,000",3104865.0,"£3,104,865",287.0,10818.344948,High,0.000136,2765000.0
1,8230b33a05109321e63e819efeae99c3,37,,melrose road,sw18 1lx,SW18 1LX 37,Detached,Freehold,1900-1929,51.451812,-0.205756,2750000.0,"£2,750,000",1850000.0,"£1,850,000",3144625.0,"£3,144,625",258.0,12188.468992,High,1.396289,2750000.0
2,0df927938fcace3143324a9d12968e77,29,,hotham road,sw15 1ql,SW15 1QL 29,Semi Detached,Freehold,before 1900,51.465129,-0.223932,2015000.0,"£2,015,000",500000.0,"£500,000",2339562.0,"£2,339,562",245.0,9549.232653,High,0.641505,2015000.0
3,57974246df65cb387ccd9dacbe0f13ef,4,,bellevue road,sw13 0bj,SW13 0BJ 4,Mid Terrace,Freehold,1900-1929,51.471997,-0.240512,2150000.0,"£2,150,000",2150000.0,"£2,150,000",2502930.0,"£2,502,930",208.0,12033.317308,High,1.904578,2150000.0
4,7b3b49d08b2e3a5ab7bfb45cd1877981,31,,west hill road,sw18 1ll,SW18 1LL 31,Detached,Freehold,INVALID!,51.453506,-0.205442,2400000.0,"£2,400,000",1450000.0,"£1,450,000",2695000.0,"£2,695,000",234.0,11517.094017,High,1.307560,2400000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
412,f59467c254f31024fe380d4c75119b5a,14,,bellevue road,sw13 0bj,SW13 0BJ 14,Mid Terrace,Freehold,1900-1929,51.472233,-0.240662,2555000.0,"£2,555,000",740000.0,"£740,000",2564496.0,"£2,564,496",199.0,12886.914573,High,1.930833,2555000.0
413,eb2b010a895a68036fb9ee1c55128d5f,8,,wedgewood mews,sw6 3eq,SW6 3EQ 8,Flat/Maisonette,Freehold,2019,51.469904,-0.211009,2200000.0,"£2,200,000",1875000.0,"£1,875,000",2176208.0,"£2,176,208",155.0,14040.051613,High,1.367452,2200000.0
414,90808fa150f5e552d63f4852f85ffee0,23,,cristowe road,sw6 3qf,SW6 3QF 23,Mid Terrace,Freehold,1900-1929,51.471135,-0.204404,2383405.0,"£2,383,405",895000.0,"£895,000",2429839.0,"£2,429,839",228.0,10657.188596,High,1.757188,2383405.0
415,f4c14c9dfedc2390645e65fae192f79b,51,,disraeli road,sw15 2dr,SW15 2DR 51,Mid Terrace,Freehold,before 1900,51.461268,-0.213702,2750000.0,"£2,750,000",675000.0,"£675,000",2725250.0,"£2,725,250",276.0,9874.094203,High,0.582596,2750000.0


In [206]:
sample['Physical Distance']=0.0

In [207]:
import pandas as pd
import numpy as np

# Columns
categorical_cols = ['property.type', 'property.tenure', 'property.age']
amount_cols = [
    'property.highestPricePaid.value', 'property.highestPricePaid.amount',
    'property.lowestPricePaid.value', 'property.lowestPricePaid.amount',
    'property.estimatedCurrentValue.value', 'property.estimatedCurrentValue.amount',
    'property.estimatedCurrentValue.floorArea', 'property.estimatedCurrentValue.pricePerSqm',
    'Physical Distance'
]

# Make a copy
df = final_potential_comparables.copy()

# === Step 1: Clean and convert amount columns ===
def clean_currency(val):
    if isinstance(val, str):
        val = val.replace("£", "").replace(",", "").strip()
    try:
        return float(val)
    except:
        return np.nan

for col in amount_cols:
    df[col] = df[col].apply(clean_currency)

# === Step 2: Encode categorical columns ===
from sklearn.preprocessing import OrdinalEncoder

encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
df[categorical_cols] = encoder.fit_transform(df[categorical_cols])

# === Step 3: Apply same transformation to sample (Series) ===
def preprocess_sample(sample, encoder, categorical_cols, amount_cols):
    sample = sample.copy()

    # Clean amounts
    for col in amount_cols:
        val = sample[col]
        if isinstance(val, str):
            val = val.replace("£", "").replace(",", "").strip()
        try:
            sample[col] = float(val)
        except:
            sample[col] = np.nan

    # Encode categoricals
    cat_values = sample[categorical_cols].values.reshape(1, -1)
    sample[categorical_cols] = encoder.transform(cat_values)[0]

    return sample

# Apply to sample
sample = preprocess_sample(sample, encoder, categorical_cols, amount_cols)




In [208]:
final_df=df.copy()

In [209]:
final_df.columns

Index(['Matched_PID', 'Input_PAON', 'Input_SAON', 'Input_Street',
       'Input_Postcode', 'Property_Index', 'property.type', 'property.tenure',
       'property.age', 'position.lat', 'position.long',
       'property.highestPricePaid.value', 'property.highestPricePaid.amount',
       'property.lowestPricePaid.value', 'property.lowestPricePaid.amount',
       'property.estimatedCurrentValue.value',
       'property.estimatedCurrentValue.amount',
       'property.estimatedCurrentValue.floorArea',
       'property.estimatedCurrentValue.pricePerSqm',
       'property.estimatedCurrentValue.confidence', 'Physical Distance',
       'Price'],
      dtype='object')

In [210]:
columns_to_drop = [
    'Matched_PID',
    'Input_PAON',
    'Input_Postcode',
    'Input_SAON',
    'Input_Street',
    'property.tenure',
    'Property_Index',
    'position.lat',
    'position.long',
    'property.estimatedCurrentValue.confidence',
    'Price'
]

final_df = final_df.drop(columns=columns_to_drop)

In [211]:
final_df

Unnamed: 0,property.type,property.age,property.highestPricePaid.value,property.highestPricePaid.amount,property.lowestPricePaid.value,property.lowestPricePaid.amount,property.estimatedCurrentValue.value,property.estimatedCurrentValue.amount,property.estimatedCurrentValue.floorArea,property.estimatedCurrentValue.pricePerSqm,Physical Distance
0,0.0,10.0,2765000.0,2765000.0,1700000.0,1700000.0,3104865.0,3104865.0,287.0,10818.344948,0.000136
1,0.0,0.0,2750000.0,2750000.0,1850000.0,1850000.0,3144625.0,3144625.0,258.0,12188.468992,1.396289
2,6.0,17.0,2015000.0,2015000.0,500000.0,500000.0,2339562.0,2339562.0,245.0,9549.232653,0.641505
3,4.0,0.0,2150000.0,2150000.0,2150000.0,2150000.0,2502930.0,2502930.0,208.0,12033.317308,1.904578
4,0.0,16.0,2400000.0,2400000.0,1450000.0,1450000.0,2695000.0,2695000.0,234.0,11517.094017,1.307560
...,...,...,...,...,...,...,...,...,...,...,...
412,4.0,0.0,2555000.0,2555000.0,740000.0,740000.0,2564496.0,2564496.0,199.0,12886.914573,1.930833
413,3.0,13.0,2200000.0,2200000.0,1875000.0,1875000.0,2176208.0,2176208.0,155.0,14040.051613,1.367452
414,4.0,0.0,2383405.0,2383405.0,895000.0,895000.0,2429839.0,2429839.0,228.0,10657.188596,1.757188
415,4.0,17.0,2750000.0,2750000.0,675000.0,675000.0,2725250.0,2725250.0,276.0,9874.094203,0.582596


In [212]:
sample1=sample.copy()

In [213]:
columns_to_drop = [
    'Matched_PID',
    'Input_PAON',
    'Input_Postcode',
    'Input_SAON',
    'Input_Street',
    'Property_Index',
    'property.tenure',
    'position.lat',
    'position.long',
    'property.estimatedCurrentValue.confidence',
    'Price'
]

sample1 = sample1.drop(labels=columns_to_drop, errors='ignore')

In [214]:
columns_to_scale = [
    'property.age',
    'property.highestPricePaid.value',
    'property.highestPricePaid.amount',
    'property.lowestPricePaid.value',
    'property.lowestPricePaid.amount',
    'property.estimatedCurrentValue.value',
    'property.estimatedCurrentValue.amount',
    'property.estimatedCurrentValue.floorArea',
    'property.estimatedCurrentValue.pricePerSqm'
]


In [215]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
final_df[columns_to_scale] = scaler.fit_transform(final_df[columns_to_scale])


In [216]:
# Ensure columns in sample are in same order
sample_scaled = sample1.copy()

# Convert to DataFrame and transpose to a row
sample_df = sample_scaled[columns_to_scale].to_frame().T

# Apply same scaler
scaled_values = scaler.transform(sample_df)

# Assign back to sample
for i, col in enumerate(columns_to_scale):
    sample_scaled[col] = scaled_values[0, i]


In [217]:
final_df

Unnamed: 0,property.type,property.age,property.highestPricePaid.value,property.highestPricePaid.amount,property.lowestPricePaid.value,property.lowestPricePaid.amount,property.estimatedCurrentValue.value,property.estimatedCurrentValue.amount,property.estimatedCurrentValue.floorArea,property.estimatedCurrentValue.pricePerSqm,Physical Distance
0,0.0,0.588235,0.303213,0.303213,0.403530,0.403530,0.387350,0.387350,0.408353,0.228699,0.000136
1,0.0,0.000000,0.297189,0.297189,0.440049,0.440049,0.400774,0.400774,0.341067,0.293887,1.396289
2,6.0,1.000000,0.002008,0.002008,0.111382,0.111382,0.128949,0.128949,0.310905,0.168316,0.641505
3,4.0,0.000000,0.056225,0.056225,0.513086,0.513086,0.184109,0.184109,0.225058,0.286505,1.904578
4,0.0,0.941176,0.156627,0.156627,0.342666,0.342666,0.248961,0.248961,0.285383,0.261944,1.307560
...,...,...,...,...,...,...,...,...,...,...,...
412,4.0,0.000000,0.218876,0.218876,0.169811,0.169811,0.204897,0.204897,0.204176,0.327119,1.930833
413,3.0,0.764706,0.076305,0.076305,0.446135,0.446135,0.073793,0.073793,0.102088,0.381984,1.367452
414,4.0,0.000000,0.149962,0.149962,0.207547,0.207547,0.159431,0.159431,0.271462,0.221031,1.757188
415,4.0,1.000000,0.297189,0.297189,0.153987,0.153987,0.259175,0.259175,0.382831,0.183772,0.582596


In [218]:
sample_scaled

property.type                                     -1.0
property.age                                 -0.058824
property.highestPricePaid.value               0.303213
property.highestPricePaid.amount              0.303213
property.lowestPricePaid.value                 0.40353
property.lowestPricePaid.amount                0.40353
property.estimatedCurrentValue.value           0.38735
property.estimatedCurrentValue.amount          0.38735
property.estimatedCurrentValue.floorArea      0.408353
property.estimatedCurrentValue.pricePerSqm    0.228699
Physical Distance                                  0.0
Name: 0, dtype: object

In [219]:
from sklearn.neighbors import NearestNeighbors
# Ensure both dataframes have the same columns
X_subject = sample_scaled.values
X_comparables = final_df

# Initialize the KNN model with Euclidean distance
knn_euclidean = NearestNeighbors(n_neighbors=10, metric='euclidean')

# Fit the model on the potential comparables data
knn_euclidean.fit(X_comparables)

# Find the nearest 10 neighbors
distances_euclidean, indices_euclidean = knn_euclidean.kneighbors(sample_scaled.to_frame().T)



In [220]:
# Retrieve the nearest 10 Potential_Comparable_IDs using the indices
nearest_properties_df = df.iloc[indices_euclidean[0]]

# Get the corresponding distances for the nearest properties
nearest_distances = distances_euclidean[0]

# Add the distances as a new column to the dataframe
nearest_properties_df['KNN_Distance'] = nearest_distances

# Sort the rows by the distance column, from closest to furthest
nearest_properties_df_sorted = nearest_properties_df.sort_values(by='KNN_Distance')

# Display the sorted rows
print("Rows from potential comparables matching the nearest 10 Property IDs, sorted by distance:")
nearest_properties_df_sorted

Rows from potential comparables matching the nearest 10 Property IDs, sorted by distance:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nearest_properties_df['KNN_Distance'] = nearest_distances


Unnamed: 0,Matched_PID,Input_PAON,Input_SAON,Input_Street,Input_Postcode,Property_Index,property.type,property.tenure,property.age,position.lat,position.long,property.highestPricePaid.value,property.highestPricePaid.amount,property.lowestPricePaid.value,property.lowestPricePaid.amount,property.estimatedCurrentValue.value,property.estimatedCurrentValue.amount,property.estimatedCurrentValue.floorArea,property.estimatedCurrentValue.pricePerSqm,property.estimatedCurrentValue.confidence,Physical Distance,Price,KNN_Distance
99,2caba77011cde54b8b3fa94006f0c6eb,25,,westleigh avenue,sw15 6rq,SW15 6RQ 25,0.0,0.0,0.0,51.455996,-0.223595,2695000.0,2695000.0,1400000.0,1400000.0,2986805.0,2986805.0,325.0,9190.169231,High,0.416558,2695000.0,1.098263
128,2f32a69a52f5c6e55a4f4b8525b5aab0,20,,hazlewell road,sw15 6lh,SW15 6LH 20,0.0,0.0,0.0,51.459725,-0.225768,2638000.0,2638000.0,2638000.0,2638000.0,2942535.0,2942535.0,231.0,12738.246753,High,0.286715,2638000.0,1.10743
186,d013b698088fc38884c0c2a5a43ac731,6,,tideswell road,sw15 6lj,SW15 6LJ 6,0.0,0.0,0.0,51.460695,-0.224478,2400000.0,2400000.0,2400000.0,2400000.0,2666266.0,2666266.0,215.0,12401.237209,High,0.234731,2400000.0,1.112232
80,8c994a9cfb460c29b28778fbb5500533,35,,westleigh avenue,sw15 6rq,SW15 6RQ 35,0.0,0.0,1.0,51.455994,-0.22477,2525000.0,2525000.0,2525000.0,2525000.0,2896717.0,2896717.0,320.0,9052.240625,High,0.450039,2525000.0,1.15691
50,5cc9d3e6df4de33d8385b4c632cb716d,37,,larpent avenue,sw15 6uu,SW15 6UU 37,0.0,0.0,0.0,51.45804,-0.229144,2655000.0,2655000.0,789000.0,789000.0,3069004.0,3069004.0,300.0,10230.013333,High,0.545923,2655000.0,1.185649
0,c95444463147530ca2ace31548b14658,57,,st johns avenue,sw15 6al,SW15 6AL 57,0.0,0.0,10.0,51.45954,-0.22164,2765000.0,2765000.0,1700000.0,1700000.0,3104865.0,3104865.0,287.0,10818.344948,High,0.000136,2765000.0,1.191086
385,e01fdd60d2ae2968b15588fa90045c17,39,,chartfield avenue,sw15 6hp,SW15 6HP 39,0.0,0.0,1.0,51.457534,-0.228344,2500000.0,2500000.0,2500000.0,2500000.0,2520939.0,2520939.0,233.0,10819.480687,High,0.515164,2500000.0,1.212929
82,989cb4400fce972aa58a9195505f406c,11,,lytton grove,sw15 2ep,SW15 2EP 11,0.0,0.0,1.0,51.45608,-0.215503,2450000.0,2450000.0,2450000.0,2450000.0,2670099.0,2670099.0,273.0,9780.582418,High,0.573369,2450000.0,1.219784
7,6befd28e04eb55cf0af5a57622aa6d65,59,,hazlewell road,sw15 6ut,SW15 6UT 59,0.0,0.0,0.0,51.458893,-0.230824,2500000.0,2500000.0,2500000.0,2500000.0,2858750.0,2858750.0,267.0,10706.928839,High,0.64034,2500000.0,1.236126
346,f07ad3739edce5243bc405921220721e,9,,werter road,sw15 2ll,SW15 2LL 9,0.0,0.0,0.0,51.462431,-0.215025,2350000.0,2350000.0,2350000.0,2350000.0,2382135.0,2382135.0,265.0,8989.188679,High,0.559699,2350000.0,1.245648


In [194]:
sample

Matched_PID                                   c95444463147530ca2ace31548b14658
Input_PAON                                                                  57
Input_SAON                                                                 NaN
Input_Street                                                   st johns avenue
Input_Postcode                                                        sw15 6al
Property_Index                                                     SW15 6AL 57
property.type                                                              0.0
property.tenure                                                            0.0
property.age                                                              10.0
position.lat                                                          51.45954
position.long                                                         -0.22164
property.highestPricePaid.value                                      2765000.0
property.highestPricePaid.amount                    

In [222]:
subject_price=sample['Price']
subject_price

2765000

In [223]:
comparables_mean_price=nearest_properties_df_sorted['Price'].mean()     
comparables_mean_price

np.float64(2547800.0)

In [225]:
import numpy as np

comparables_mean_price = np.float64(2547800.0)
subject_price = 2765000

accuracy = (1 - abs(subject_price - comparables_mean_price) / subject_price) * 100

print(f"Accuracy: {accuracy:.2f}%")


Accuracy: 92.14%
