<a href="https://colab.research.google.com/github/KennyJayPhil/ReadHTMLData/blob/main/machineLearningHouseRanking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [81]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Step 1: Load and Clean Data
file_path = 'Machine learning data set.csv'
data = pd.read_csv(file_path)

# Drop rows with missing values if necessary
data.dropna(inplace=True)

# Convert 'Close Date' to datetime format if it's not already
data['Close Date'] = pd.to_datetime(data['Close Date'])

# Step 2: Data Normalization (if needed)
scaler = StandardScaler()
numeric_cols = ['Distance', 'SqFt', 'Bath Full', 'Baths Half', 'Beds Total', 'Year Built', 'Acres']
data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

# Step 3: Input Property Details
input_property = {
    'Distance': 0,
    'SqFt': 1390,
    'Bath Full': 2,
    'Baths Half': 0,
    'Beds Total': 3,
    'Year Built': 1998,
    'Acres': 0.11
}

# Step 4: Define Ranking Criteria and Calculation
def rank_properties(data, input_property):
    criteria_weights = {
        'Distance': -1,
        'Close Date': -2,
        'SqFt': -3,
        'Beds Total': -4,
        'Bath Full': -5,
        'Baths Half': -6,
        'Year Built': -7,
        'Acres': -8
    }

    ranking_scores = []
    for index, row in data.iterrows():
        score = sum([row[key] * criteria_weights[key] for key in input_property.keys()])
        ranking_scores.append(score)

    data['Ranking Score'] = ranking_scores
    data_sorted = data.sort_values(by='Ranking Score', ascending=False)

    return data_sorted

# Step 5: Run the Program
ranked_properties = rank_properties(data, input_property)

# Step 6: Print or Save the Results
print("Ranked Comparable Sales:")
print(ranked_properties[['Distance', 'SqFt', 'Bath Full', 'Baths Half', 'Beds Total', 'Year Built', 'Acres', 'Close Date', 'Ranking Score']])
for index, row in ranked_properties.iterrows():
    print(f"Ranking Score: {row['Ranking Score']}")
    print(f"Distance: {row['Distance']}")
    print(f"SqFt: {row['SqFt']}")
    print(f"Bath Full: {row['Bath Full']}")
    print(f"Baths Half: {row['Baths Half']}")
    print(f"Beds Total: {row['Beds Total']}")
    print(f"Year Built: {row['Year Built']}")
    print(f"Acres: {row['Acres']}")
    print(f"MLS#: {row['MLS#']}")
    print(f"Mls Status: {row['Mls Status']}")
    print(f"Address: {row['Address']}")
    print(f"Garage Spaces: {row['Garage Spaces']}")
    print(f"Carport Spaces: {row['Carport Spaces']}")
    print(f"Close Price: {row['Close Price']}")
    print(f"$/LivSqFt: {row['$/LivSqFt']}")
    print(f"ClsPr/SqFt: {row['ClsPr/SqFt']}")
    print(f"Days On Market: {row['Days On Market']}")
    print(f"Seller Contributions: {row['Seller Contributions']}")
    print(f"Close Date: {row['Close Date']}")
    print("\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m


Ranking Score: -5.917766703222945
Distance: -1.46239862844376
SqFt: 0.24397208723514446
Bath Full: 0.39210696312117305
Baths Half: -0.35239609324728294
Beds Total: -0.08742793201629628
Year Built: 1.3106796993741339
Acres: -0.25286941921433087
MLS#: 20548415
Mls Status: Closed
Address: 3507 Avenue G 
Garage Spaces: 2
Carport Spaces: 0
Close Price: $189,900 
$/LivSqFt: $121.89 
ClsPr/SqFt: $121.89 
Days On Market: 2
Seller Contributions: 0
Close Date: 2024-05-14 00:00:00


Ranking Score: -5.929378006086181
Distance: 0.7530501902366886
SqFt: 0.25096896628103005
Bath Full: 0.39210696312117305
Baths Half: -0.35239609324728294
Beds Total: -0.08742793201629628
Year Built: -0.5904406668519583
Acres: 1.132507382114141
MLS#: 20493632
Mls Status: Closed
Address: 5300 Meadowbrook  Drive 
Garage Spaces: 2
Carport Spaces: 0
Close Price: $264,900 
$/LivSqFt: $169.70 
ClsPr/SqFt: $169.70 
Days On Market: 65
Seller Contributions: 9,000

In [56]:
import os

# Specify the file path
file_path = 'Machine learning data set.csv'  # Replace with the file path

# Delete the file
if os.path.exists(file_path):
    os.remove(file_path)
    print(f'{file_path} has been deleted.')
else:
    print(f'{file_path} does not exist.')

Machine learning data set.csv has been deleted.


In [57]:
from google.colab import files

# Upload CSV file
uploaded = files.upload()

# Upload HTML template file
uploaded_template = files.upload()


Saving Machine learning data set.csv to Machine learning data set.csv
