# Real Estate Prices
The purpose of this notebook is to train a multiple linear regression model on the Ames Housing dataset and use it to predict fair market prices for real estate sales.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [None]:
# Load the Ames Housing data set from the working directory.
df = pd.read_csv("AmesHousing.csv")

# Data exploration
# First rows, to eyeball the raw values.
print(df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
df.info()
# Full list of column labels available for feature selection.
print(df.columns)

In [None]:
# Target: the sale price column.
y = df['SalePrice']

# Predictors: three numeric columns plus one indicator column per distinct
# 'Neighborhood' value (each named "Neighborhood_<value>").
ohe = pd.get_dummies(df['Neighborhood'], prefix="Neighborhood")
X = df[['Lot Area', 'Year Built', 'Gr Liv Area']].join(ohe)

# Show the final feature column labels.
print(X.columns)

In [None]:
# Split into 80% training rows and 20% test rows; the fixed random_state
# makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=32,
)

In [None]:
# Create an unfitted scikit-learn LinearRegression estimator with its default
# settings; coefficients are only learned once `.fit()` is called.
mlr = LinearRegression()

In [None]:
# Fit the model on the training split: learns one coefficient (m) per feature
# column of X_train and the intercept value (b).
mlr.fit(X_train, y_train)

In [None]:
# Apply the fitted coefficients and intercept from `.fit()` to the held-out
# test features to compute the predicted y (sale price) values.
# NOTE(review): y_predict is not referenced by any later cell visible here;
# the evaluation cell calls mlr.score() directly.
y_predict = mlr.predict(X_test)

In [None]:
# Model
# Housing features to see a fair market price
#
# Prompts for one property's neighborhood, lot area, year built and living
# area, builds a single-row frame shaped like the training data, and prints
# the model's predicted price. Non-integer numeric input raises ValueError.

# Take the feature names straight from the training frame so the input row
# always has exactly the columns, in the same order, that `mlr` was fitted on.
# A hand-maintained list can silently drift from the data.
features = list(X.columns)

# Ask user for input
# Surrounding whitespace is stripped so " NWAmes " still matches its column.
neighborhood = input("Enter neighborhood (e.g., ClearCr, NWAmes, Veenker): ").strip()
lot_area = input("Enter lot area (e.g., 3000, 5000, 7000): ")
year_built = input("Enter year built (e.g., 1925, 1975, 2025): ")
gr_liv_area = input("Enter square feet of the house (e.g., 1000, 2000, 3000): ")

# Build a dataframe with zeros
row = pd.DataFrame([[0] * len(features)], columns=features)

# Fill numeric features
row.loc[0, 'Lot Area'] = int(lot_area)
row.loc[0, 'Year Built'] = int(year_built)
row.loc[0, 'Gr Liv Area'] = int(gr_liv_area)

# Activate the correct one-hot neighborhood
col_name = f'Neighborhood_{neighborhood}'
if col_name in row.columns:
    row.loc[0, col_name] = 1

    # Predict
    prediction = mlr.predict(row)[0]
    print("Predicted price: " f"${prediction:,.2f}")
else:
    # With no neighborhood indicator set, the row matches no training
    # example, so the resulting price would be misleading; skip the prediction.
    print(f"Warning: {neighborhood} not a valid neighborhood")
    print("No prediction made.")

In [None]:
# Evaluation
# Print the model's score on the training and test splits as percentages
# (for LinearRegression, score() is the R^2 coefficient of determination).
for label, split_X, split_y in (
    ("Train score:", X_train, y_train),
    ("Test score:", X_test, y_test),
):
    print(label, f"{mlr.score(split_X, split_y)*100:,.2f}%")