In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load the dataset
file_path = 'Real estate valuation data set.xlsx'
data = pd.read_excel(file_path)

# Drop irrelevant columns
data_cleaned = data.drop(columns=["No"])

# Separate features (X) and target (Y)
X = data_cleaned.drop(columns=["Y house price of unit area"])
y = data_cleaned["Y house price of unit area"]

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
model = LinearRegression()

# Train the model on the training data
model.fit(X_train, y_train)

# Predict on the test data
y_pred = model.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Output evaluation metrics
print("Model Evaluation:")
print("Y = House price of unit area (10,000 New Taiwan Dollar/Ping, where Ping is a local unit, 1 Ping = 3.3 square meters)")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R2): {r2:.2f}")


Model Evaluation:
Y = House price of unit area (10,000 New Taiwan Dollar/Ping, where Ping is a local unit, 1 Ping = 3.3 square meters)
Mean Squared Error (MSE): 53.50
R-squared (R2): 0.68


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load the dataset
file_path = 'Real estate valuation data set.xlsx'
data = pd.read_excel(file_path)

# Drop irrelevant columns
data_cleaned = data.drop(columns=["No"])

# Separate features (X) and target (Y)
X = data_cleaned.drop(columns=["Y house price of unit area"])
y = data_cleaned["Y house price of unit area"]

# Feature Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
model = LinearRegression()

# Train the model on the training data
model.fit(X_train, y_train)

# Predict on the test data
y_pred = model.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display evaluation results
print("Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R-squared (R2): {r2:.2f}")

# Explain Applications
print("\nApplications of the Model:")
print("1. Estimate property value based on features such as house age, proximity to MRT, and amenities.")
print("2. Perform scenario analysis to see how feature changes affect property prices.")
print("3. Assist in urban planning and policy-making by analyzing price-driving factors.")

# User Input for Prediction
print("\nEnter property features to predict house price (per Ping):")
try:
    transaction_date = float(input("Transaction date (e.g., 2013.25 for Q1 2013): "))
    house_age = float(input("House age (in years): "))
    distance_to_mrt = float(input("Distance to the nearest MRT station (in meters): "))
    num_convenience_stores = int(input("Number of convenience stores nearby: "))
    latitude = float(input("Latitude of the property: "))
    longitude = float(input("Longitude of the property: "))

    # Create a DataFrame for the user input
    user_input = pd.DataFrame([[
        transaction_date, house_age, distance_to_mrt, num_convenience_stores, latitude, longitude
    ]], columns=X.columns)

    # Preprocess the input data
    user_input_scaled = scaler.transform(user_input)

    # Make a prediction
    predicted_price = model.predict(user_input_scaled)[0]
    print(f"\nPredicted house price (per Ping): {predicted_price:.2f} (in 10,000 New Taiwan Dollars)")
    print(f"That is equivalent to {predicted_price * 10000:.2f} New Taiwan Dollars per Ping.")
except ValueError:
    print("Invalid input! Please ensure all inputs are in the correct format.")


Model Evaluation:
Mean Squared Error (MSE): 53.50
R-squared (R2): 0.68

Applications of the Model:
1. Estimate property value based on features such as house age, proximity to MRT, and amenities.
2. Perform scenario analysis to see how feature changes affect property prices.
3. Assist in urban planning and policy-making by analyzing price-driving factors.

Enter property features to predict house price (per Ping):


Transaction date (e.g., 2013.25 for Q1 2013):  2013
House age (in years):  25
Distance to the nearest MRT station (in meters):  700
Number of convenience stores nearby:  5
Latitude of the property:  25
Longitude of the property:  122



Predicted house price (per Ping): 31.31 (in 10,000 New Taiwan Dollars)
That is equivalent to 313128.53 New Taiwan Dollars per Ping.
