In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Step 1: Load the property dataset from a CSV file.
# Point DATA_PATH at a different file to run the notebook on another dataset.
DATA_PATH = 'data_property.csv'
data = pd.read_csv(DATA_PATH)

In [None]:
# Step 2: Split the frame into the feature matrix (X) and the target vector (y)
feature_columns = ['Land Area (m²)', 'Building Area (m²)', 'Number of Bedrooms']
target_column = 'Selling Price (million)'

X = data[feature_columns]
y = data[target_column]

In [None]:
# Step 3: Hold out 20% of the rows as a test set; the fixed random_state keeps
# the split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Step 4: Standardize the features with StandardScaler.
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test split are used for scaling.
# NOTE: X_train / X_test are rebound to their scaled values here, so re-running
# this cell without first re-running Step 3 would scale already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Step 5: Build a linear regression model and fit it on the scaled training split.
# The fit call is left as the cell's last expression so the fitted estimator is
# still displayed as the cell output.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Step 6: Predict selling prices for the held-out test split and print them
y_pred = model.predict(X=X_test)
print('Predicted selling prices:', y_pred)

In [None]:
# Step 8: Visualize the results - predicted price against actual price for the
# test split, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, color='blue')
ax.set(
    xlabel="Actual Price",
    ylabel="Predicted Price",
    title="Actual Price vs. Predicted Price",
)
ax.grid()
plt.show()

In [None]:
# Residual plot for the test split: residual (actual - predicted) per sample.
# The residuals are drawn against the PREDICTED price, not the actual price:
# for a least-squares fit, residuals tend to be positively correlated with the
# actual target even when the model is adequate, so a residual-vs-actual plot
# shows a misleading upward trend. Against the predictions, a well-specified
# model should show a structureless band around zero.
residuals = y_test - y_pred

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_pred, residuals, color='blue')
ax.axhline(y=0, color='red', linestyle='--')  # zero-error reference line
ax.set_xlabel("Predicted Price (million)")
ax.set_ylabel("Residuals (Actual - Predicted)")
ax.set_title("Residual Plot")
ax.grid()
plt.show()

In [None]:
# Distribution of the target column over the full dataset: a 20-bin histogram
# with a KDE overlay, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(
    data=data,
    x='Selling Price (million)',
    bins=20,
    kde=True,
    color='blue',
    ax=ax,
)
# Set the labels after plotting so they replace the ones seaborn assigns.
ax.set_xlabel("Selling Price (million)")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Property Prices")
ax.grid()
plt.show()

In [None]:
# Step 7: Evaluate the model on the held-out test split.
# Each metric compares the true test targets (y_test) with the predictions
# (y_pred) produced in Step 6.
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Assemble the report first, then emit it with a single print call.
report_lines = [
    "Model Evaluation Results:",
    f"Mean Absolute Error: {mae}",
    f"Mean Squared Error: {mse}",
    f"R-squared (Coefficient of Determination): {r2}",
]
print("\n".join(report_lines))