In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --- 1. Load the cleaned data ---

In [2]:
df = pd.read_csv('./Data/cleaned_housePrice.csv')
df

Unnamed: 0,Area,Room,Parking,Warehouse,Elevator,Address,Price,Price(USD)
0,63,1,True,True,True,Shahran,1.850000e+09,61666.67
1,60,1,True,True,True,Shahran,1.850000e+09,61666.67
2,79,2,True,True,True,Pardis,5.500000e+08,18333.33
3,95,2,True,True,True,Shahrake Qods,9.025000e+08,30083.33
4,123,2,True,True,True,Shahrake Gharb,7.000000e+09,233333.33
...,...,...,...,...,...,...,...,...
3243,63,1,True,True,False,Feiz Garden,1.890000e+09,63000.00
3244,86,2,True,True,True,Southern Janatabad,3.500000e+09,116666.67
3245,83,2,True,True,True,Niavaran,6.800000e+09,226666.67
3246,105,2,True,True,True,Dorous,5.600000e+09,186666.67


# --- 2. Define Features (X) and Target (y) ---

In [3]:
# We will use 'Area', 'Room', 'Parking', 'Warehouse', and 'Elevator' to predict 'Price'.
features = ['Area', 'Room', 'Parking', 'Warehouse', 'Elevator']
target = 'Price'

# Make sure all required columns exist in the DataFrame
if not all(col in df.columns for col in features + [target]):
    print("Error: One or more required columns are missing from the dataset.")
    exit()

X = df[features]
y = df[target]

# --- 3. Split the data into training and testing sets ---

In [4]:
# This is crucial for evaluating the model's performance on new, unseen data.
# We'll use 80% for training and 20% for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"\nTraining set size: {X_train.shape[0]} samples")
print(f"Testing set size: {X_test.shape[0]} samples")


Training set size: 2598 samples
Testing set size: 650 samples


# --- 4. Train the Linear Regression Model ---

In [5]:
model = LinearRegression()
print("\nTraining the Linear Regression model...")
model.fit(X_train, y_train)
print("Model training complete.")


Training the Linear Regression model...
Model training complete.


# --- 5. Make predictions on the test set ---

In [6]:
y_pred = model.predict(X_test)

# --- 6. Evaluate the model's performance ---

In [7]:
print("\n--- Model Evaluation ---")

# R-squared (R2) Score: Explains the proportion of variance in the dependent variable
# that is predictable from the independent variable(s).
r2 = r2_score(y_test, y_pred)
print(f"R-squared (R2) Score: {r2:.2f}")

# Mean Absolute Error (MAE): The average of the absolute differences between
# predicted and actual values.
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error (MAE): {mae:.2f}")

# Mean Squared Error (MSE): The average of the squared differences.
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error (MSE): {mse:.2f}")


--- Model Evaluation ---
R-squared (R2) Score: 0.33
Mean Absolute Error (MAE): 4038804350.09
Mean Squared Error (MSE): 38009926598844047360.00


In [8]:
print("\nPredictive modeling code executed successfully.")


Predictive modeling code executed successfully.
