In [110]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [111]:
# Load your dataset (replace 'your_dataset.csv' with your file)
my_mlp = pd.read_csv(r'C:\Users\DELL\Desktop\Machine Learning\real_estate_5000_rows.csv')

In [112]:
my_mlp.shape

(5000, 9)

In [113]:
# Display the first few rows of the dataset
my_mlp.head()

Unnamed: 0,Price,Location,Square_Feet,Bedrooms,Bathrooms,Year_Built,Lot_Size,Neighborhood,City
0,439737,Central,3266,3,4,2004,5102,Greenwood,Portland
1,388938,Northwest,2238,3,1,1984,4406,Greenwood,Portland
2,451815,Central,2646,4,1,1998,5669,Hawthorne,Portland
3,521842,Southwest,2292,3,2,2006,5165,Brighton,Seattle
4,381267,Central,2399,5,2,1992,5043,Greenwood,Portland


In [114]:
# Check for missing values
my_mlp.isnull().sum()

Price           0
Location        0
Square_Feet     0
Bedrooms        0
Bathrooms       0
Year_Built      0
Lot_Size        0
Neighborhood    0
City            0
dtype: int64

In [115]:
my_mlp.describe()

Unnamed: 0,Price,Square_Feet,Bedrooms,Bathrooms,Year_Built,Lot_Size
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,400447.6554,2199.0284,3.5692,2.4254,2001.1106,5499.904
std,79718.384429,399.939063,0.867042,0.81521,12.384016,998.540217
min,140698.0,631.0,2.0,1.0,1980.0,1034.0
25%,347367.25,1935.0,3.0,2.0,1990.0,4828.75
50%,401077.0,2197.0,4.0,2.0,2001.0,5491.0
75%,453280.0,2466.0,4.0,3.0,2011.0,6167.0
max,714099.0,3551.0,5.0,4.0,2022.0,9042.0


In [116]:
my_mlp.columns

Index(['Price', 'Location', 'Square_Feet', 'Bedrooms', 'Bathrooms',
       'Year_Built', 'Lot_Size', 'Neighborhood', 'City'],
      dtype='object')

In [117]:
## Preparing Features and Target
# Target variable
y = my_mlp["Price"]

In [118]:
# Features (droping the target column)
X = my_mlp.drop(columns=["Price"])

In [119]:
## Grouping or categorizing the columns
# Categorical and numerical feature lists
categorical_features = ["Location", "Neighborhood", "City"]
numerical_features = ["Square_Feet", "Bedrooms", "Bathrooms", "Year_Built", "Lot_Size"]

In [120]:
## The Preprocessing Pipeline (Defining transformations for numerical and categorical data )
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

In [121]:
# Full Pipeline With Random Forest Regressor (Defining full pipeline with model )
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42))
])

In [122]:
#  Train/Test Split into x and y ( Split dataset)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

In [123]:
# Training my Model (Fit the pipeline )
model_pipeline.fit(X_train, y_train)

In [124]:
# Making Predictions
y_pred = model_pipeline.predict(X_test)

In [125]:
# Calculate performance metrics
mean = mean_absolute_error(y_test, y_pred)
mean_square_error = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [126]:
# Print evaluation results
print("Linear Regression Results")
print("Mean Absolute Error (MAE):", mean)
print("Mean Squared Error (MSE):", mean_square_error)
print("R² Score:", r2)

Linear Regression Results
Mean Absolute Error (MAE): 64558.38687
Mean Squared Error (MSE): 6467797161.721063
R² Score: -0.06881217759393765


In [127]:
# Update the pipeline to use Linear Regression
linear_model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

In [None]:
# Training the Linear Regression model
linear_model_pipeline.fit(X_train, y_train)

In [129]:
# Make predictions
y_pred_linear = linear_model_pipeline.predict(X_test)

# Evaluate the Linear Regression model
mae_linear = mean_absolute_error(y_test, y_pred_linear)
mse_linear = mean_squared_error(y_test, y_pred_linear)
r2_linear = r2_score(y_test, y_pred_linear)

In [130]:
# Print evaluation results
print("Linear Regression Results")
print("Mean Absolute Error (MAE):", mae_linear)
print("Mean Squared Error (MSE):", mse_linear)
print("R² Score:", r2)

Linear Regression Results
Mean Absolute Error (MAE): 61968.65499466339
Mean Squared Error (MSE): 6047096524.94295
R² Score: -0.06881217759393765
