In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Read the real-estate CSV into a DataFrame.
# The path is relative, so the file must sit in the kernel's working directory.
data = pd.read_csv(filepath_or_buffer='Real_Estate.csv')

In [3]:
# Preview the top of the table (5 rows is the pandas default, spelled out here).
# Kept as the cell's last expression so the notebook renders it.
data.head(n=5)

Unnamed: 0,Transaction date,House age,Distance to the nearest MRT station,Number of convenience stores,Latitude,Longitude,House price of unit area
0,2012-09-02 16:42:30.519336,13.3,4082.015,8,25.007059,121.561694,6.488673
1,2012-09-04 22:52:29.919544,35.5,274.0144,2,25.012148,121.54699,24.970725
2,2012-09-05 01:10:52.349449,1.1,1978.671,10,25.00385,121.528336,26.694267
3,2012-09-05 13:26:01.189083,22.2,1055.067,5,24.962887,121.482178,38.091638
4,2012-09-06 08:29:47.910523,8.5,967.4,6,25.011037,121.479946,21.65471


In [4]:
# Data Preprocessing
# Count the missing entries in every column and print the per-column totals
print(data.isna().sum())

Transaction date                       0
House age                              0
Distance to the nearest MRT station    0
Number of convenience stores           0
Latitude                               0
Longitude                              0
House price of unit area               0
dtype: int64


In [5]:
# Target: the column the models are trained to predict
y = data['House price of unit area']

# Features: the three predictor columns passed to every model below
X = data[
    [
        'House age',
        'Distance to the nearest MRT station',
        'Number of convenience stores',
    ]
]

In [6]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [7]:
# Ordinary least-squares baseline: fit on the training split
# (fit() returns the estimator itself, so construction and fitting are chained)
linear_model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out rows, scored in the evaluation cell
y_pred_linear = linear_model.predict(X_test)

In [8]:
# Score the linear model on the held-out test split: MSE and R^2
print("Linear Regression Model Evaluation:")
print(
    "Mean Squared Error:",
    mean_squared_error(y_true=y_test, y_pred=y_pred_linear),
)
print(
    "R^2 Score:",
    r2_score(y_true=y_test, y_pred=y_pred_linear),
)

Linear Regression Model Evaluation:
Mean Squared Error: 123.90940128297588
R^2 Score: 0.5520949224893856


In [9]:
# Ridge (L2-penalised) regression with penalty strength alpha=1.0,
# fitted on the same training split as the linear baseline.
# NOTE(review): the features are passed in unscaled; the penalty is sensitive
# to feature scale, so standardising first may be worth evaluating.
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred_ridge = ridge_model.predict(X_test)

In [10]:
# Score the ridge model on the held-out test split: MSE and R^2
print("Ridge Regression Model Evaluation:")
print(
    "Mean Squared Error:",
    mean_squared_error(y_true=y_test, y_pred=y_pred_ridge),
)
print(
    "R^2 Score:",
    r2_score(y_true=y_test, y_pred=y_pred_ridge),
)

Ridge Regression Model Evaluation:
Mean Squared Error: 123.9046467607809
R^2 Score: 0.5521121090354444


In [11]:
# Lasso (L1-penalised) regression with penalty strength alpha=1.0,
# fitted on the same training split as the other models.
# NOTE(review): the features are passed in unscaled; the penalty is sensitive
# to feature scale, so standardising first may be worth evaluating.
lasso_model = Lasso(alpha=1.0).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred_lasso = lasso_model.predict(X_test)