In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [11]:
# Load the house-price dataset from the Colab working directory and
# print its first five rows as a quick sanity check.
csv_path = '/content/house_price_regression_dataset (1).csv'
df = pd.read_csv(csv_path)
print(df.head())


   Square_Footage  Num_Bedrooms  Num_Bathrooms  Year_Built  Lot_Size  \
0            1360             2              1        1981  0.599637   
1            4272             3              3        2016  4.753014   
2            3592             1              2        2016  3.634823   
3             966             1              2        1977  2.730667   
4            4926             2              1        1993  4.699073   

   Garage_Size  Neighborhood_Quality   House_Price  
0            0                     5  2.623829e+05  
1            1                     6  9.852609e+05  
2            0                     9  7.779774e+05  
3            1                     8  2.296989e+05  
4            0                     8  1.041741e+06  


In [12]:
# Separate the regression target (y) from the predictors (X).
target_col = 'House_Price'
y = df[target_col]
X = df.drop(columns=[target_col])  # every remaining column is used as a feature


In [13]:
# 1. Scale the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [14]:
# 2. Apply PCA to reduce to 4 components
pca = PCA(n_components=4)
X_pca = pca.fit_transform(X_scaled)

In [15]:
# 3. Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

In [16]:
# 4. Fit an ordinary least-squares regressor on the training split
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [17]:
# 5. Score the fitted model on the held-out test split
y_pred = model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)


In [18]:
# Report how much variance each retained component explains, followed by
# the test-set error metrics, one "label value" line per item.
for label, value in (
    ("PCA Components Explained Variance Ratio:", pca.explained_variance_ratio_),
    ("Mean Squared Error:", mse),
    ("R-squared:", r2),
):
    print(label, value)

PCA Components Explained Variance Ratio: [0.16294252 0.16213721 0.14864701 0.13861011]
Mean Squared Error: 25171911306.133663
R-squared: 0.6094887119629436


In [23]:
# Predict for a new house (example).
# The values are positional and are labelled with X's columns, so they must
# be listed in the same order as the feature columns of X.
example_values = [1360, 2, 1, 1975, 0.599637, 0, 5]
example_house = pd.DataFrame(data=[example_values], columns=X.columns)

# Push the single row through the same scaler -> PCA -> model chain
# that was used for the training data.
example_scaled = scaler.transform(example_house)
example_pca = pca.transform(example_scaled)
predicted_price = model.predict(example_pca)
print("Predicted Price:", predicted_price[0])

Predicted Price: 341266.1718471638
