In [72]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures, MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [73]:
# Load the raw dataset from CSV into a DataFrame.
# NOTE(review): the absolute path looks environment-specific (e.g. a hosted
# notebook runtime) -- confirm it exists wherever this notebook is re-run.
file_path = '/content/kc_house_data.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [74]:

# --- Preprocessing -----------------------------------------------------------
# Produces: data (cleaned frame), X / y (features / target),
#           X_train, X_test, y_train, y_test (80/20 split, seed 42),
#           scaler (MinMaxScaler fitted on the training rows only).

# Drop the columns that are not used as model features.
data = data.drop(['id', 'date', 'zipcode', 'lat', 'long'], axis=1)

# Reassign rather than mutate in place: same result, no hidden in-place state.
data = data.drop_duplicates()

# Remove price outliers using the 1.5 * IQR fences.
Q1 = data['price'].quantile(0.25)
Q3 = data['price'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
# Series.between is inclusive on both ends by default, matching >= / <=.
data = data[data['price'].between(lower_bound, upper_bound)]

X = data.drop(['price'], axis=1)
y = data['price']

# Split BEFORE scaling. Fitting the scaler on the full feature matrix would let
# the test rows' min/max influence the training features (data leakage) and
# make the evaluation metrics optimistic. The seed and row order are unchanged,
# so the same rows end up in the train and test partitions as before.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the per-feature min/max from the training rows only, then apply that
# same mapping to the test rows (test values may fall slightly outside [0, 1]).
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Kept so any later cell that reads X_scaled still works; it is now produced by
# the train-fitted scaler, so it carries no test-set statistics.
X_scaled = scaler.transform(X)


In [75]:
# Linear Regression
# Fit ordinary least squares on the training split, then score the
# predictions for the held-out test split.
linear_model = LinearRegression().fit(X_train, y_train)
y_pred_linear = linear_model.predict(X_test)

# Both metrics compare the same (y_test, y_pred_linear) pair.
linear_r2 = r2_score(y_test, y_pred_linear)
linear_mse = mean_squared_error(y_test, y_pred_linear)

In [76]:
# Polynomial Regression
# Expand the features to degree-2 polynomial terms; the expansion is fitted
# on the training split and then applied unchanged to both splits.
poly = PolynomialFeatures(degree=2).fit(X_train)
X_poly_train = poly.transform(X_train)
X_poly_test = poly.transform(X_test)

# A plain linear model on the expanded features.
poly_model = LinearRegression().fit(X_poly_train, y_train)
y_pred_poly = poly_model.predict(X_poly_test)

poly_r2 = r2_score(y_test, y_pred_poly)
poly_mse = mean_squared_error(y_test, y_pred_poly)

In [77]:
# Decision Tree Regression
# Default-hyperparameter tree; random_state pins any randomness in the fit so
# the result is reproducible across runs.
tree_model = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)

tree_r2 = r2_score(y_test, y_pred_tree)
tree_mse = mean_squared_error(y_test, y_pred_tree)

In [78]:
# Print one summary line per model: test-set MSE (2 decimals) and R2 (4 decimals).
print("Model Performance:")
for label, mse, r2 in (
    ("Linear Regression", linear_mse, linear_r2),
    ("Polynomial Regression", poly_mse, poly_r2),
    ("Decision Tree Regression", tree_mse, tree_r2),
):
    print(f"{label} - MSE: {mse:.2f}, R2: {r2:.4f}")

Model Performance:
Linear Regression - MSE: 18093690990.39, R2: 0.5759
Polynomial Regression - MSE: 16633184210.97, R2: 0.6101
Decision Tree Regression - MSE: 29005291171.75, R2: 0.3202
