In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Load the training CSV into a DataFrame; the path is relative to the
# notebook's working directory.
data = pd.read_csv(
    filepath_or_buffer="house-prices-advanced-regression-techniques/train.csv",
)

In [None]:
# Print the first five rows as a quick sanity check on the load.
print(data.head(n=5))

In [None]:
# Keep only the columns used by the model:
#   - square footage:      'GrLivArea'
#   - number of bedrooms:  'BedroomAbvGr'
#   - number of bathrooms: 'FullBath' and 'HalfBath' (combined in a later cell)
#   - target:              'SalePrice'
# `.copy()` makes the subset an independent frame, so adding columns to it
# later does not raise pandas' SettingWithCopyWarning.
data = data[['GrLivArea', 'BedroomAbvGr', 'FullBath', 'HalfBath', 'SalePrice']].copy()

In [None]:
# Combine the two bathroom counts into one feature, with each half bath
# counted as 0.5. `assign` returns a new frame with the column appended
# instead of writing into `data`, which at this point is a column subset of
# the loaded frame; this avoids pandas' SettingWithCopyWarning.
data = data.assign(TotalBathrooms=data['FullBath'] + 0.5 * data['HalfBath'])

In [None]:
# The individual bathroom counts are now folded into 'TotalBathrooms'.
data = data.drop(columns=['FullBath', 'HalfBath'])

In [None]:
# Discard every row that has a missing value in any remaining column.
data = data.dropna(axis=0, how='any')

In [None]:
# Split the frame into the feature matrix (X) and the target vector (y).
X, y = (
    data[['GrLivArea', 'BedroomAbvGr', 'TotalBathrooms']],
    data['SalePrice'],
)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Linear regression with an intercept term (the estimator's default setting).
model = LinearRegression(fit_intercept=True)

In [None]:
# Fit the model on the training split only.
model.fit(X=X_train, y=y_train)

In [None]:
# Predict sale prices for the held-out rows.
y_pred = model.predict(X=X_test)

In [None]:
# Score the held-out predictions against the true sale prices.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Report both evaluation metrics, one per line.
for label, value in (
    ("Mean Squared Error (MSE):", mse),
    ("R-squared (R2):", r2),
):
    print(label, value)

In [None]:
# Pair each fitted coefficient with the feature it belongs to. The names are
# read from the training frame's columns (the column order the model was
# fitted on) rather than from a hand-maintained list, so the table cannot
# drift out of step with `model.coef_` if the feature selection changes.
coefficients = pd.DataFrame({
    'Feature': list(X_train.columns),
    'Coefficient': model.coef_
})

In [None]:
# Print the heading and the coefficient table on separate lines.
print("Coefficients:", coefficients, sep="\n")

In [None]:
# Put the true and predicted sale prices for the held-out rows side by side.
# The frame keeps y_test's index; the predictions are attached by position.
predictions = y_test.to_frame(name='Actual').assign(Predicted=y_pred)

In [None]:
# Write the comparison table to disk without the index column, then confirm.
predictions.to_csv(path_or_buf='house_price_predictions.csv', index=False)
print("Predictions saved to 'house_price_predictions.csv'")
