In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit a linear regression on three numeric columns of train.csv, report
# hold-out metrics, then write test-set predictions to a submission file.

# Model inputs and target, defined once so the training and test feature
# matrices are always built from the same columns.
FEATURE_COLUMNS = ['GrLivArea', 'BedroomAbvGr', 'FullBath']
TARGET_COLUMN = 'SalePrice'

# Predictions are written to their own file. Writing them back to
# 'sample_submission.csv' would overwrite the template this cell reads,
# leaving no pristine copy of the input after the first run.
SUBMISSION_TEMPLATE_PATH = 'sample_submission.csv'
SUBMISSION_OUTPUT_PATH = 'submission.csv'

# Load training data
train_df = pd.read_csv('train.csv')

# Select relevant features and target
X = train_df[FEATURE_COLUMNS]
y = train_df[TARGET_COLUMN]

# Split data into training and validation sets (80/20); the fixed
# random_state makes the split reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on validation set
y_pred = model.predict(X_val)

# Evaluate the model on the held-out validation rows
mse = mean_squared_error(y_val, y_pred)
r2 = r2_score(y_val, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Print the predicted values for the validation set
print('Predicted house prices for validation set:', y_pred)

# Load test data and build the feature matrix with the same columns
# the model was trained on.
test_df = pd.read_csv('test.csv')
X_test = test_df[FEATURE_COLUMNS]

# Predict on test set
test_pred = model.predict(X_test)

# Prepare submission file from the template.
# NOTE(review): this assumes the template rows are in the same order as
# the rows of test.csv -- confirm before submitting.
submission = pd.read_csv(SUBMISSION_TEMPLATE_PATH)
assert len(submission) == len(test_pred), (
    f'submission template has {len(submission)} rows but '
    f'{len(test_pred)} predictions were produced'
)
submission[TARGET_COLUMN] = test_pred
submission.to_csv(SUBMISSION_OUTPUT_PATH, index=False)

# Print the predicted values for the test set
print('Predicted house prices for test set:', test_pred)


Mean Squared Error: 2806426667.247853
R-squared: 0.6341189942328371
Predicted house prices for validation set: [113410.67255298 305081.87775899 135904.78562983 205424.67564124
 227502.68349004 121157.48079629 205577.98056584 183787.20378269
 121157.48079629 147219.22233196 185971.75622995  93229.5689926
 121965.26049018 192005.28203666 208129.35962356 133824.25948958
 211250.14883393 146931.57417313 134240.36471763 186179.80884397
 219884.33231595 197726.72892233 185867.72992294 126334.3653847
 181186.54610738 183707.60823788 171720.15216927 118844.47127982
 199183.09722051 199623.63321076 132551.51304323 245162.72491993
 383364.40832164 127998.78629689 202511.9390449  120484.46142981
 200376.66521523 206232.45533514 241160.86372906 120716.94480604
 124645.5137103  251820.40856871 101135.56832553 244746.61969188
 105088.56799199 208912.70855526 123837.7340164  101655.69986059
 271169.30167299 160846.66855057 105088.56799199 221989.28921839
 137960.88100787 360448.30385172 143835.215724