In [4]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the training and test splits from CSV files in the working directory.
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

# Quick look at the training frame: column names, then per-column null counts.
train_columns = list(df_train.columns)
missing_per_column = df_train.isna().sum()
print("Columns in training data:", train_columns)
print("Missing values in training data:\n", missing_per_column)

# Predictor columns and the column the model is trained to predict.
output_label = 'SalePrice'
input_features = ['GrLivArea', 'BedroomAbvGr', 'FullBath']

# Pull the feature matrix and target vector out of the training frame.
X_data, y_data = df_train[input_features], df_train[output_label]

# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.
X_tr, X_val, y_tr, y_val = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=42,
)

# Fit an ordinary least-squares regression on the training split.
lin_reg = LinearRegression().fit(X_tr, y_tr)

# Score the fitted model on the held-out validation rows.
val_predictions = lin_reg.predict(X_val)
r2 = r2_score(y_val, val_predictions)
mse = mean_squared_error(y_val, val_predictions)

print(f"Validation Mean Squared Error: {mse:.2f}")
print(f"Validation R² Score: {r2:.4f}")

# Predict on test data using the same feature columns the model was fitted on.
X_test_final = df_test[input_features]
final_predictions = lin_reg.predict(X_test_final)

# Prepare submission.
# Predictions come out in test.csv row order, while the template supplies the
# output rows. Assigning by position is only correct if both files list the
# same Ids in the same order, so verify that instead of risking a file that
# silently pairs prices with the wrong houses.
submission_df = pd.read_csv("sample_submission.csv")
if "Id" in submission_df.columns and "Id" in df_test.columns:
    if submission_df["Id"].tolist() != df_test["Id"].tolist():
        raise ValueError(
            "Id column of sample_submission.csv does not match test.csv; "
            "predictions cannot be assigned by position."
        )
submission_df['SalePrice'] = final_predictions
submission_df.to_csv("House_price_prediction.csv", index=False)

print("✅ Submission file 'House_price_prediction.csv' created successfully.")


Columns in training data: ['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPo