In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the training data; the path is relative to the working directory,
# so the CSV must live in a 'Datasets' sub-folder.
df = pd.read_csv('./Datasets/train.csv')

# Columns used by the model:
#   GrLivArea    - above grade (ground) living area
#   BedroomAbvGr - number of bedrooms above ground
#   FullBath     - number of full bathrooms
#   SalePrice    - target variable
feature_cols = ['GrLivArea', 'BedroomAbvGr', 'FullBath']
target_col = 'SalePrice'

# Keep only those columns and discard any row with a missing value in them.
data = df[feature_cols + [target_col]].dropna()

# Feature matrix (X) and target vector (y).
X = data[feature_cols]
y = data[target_col]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the linear regression on the training portion (fit() returns the
# fitted estimator itself, so construction and training can be chained).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the fit quality.
print("Model Evaluation:")
print(f"R² Score: {r2:.2f}")
print(f"Mean Squared Error: {mse:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")

# Report the learned weights, one line per feature, then the intercept.
print("\nModel Coefficients:")
for feature, coef in zip(feature_cols, model.coef_):
    print(f"{feature}: {coef:.2f}")
print(f"Intercept: {model.intercept_:.2f}")


Model Evaluation:
R² Score: 0.63
Mean Squared Error: 2806426667.25
Root Mean Squared Error: 52975.72

Model Coefficients:
GrLivArea: 104.03
BedroomAbvGr: -26655.17
FullBath: 30014.32
Intercept: 52261.75
