In [2]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [3]:
# Read the training and test CSV files (in that order) into DataFrames.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('Prasunet_ML_01/train.csv', 'Prasunet_ML_01/test.csv')
)

In [4]:
# Name of the column the model learns to predict.
target = 'SalePrice'

# Columns used as model inputs.
features = [
    'GrLivArea',
    'BedroomAbvGr',
    'FullBath',
]

In [5]:
# Handle missing values by filling them with the per-column median.
# The medians are computed from the training set only and then reused for the
# test set, so both sets are imputed with the same statistics. Previously the
# test set was filled with its own medians, which made the preprocessing
# applied at prediction time differ from what the model was trained on.
feature_medians = train_data[features].median()
train_data[features] = train_data[features].fillna(feature_medians)
test_data[features] = test_data[features].fillna(feature_medians)

In [6]:
# Split the training frame into the feature matrix and the target column.
X_train, y_train = train_data[features], train_data[target]


In [7]:
# Take only the feature columns from the test frame; no target is read from it here.
X_test = test_data.loc[:, features]


In [8]:
# Fit the scaler on the training features only, then apply that same
# fitted transform to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Create an (unfitted) ordinary least squares regressor; the intercept is
# spelled out explicitly and matches the library default.
model = LinearRegression(fit_intercept=True)


In [10]:
# Fit the regressor on the standardized training features and the target.
model.fit(X=X_train_scaled, y=y_train)

In [11]:
# In-sample predictions, used below to compute the training-set metrics.
y_train_pred = model.predict(X=X_train_scaled)

In [12]:
# Compute MAE, MSE and R^2 (in that order) of the in-sample predictions
# against the true training targets.
mae_train, mse_train, r2_train = (
    metric(y_train, y_train_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

In [13]:
# Report the training-set metrics, one per line.
print(
    "Training Set Evaluation:",
    f"Mean Absolute Error (MAE): {mae_train}",
    f"Mean Squared Error (MSE): {mse_train}",
    f"R-squared (R2): {r2_train}",
    sep="\n",
)

Training Set Evaluation:
Mean Absolute Error (MAE): 35179.77185588003
Mean Squared Error (MSE): 2628535155.618378
R-squared (R2): 0.5832212987566321


In [14]:
# Predict sale prices for the standardized test features.
y_test_pred = model.predict(X=X_test_scaled)

In [18]:
# Pair each test-row Id with its predicted SalePrice and write the result
# to CSV without the DataFrame index.
predictions = test_data[['Id']].assign(SalePrice=y_test_pred)
predictions.to_csv('house_prices_predictions.csv', index=False)

print("Predictions saved to 'house_prices_predictions.csv'")

Predictions saved to 'house_prices_predictions.csv'
