# House Price Prediction

## Import Libraries

In [1]:
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from transformations import Transformations

## Load Datasets

In [2]:
# Load key/value pairs from a .env file into the process environment so the
# dataset paths (TEST_DATA / TRAIN_DATA) can be read via `os` further down.
# The displayed return value only reports whether any variable was set.
load_dotenv()


True

In [3]:
# Read the dataset locations from the environment and load both CSV files.
# os.environ[...] is used instead of os.getenv(...): a missing variable then
# raises a KeyError that names the variable, rather than passing None to
# pd.read_csv and failing with an unrelated "invalid file path" error.
test = pd.read_csv(os.environ['TEST_DATA'])
train = pd.read_csv(os.environ['TRAIN_DATA'])

In [4]:
# Target vector: the 'SalePrice' column of the training frame.
y_train = train['SalePrice']
# Predictor matrix: the training frame with the target column removed.
X_train = train.drop('SalePrice', axis=1)
# The test frame is used as-is (same object, no copy) as the test predictors.
X_test = test

## EDA

In [5]:
# Partition the predictor columns by dtype, preserving column order:
# float64/int64 columns are treated as numerical, object columns as categorical.
# Columns of any other dtype end up in neither list.
numerical_features = [
    column
    for column, dtype in X_train.dtypes.items()
    if dtype == 'float64' or dtype == 'int64'
]
categorical_features = [
    column
    for column, dtype in X_train.dtypes.items()
    if dtype == 'object'
]

In [6]:
# Build the project's preprocessing helper from the two column-name lists
# (categorical first, numerical second).
# NOTE(review): what it does to each group is defined in the `transformations`
# module, which is not visible from this notebook — confirm there.
transformer = Transformations(categorical_features, numerical_features)

### Feature Preparation

In [7]:
# Fit the transformer on the training predictors and transform them in one step,
# then apply the same transformer to the test predictors.
# NOTE(review): presumably follows the scikit-learn fit/transform convention, so
# the test set is encoded with parameters learned from training data only — confirm
# against the Transformations implementation.
X_train_transformed = transformer.fit_transform(X_train)
X_test_transformed = transformer.transform(X_test)

## Model

In [8]:
# Fit an ordinary least-squares regressor on the full transformed training set.
# `fit` returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train_transformed, y_train)

# Predictions for the transformed test set.
y_pred = model.predict(X_test_transformed)

### Model Evaluation

In [9]:
# Hold out 20% of the transformed training rows for validation (fixed seed so
# the split is reproducible).
# NOTE(review): the transformer was fitted on the full training set before this
# split, so the validation rows influenced preprocessing — consider splitting
# the raw data first.
X_train_sub, X_val, y_train_sub, y_val = train_test_split(
    X_train_transformed, y_train, test_size=0.2, random_state=42
)

# Fit a separate estimator for validation. Refitting `model` here would silently
# replace the full-data fit from the previous cell, leaving `model` and `y_pred`
# out of sync for any later cell that uses them.
val_model = LinearRegression()
val_model.fit(X_train_sub, y_train_sub)

# Predict on validation set
y_val_pred = val_model.predict(X_val)

# Compute evaluation metrics
# NOTE(review): the recorded output for this cell (MSE ~5e28, R² ~ -7e18) is far
# outside any plausible range, which points to a numerically unstable fit —
# possibly collinear or rarely-populated encoded columns; investigate before
# trusting these numbers.
mse = mean_squared_error(y_val, y_val_pred)
r2 = r2_score(y_val, y_val_pred)

print(f"Validation MSE: {mse:.4f}")
print(f"Validation R²: {r2:.4f}")

Validation MSE: 52277989395077381297597841408.0000
Validation R²: -6815614875165635584.0000
