## Week 10 – Advanced Deep Learning (House Price Prediction)

In [14]:
# -------------------------------
# 1️⃣ Imports
# -------------------------------
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
import warnings

# Hide future warnings
warnings.filterwarnings("ignore")


In [5]:
# -------------------------------
# 2️⃣ Load Dataset
# -------------------------------
df = pd.read_csv(r"C:\Users\123\Documents\Semester 7\Data-Science-AI-Course\Data\Raw\house_prices.csv")

# Strip spaces from column names
df.columns = df.columns.str.strip()

# Check columns
print("Columns in CSV:", df.columns.tolist())


Columns in CSV: ['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'Poo

In [6]:
# -------------------------------
# 3️⃣ Features & Target
# -------------------------------
target_column = 'SalePrice'

# Keep only numeric features
X = df.select_dtypes(include=[np.number]).drop(target_column, axis=1)
y = df[target_column]

# Fill missing values
X = X.fillna(0)        # Or X.fillna(X.mean())
y = y.fillna(y.mean())


In [7]:
# -------------------------------
# 4️⃣ Train/Test Split
# -------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
# -------------------------------
# 5️⃣ Feature Scaling
# -------------------------------
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [9]:
# -------------------------------
# 6️⃣ Reshape for LSTM
# -------------------------------
# LSTM expects 3D input: (samples, timesteps, features)
X_train_rnn = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test_rnn = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))


In [10]:
# -------------------------------
# 7️⃣ Build & Train LSTM
# -------------------------------
model = Sequential()
model.add(Input(shape=(1, X_train.shape[1])))
model.add(LSTM(64, activation='relu'))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')

model.fit(X_train_rnn, y_train, epochs=10, batch_size=16, verbose=1)


Epoch 1/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 38885380096.0000 
Epoch 2/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 38884057088.0000 
Epoch 3/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 38880579584.0000 
Epoch 4/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 38874361856.0000 
Epoch 5/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 38865301504.0000 
Epoch 6/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 38853337088.0000 
Epoch 7/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 38838865920.0000 
Epoch 8/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 38822248448.0000 
Epoch 9/10
[1m73/73[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 38803357696.0000 
Epoch 10/1

<keras.src.callbacks.history.History at 0x14e96349160>

In [11]:
# -------------------------------
# 8️⃣ Predict & Evaluate
# -------------------------------
y_pred = model.predict(X_test_rnn).reshape(-1)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("LSTM Model Performance:")
print("MAE:", mae)
print("RMSE:", rmse)
print("R2 Score:", r2)


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step 
LSTM Model Performance:
MAE: 178607.78125
RMSE: 198840.80064212173
R2 Score: -4.15462589263916


In [12]:
# -------------------------------
# 9️⃣ Comparison Table (Week 9 vs Week 10)
# -------------------------------
results = pd.DataFrame({
    'Model': ['Linear Regression', 'Decision Tree', 'Random Forest', 'ANN (Week 9)', 'LSTM (Week 10)'],
    'MAE': [0, 0, 0, 0, mae],    # Replace 0 with previous weeks' results
    'RMSE': [0, 0, 0, 0, rmse],
    'R2 Score': [0, 0, 0, 0, r2]
})

results


Unnamed: 0,Model,MAE,RMSE,R2 Score
0,Linear Regression,0.0,0.0,0.0
1,Decision Tree,0.0,0.0,0.0
2,Random Forest,0.0,0.0,0.0
3,ANN (Week 9),0.0,0.0,0.0
4,LSTM (Week 10),178607.78125,198840.800642,-4.154626
