<a href="https://colab.research.google.com/github/Inno-Geek/House-Price-Prediction/blob/main/Predicting_house_prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential

# House-price regression pipeline:
#   1. load train/test CSVs and fill missing values,
#   2. one-hot encode the categorical columns,
#   3. hold out a validation split BEFORE any scaling or training,
#   4. standardize features using statistics from the training split only,
#   5. train a small dense network and report MSE / MAE / R^2 on the hold-out.

# Load the dataset
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Data preprocessing: replace every missing value with the sentinel -1
train_data.fillna(-1, inplace=True)
test_data.fillna(-1, inplace=True)

# Convert categorical features to strings so the -1 sentinel and the real
# labels share one dtype (the encoder rejects mixed int/str columns)
cat_cols = train_data.select_dtypes(include=['object']).columns
train_data[cat_cols] = train_data[cat_cols].astype(str)
test_data[cat_cols] = test_data[cat_cols].astype(str)

# One-hot encode the categorical features. The encoder only learns the
# category vocabulary (no target information), and categories that appear
# only in the test set are encoded as all zeros (handle_unknown='ignore').
one_hot_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
encoded_features_train = one_hot_encoder.fit_transform(train_data[cat_cols])
encoded_features_test = one_hot_encoder.transform(test_data[cat_cols])

# Combine encoded features with numeric features ('Id' is an identifier and
# 'SalePrice' is the target, so neither is used as an input)
numeric_cols = (
    train_data.select_dtypes(include=['int64', 'float64'])
    .drop(['Id', 'SalePrice'], axis=1)
    .columns
)
encoded_cols = [f'encoded_{i}' for i in range(encoded_features_train.shape[1])]
X_train = pd.concat(
    [
        pd.DataFrame(encoded_features_train, columns=encoded_cols),
        train_data[numeric_cols].reset_index(drop=True),
    ],
    axis=1,
)
X_test = pd.concat(
    [
        pd.DataFrame(encoded_features_test, columns=encoded_cols),
        test_data[numeric_cols].reset_index(drop=True),
    ],
    axis=1,
)
y_train = train_data['SalePrice']

# Hold out a separate validation set BEFORE scaling and training, so the
# reported metrics are computed on rows the model has never seen
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Standardize the data: fit on the training split only, then apply the same
# transformation to the validation and test sets
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Create the model. An explicit Input layer replaces the `input_dim`
# argument, which Keras warns about when passed to a layer in a Sequential.
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model, monitoring the loss on the held-out validation set
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=32,
)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate the model on the separate validation set
y_val_pred = model.predict(X_val)

mse = mean_squared_error(y_val, y_val_pred)
mae = mean_absolute_error(y_val, y_val_pred)
r2 = r2_score(y_val, y_val_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")



Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 39478321152.0000
Epoch 2/100
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 39585816576.0000
Epoch 3/100
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 38723899392.0000
Epoch 4/100
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 33854838784.0000
Epoch 5/100
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 24482054144.0000
Epoch 6/100
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 12694819840.0000
Epoch 7/100
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 6933769728.0000
Epoch 8/100
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 4787598848.0000
Epoch 9/100
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 3243874560.0000
Epoch 10/100
[1m46/46[0