In [15]:
!pip install pandas scikit-learn tensorflow
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Locations of the training and testing CSV files (machine-specific paths).
TRAIN_CSV_PATH = r"C:\Users\nsrip\OneDrive\Documents\house_price_prediction_training.csv"
TEST_CSV_PATH = r"C:\Users\nsrip\Downloads\house_price_prediction_testing.csv"

# Read both files into DataFrames.
train_data = pd.read_csv(TRAIN_CSV_PATH)
test_data = pd.read_csv(TEST_CSV_PATH)

# Data preprocessing
# Columns the models work with; the last entry ('Price') is the target.
required_columns = ['living area', 'lot area', 'Area of the basement', 'Built Year', 'Price']
feature_columns = required_columns[:-1]

# Keep only the needed columns and drop every row that has a missing value.
# The testing frame is restricted to the feature columns (no 'Price').
train_data = train_data[required_columns].dropna()
test_data = test_data[feature_columns].dropna()

# Separate the predictors from the target.
X = train_data[feature_columns]
y = train_data['Price']

# Hold out 20% of the rows for validation; the seed is fixed for repeatability.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply it to both splits so
# the validation rows do not influence the scaling statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Train a Random Forest Regressor (100 trees, fixed seed) on the scaled
# training split.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Evaluate the model on the validation set.
y_val_pred_rf = rf_model.predict(X_val_scaled)
# RMSE is computed as the square root of the MSE instead of passing
# `squared=False`: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, whereas this form works on every version.
rmse_rf = mean_squared_error(y_val, y_val_pred_rf) ** 0.5
print(f"Random Forest RMSE on the validation set: {rmse_rf}")

# Train a Neural Network using TensorFlow/Keras: two hidden ReLU layers
# (64 and 32 units) followed by a single linear output unit.
# The input width is declared with an explicit `keras.Input` instead of the
# `input_shape=` argument on the first Dense layer, which newer Keras releases
# warn against for Sequential models.
model = keras.Sequential([
    keras.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1),
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model; the validation split is evaluated after every epoch.
model.fit(
    X_train_scaled,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val_scaled, y_val),
)

# Evaluate the model on the validation set. The prediction array is flattened
# to 1-D so it lines up with `y_val`.
y_val_pred_nn = model.predict(X_val_scaled).flatten()
# Square root of the MSE rather than `squared=False`, which was deprecated in
# scikit-learn 1.4 and removed in 1.6.
rmse_nn = mean_squared_error(y_val, y_val_pred_nn) ** 0.5
print(f"Neural Network RMSE on the validation set: {rmse_nn}")

def _label_price_change(delta):
    """Return a text label for one within-group price difference.

    Args:
        delta: Difference between a predicted price and the previous predicted
            price in the same 'Built Year' group; NaN when the row has no
            predecessor in its group.

    Returns:
        'N/A' for a missing difference, 'Increase' for a positive one,
        'Decrease' for a negative one and 'No change' for exactly zero.
    """
    if pd.isna(delta):
        # First row of a group: there is nothing to compare against, so it
        # must not be reported as a decrease.
        return 'N/A'
    if delta > 0:
        return 'Increase'
    if delta < 0:
        return 'Decrease'
    return 'No change'


def _add_price_change_status(frame, price_column, status_column):
    """Add a column labelling each row's price change within its 'Built Year'.

    Each row is compared with the previous row (in frame order) that shares
    the same 'Built Year'. The frame is modified in place.

    Args:
        frame: DataFrame containing 'Built Year' and `price_column`.
        price_column: Name of the column holding the predicted prices.
        status_column: Name of the label column to create or overwrite.
    """
    deltas = frame.groupby('Built Year')[price_column].diff()
    frame[status_column] = deltas.apply(_label_price_change)


# Extract only the relevant features from the test data, in the same order the
# scaler was fitted on. Selecting them explicitly (instead of aliasing
# `test_data`) keeps X_test free of the prediction columns added below.
test_feature_columns = required_columns[:-1]
X_test = test_data[test_feature_columns]

# Standardize features using the same scaler
X_test_scaled = scaler.transform(X_test)

# Predict house prices on the testing set using the Random Forest model
y_test_pred_rf = rf_model.predict(X_test_scaled)

# Store predictions alongside the features
test_data['PredictedPrice_RF'] = y_test_pred_rf

# Label each row's change relative to the previous row with the same built year
_add_price_change_status(test_data, 'PredictedPrice_RF', 'Price_Change_Status')

print("Predictions using Random Forest:")
print(test_data[test_feature_columns + ['PredictedPrice_RF', 'Price_Change_Status']])

# Predict house prices on the testing set using the Neural Network model
y_test_pred_nn = model.predict(X_test_scaled).flatten()

# Store predictions alongside the features
test_data['PredictedPrice_NN'] = y_test_pred_nn

# Label each row's change relative to the previous row with the same built year
_add_price_change_status(test_data, 'PredictedPrice_NN', 'Price_Change_Status_NN')

print("Predictions using Neural Network:")
print(test_data[test_feature_columns + ['PredictedPrice_NN', 'Price_Change_Status_NN']])


Random Forest RMSE on the validation set: 238533.93683675077
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Neural Network RMSE on the validation set: 477784.5078713692
Predictions using Random Forest:
       living area  lot area  Area of the basement  Built Year  \
0             3650      9050                   280        1921   
1             2920      4000                  1010        1909   
2             2910      9480                     0        1939   
3             3310     42998                     0        2001   
4             2710      4500                   830        1929   
...            ...       ...                   ...         ...   
14615         1556     20000                     0        1957   
14616         1680      7000                     0        1968   
14617         1070      6120                     0        1962   
14618         1030      6621                     0        1955   
14619   