## preprocessing

In [20]:
import re
import pandas as pd
data_1 = pd.read_csv('bina_az_02102023.csv')
data_2 = pd.read_csv('bina_az_new.csv')
data_3 = pd.read_csv('bina_az_old.csv')
data_4 = pd.read_csv('bina_az_25102023.csv')
frames = pd.concat([data_1, data_2, data_3,data_4]).drop_duplicates().dropna()
frames['is_near_metro'] = (frames['description'].str.contains('m\.', case=False) | frames['description'].str.contains('metro',case=False)).astype(int)
frames = frames[frames['seller_type'] != 'seller_type']
frames[['flat', 'total_flat']] = frames['flat_number'].str.split(' / ', expand=True).astype(int)
remove_non_numeric_and_convert_to_float = lambda value: float(re.sub(r'[^\d.]', '', value)) if value else None
frames['area_converted'] = frames['area'].apply(remove_non_numeric_and_convert_to_float)
frames['room_count'] = frames['room_count'].astype(int)
frames['documents_encoded'] = frames['documents'].map({'var': 1, 'yoxdur': 0})
frames['is_repair_encoded'] = frames['is_repair'].map({'var': 1, 'yoxdur': 0})
frames['seller_type_encoded'] = frames['seller_type'].map({'vasitəçi (agent)': 0, 'mülkiyyətçi': 1})
frames['category_encoded'] = frames['category'].map({'Yeni tikili': 0, 'Köhnə tikili': 1})
frames['price'] = frames['price'].str.replace(' ', '').astype(int)
frames = frames[['is_near_metro', 
                 'seller_type_encoded', 
                 'flat', 
                 'total_flat', 
                 'room_count',
                 'area_converted', 
                 'category_encoded',
                 'documents_encoded',
                 'is_repair_encoded', 
                 'price']].drop_duplicates(ignore_index=True)
# frames.to_excel('frames.xlsx', index=False)

## XGBOOST

In [21]:
import warnings
import joblib
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings("ignore", category=FutureWarning)

data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
model = xgb.XGBRegressor(
    learning_rate=0.1,
    n_estimators=100,
    max_depth=3,
    objective='reg:squarederror'
)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
print("Model performance metrics")
print("-----------------------")
print(f"R-squared: {r2:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print("-----------------------")
joblib.dump(model, 'xgb.pkl')

Model performance metrics
-----------------------
R-squared: 0.71
Root Mean Squared Error: 94855.12
Mean Absolute Error: 47748.10
-----------------------


['xgb.pkl']

## Grid Search RandomForestRegressor

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rf_model = RandomForestRegressor()
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_
predictions = best_model.predict(X_test)
print("Best Hyperparameters:", best_params)
r_squared = r2_score(y_test, predictions)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
mae = mean_absolute_error(y_test, predictions)
print("R-squared:", r_squared)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)

Best Hyperparameters: {'max_depth': 30, 'min_samples_leaf': 4, 'min_samples_split': 5, 'n_estimators': 200}
R-squared: 0.738799711229776
Root Mean Squared Error (RMSE): 89502.17958782492
Mean Absolute Error (MAE): 42727.429187055845


## Grid search for XGBRegressor

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 4, 5],  
    'learning_rate': [0.01, 0.1, 0.2],  
    'subsample': [0.8, 0.9, 1.0], 
}
xgb_model = XGBRegressor()  
grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_
predictions = best_model.predict(X_test)
print("Best Hyperparameters:", best_params)
r_squared = r2_score(y_test, predictions)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
mae = mean_absolute_error(y_test, predictions)
print("R-squared:", r_squared)
print("Root Mean Squared Error (RMSE):", rmse)
print("Mean Absolute Error (MAE):", mae)

Best Hyperparameters: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.9}
R-squared: 0.7077243215163627
Root Mean Squared Error (RMSE): 94676.70258377517
Mean Absolute Error (MAE): 47888.042196584305


## Ensemble methods

In [8]:
import warnings
import joblib
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
warnings.filterwarnings("ignore", category=FutureWarning)
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)
xgb_model = xgb.XGBRegressor(
    learning_rate=0.1,
    n_estimators=100,
    max_depth=3,
    objective='reg:squarederror'
)
xgb_model.fit(X_train_scaled, y_train)
rf_predictions = rf_model.predict(X_test_scaled)
xgb_predictions = xgb_model.predict(X_test_scaled)
ensemble_predictions = (rf_predictions + xgb_predictions) / 2
ensemble_r2 = r2_score(y_test, ensemble_predictions)
ensemble_rmse = mean_squared_error(y_test, ensemble_predictions, squared=False)
ensemble_mae = mean_absolute_error(y_test, ensemble_predictions)
print("Ensemble Model Performance Metrics")
print("-----------------------")
print(f"R-squared: {ensemble_r2:.2f}")
print(f"Root Mean Squared Error: {ensemble_rmse:.2f}")
print(f"Mean Absolute Error: {ensemble_mae:.2f}")
print("-----------------------")
# joblib.dump(rf_model, 'random_forest.pkl')
# joblib.dump(xgb_model, 'xgboost.pkl')

Ensemble Model Performance Metrics
-----------------------
R-squared: 0.75
Root Mean Squared Error: 88404.88
Mean Absolute Error: 43143.62
-----------------------


## Stacking method for ensemble

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
xgb_model = XGBRegressor(learning_rate=0.1, n_estimators=100, max_depth=3, objective='reg:squarederror')
gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_val)
xgb_predictions = xgb_model.predict(X_val)
gb_predictions = gb_model.predict(X_val)
meta_model = LinearRegression()
meta_features = pd.DataFrame({'RandomForest': rf_predictions, 'XGBoost': xgb_predictions, 'GradientBoosting': gb_predictions})
meta_model.fit(meta_features, y_val)
rf_test_predictions = rf_model.predict(X_test)
xgb_test_predictions = xgb_model.predict(X_test)
gb_test_predictions = gb_model.predict(X_test)
meta_test_features = pd.DataFrame({'RandomForest': rf_test_predictions, 'XGBoost': xgb_test_predictions, 'GradientBoosting': gb_test_predictions})
final_predictions = meta_model.predict(meta_test_features)
ensemble_r2 = r2_score(y_test, final_predictions)
ensemble_rmse = mean_squared_error(y_test, final_predictions, squared=False)
ensemble_mae = mean_absolute_error(y_test, final_predictions)
print("Ensemble Model Performance Metrics")
print("-----------------------")
print(f"R-squared: {ensemble_r2:.2f}")
print(f"Root Mean Squared Error: {ensemble_rmse:.2f}")
print(f"Mean Absolute Error: {ensemble_mae:.2f}")
print("-----------------------")

Ensemble Model Performance Metrics
-----------------------
R-squared: 0.75
Root Mean Squared Error: 87161.80
Mean Absolute Error: 41856.48
-----------------------


## Blending method for ensemble

In [11]:
X_base_train, X_meta_train, y_base_train, y_meta_train = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
rf_model.fit(X_base_train, y_base_train)
xgb_model.fit(X_base_train, y_base_train)
gb_model.fit(X_base_train, y_base_train)
rf_meta_predictions = rf_model.predict(X_meta_train)
xgb_meta_predictions = xgb_model.predict(X_meta_train)
gb_meta_predictions = gb_model.predict(X_meta_train)
meta_features = pd.DataFrame({'RandomForest': rf_meta_predictions, 'XGBoost': xgb_meta_predictions, 'GradientBoosting': gb_meta_predictions})
meta_model.fit(meta_features, y_meta_train)
rf_test_predictions = rf_model.predict(X_test)
xgb_test_predictions = xgb_model.predict(X_test)
gb_test_predictions = gb_model.predict(X_test)
meta_test_features = pd.DataFrame({'RandomForest': rf_test_predictions, 'XGBoost': xgb_test_predictions, 'GradientBoosting': gb_test_predictions})
final_predictions = meta_model.predict(meta_test_features)
ensemble_r2 = r2_score(y_test, final_predictions)
ensemble_rmse = mean_squared_error(y_test, final_predictions, squared=False)
ensemble_mae = mean_absolute_error(y_test, final_predictions)
print("Ensemble Model Performance Metrics")
print("-----------------------")
print(f"R-squared: {ensemble_r2:.2f}")
print(f"Root Mean Squared Error: {ensemble_rmse:.2f}")
print(f"Mean Absolute Error: {ensemble_mae:.2f}")
print("-----------------------")

Ensemble Model Performance Metrics
-----------------------
R-squared: 0.74
Root Mean Squared Error: 88480.09
Mean Absolute Error: 42442.15
-----------------------


## neural network

In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow import keras
from tensorflow.keras import layers
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
model = keras.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)), 
    layers.Dense(128, activation='relu'),  
    layers.Dense(64, activation='relu'),   
    layers.Dense(1) 
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)
y_pred = model.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print("Neural Network Performance Metrics")
print("-----------------------")
print(f"R-squared: {r2:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print("-----------------------")

Neural Network Performance Metrics
-----------------------
R-squared: 0.49
Root Mean Squared Error: 125015.13
Mean Absolute Error: 67906.16
-----------------------


In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow import keras
from tensorflow.keras import layers
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
model = keras.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
])
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=1000,
    decay_rate=0.9
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
def custom_loss(y_true, y_pred):
    return keras.losses.mean_squared_error(y_true, y_pred) + 0.01 * keras.losses.kl_divergence(y_true, y_pred)
model.compile(optimizer=optimizer, loss=custom_loss, metrics=['mae'])
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(
    X_train_scaled, y_train, 
    epochs=150, 
    batch_size=64, 
    validation_data=(X_test_scaled, y_test), 
    verbose=0, 
    callbacks=[early_stopping]
)
y_pred = model.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print("Improved Neural Network Performance Metrics")
print("-----------------------")
print(f"R-squared: {r2:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print("-----------------------")

Improved Neural Network Performance Metrics
-----------------------
R-squared: 0.45
Root Mean Squared Error: 129786.08
Mean Absolute Error: 79261.65
-----------------------


In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow import keras
from tensorflow.keras import layers
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
model = keras.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])
initial_learning_rate = 0.01
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
def custom_loss(y_true, y_pred):
    mse = keras.losses.mean_squared_error(y_true, y_pred)
    regularization = 0.01 * tf.reduce_sum(model.losses)
    return mse + regularization

model.compile(optimizer=optimizer, loss=custom_loss, metrics=['mae'])
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=15, restore_best_weights=True
)
history = model.fit(
    X_train_scaled, y_train,
    epochs=200,
    batch_size=64,
    validation_data=(X_test_scaled, y_test),
    verbose=0,
    callbacks=[early_stopping]
)
y_pred = model.predict(X_test_scaled)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print("Improved Neural Network Performance Metrics")
print("-----------------------")
print(f"R-squared: {r2:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print("-----------------------")

Improved Neural Network Performance Metrics
-----------------------
R-squared: 0.59
Root Mean Squared Error: 111911.90
Mean Absolute Error: 60077.42
-----------------------


In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow import keras
from tensorflow.keras import layers
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
num_networks = 5
ensemble_predictions = []
for i in range(num_networks):
    model = keras.Sequential([
        layers.Input(shape=(X_train_scaled.shape[1],)),
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(128, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1)
    ])
    initial_learning_rate = 0.01
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True
    )
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    def custom_loss(y_true, y_pred):
        mse = keras.losses.mean_squared_error(y_true, y_pred)
        regularization = 0.01 * tf.reduce_sum(model.losses)
        return mse + regularization
    model.compile(optimizer=optimizer, loss=custom_loss, metrics=['mae'])
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=15, restore_best_weights=True
    )
    history = model.fit(
        X_train_scaled, y_train,
        epochs=200,
        batch_size=64,
        validation_data=(X_test_scaled, y_test),
        verbose=0,
        callbacks=[early_stopping]
    )
    y_pred = model.predict(X_test_scaled)
    ensemble_predictions.append(y_pred)
ensemble_predictions = np.mean(ensemble_predictions, axis=0)
r2 = r2_score(y_test, ensemble_predictions)
rmse = np.sqrt(mean_squared_error(y_test, ensemble_predictions))
mae = mean_absolute_error(y_test, ensemble_predictions)
print("Ensemble of Neural Networks Performance Metrics")
print("-----------------------")
print(f"R-squared: {r2:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print("-----------------------")

Ensemble of Neural Networks Performance Metrics
-----------------------
R-squared: 0.62
Root Mean Squared Error: 108341.71
Mean Absolute Error: 59353.70
-----------------------


In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow import keras
from tensorflow.keras import layers
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
num_networks = 5
ensemble_predictions = []
for i in range(num_networks):
    model = keras.Sequential([
        layers.Input(shape=(X_train_scaled.shape[1],)),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1)
    ])
    initial_learning_rate = 0.01
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True
    )
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    def custom_loss(y_true, y_pred):
        mse = keras.losses.mean_squared_error(y_true, y_pred)
        regularization = 0.01 * tf.reduce_sum(model.losses)
        return mse + regularization
    model.compile(optimizer=optimizer, loss=custom_loss, metrics=['mae'])
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=15, restore_best_weights=True
    )
    history = model.fit(
        X_train_scaled, y_train,
        epochs=200,
        batch_size=64,
        validation_data=(X_test_scaled, y_test),
        verbose=0,
        callbacks=[early_stopping]
    )
    y_pred = model.predict(X_test_scaled)
    ensemble_predictions.append(y_pred)
ensemble_predictions = np.mean(ensemble_predictions, axis=0)
r2 = r2_score(y_test, ensemble_predictions)
rmse = np.sqrt(mean_squared_error(y_test, ensemble_predictions))
mae = mean_absolute_error(y_test, ensemble_predictions)
print("Ensemble of Neural Networks Performance Metrics")
print("-----------------------")
print(f"R-squared: {r2:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print("-----------------------")

Ensemble of Neural Networks Performance Metrics
-----------------------
R-squared: 0.67
Root Mean Squared Error: 100792.64
Mean Absolute Error: 50511.93
-----------------------


In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from tensorflow import keras
from tensorflow.keras import layers
data = frames
X = data.drop(columns=['price'])
y = data['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
num_networks = 20
ensemble_predictions = []
for i in range(num_networks):
    model = keras.Sequential([
        layers.Input(shape=(X_train_scaled.shape[1],)),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1)
    ])
    initial_learning_rate = 0.01
    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate, decay_steps=1000, decay_rate=0.9, staircase=True
    )
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    def custom_loss(y_true, y_pred):
        mse = keras.losses.mean_squared_error(y_true, y_pred)
        regularization = 0.01 * tf.reduce_sum(model.losses)
        return mse + regularization
    model.compile(optimizer=optimizer, loss=custom_loss, metrics=['mae'])
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=20, restore_best_weights=True
    )
    history = model.fit(
        X_train_scaled, y_train,
        epochs=250,
        batch_size=64,
        validation_data=(X_test_scaled, y_test),
        verbose=0,
        callbacks=[early_stopping]
    )
    y_pred = model.predict(X_test_scaled)
    ensemble_predictions.append(y_pred)
ensemble_predictions = np.mean(ensemble_predictions, axis=0)
r2 = r2_score(y_test, ensemble_predictions)
rmse = np.sqrt(mean_squared_error(y_test, ensemble_predictions))
mae = mean_absolute_error(y_test, ensemble_predictions)
print("Ensemble of Neural Networks Performance Metrics")
print("-----------------------")
print(f"R-squared: {r2:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
print(f"Mean Absolute Error: {mae:.2f}")
print("-----------------------")

Ensemble of Neural Networks Performance Metrics
-----------------------
R-squared: 0.68
Root Mean Squared Error: 99066.72
Mean Absolute Error: 48012.44
-----------------------
