In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error

def load_data(filename):
    """Load a CSV dataset and split it into train/test features and target.

    Only numeric columns are kept, and every row that still contains a
    missing value is dropped.  The target is the ``"Price"`` column when it
    survives that filtering; otherwise the last remaining numeric column is
    used.  Whichever column is chosen as the target is removed from the
    feature matrix, so the model is never handed the value it must predict.

    Args:
        filename: Passed straight to ``pandas.read_csv``.

    Returns:
        The ``X_train, X_test, y_train, y_test`` splits produced by
        ``train_test_split`` with 20% of the rows held out and
        ``random_state=42``.
    """
    df = pd.read_csv(filename)
    df = df.select_dtypes(include=[np.number]).dropna()  # numeric, complete rows only

    # Resolve the target name once so that features and target always agree.
    # Previously the fallback target (last column) was left inside X.
    target_col = "Price" if "Price" in df.columns else df.columns[-1]
    y = df[target_col]
    X = df.drop(columns=[target_col])

    return train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features and target with separate scalers
def normalize_data(X_train, X_test, y_train, y_test):
    """Standardize the feature matrices and target vectors.

    Each scaler is fitted on the training split only and then applied to the
    matching test split.  Targets are read through ``.values`` and reshaped
    to a single column for scaling, then flattened back to 1-D.

    Returns:
        ``(X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled)``.
    """
    feature_scaler = StandardScaler()
    target_scaler = StandardScaler()

    # Features: fit on train, reuse the fitted scaler for test.
    train_features = feature_scaler.fit_transform(X_train)
    test_features = feature_scaler.transform(X_test)

    # Targets: the scaler is given an (n, 1) column rather than a 1-D vector.
    train_target_column = y_train.values.reshape(-1, 1)
    train_target = target_scaler.fit_transform(train_target_column).flatten()
    test_target_column = y_test.values.reshape(-1, 1)
    test_target = target_scaler.transform(test_target_column).flatten()

    return train_features, test_features, train_target, test_target

def train_mlp(X_train, X_test, y_train, y_test, hidden_layers=(128, 64), max_iter=500):
    """Fit an ``MLPRegressor`` and print its mean absolute error on the test split.

    Args:
        X_train, y_train: Data the model is fitted on.
        X_test, y_test: Data used for the printed MAE.
        hidden_layers: Forwarded as ``hidden_layer_sizes``.
        max_iter: Forwarded as ``max_iter``.

    Returns:
        The fitted ``MLPRegressor``.
    """
    # ReLU activations, Adam solver, fixed seed.
    mlp = MLPRegressor(
        hidden_layer_sizes=hidden_layers,
        activation="relu",
        solver="adam",
        max_iter=max_iter,
        random_state=42,
    ).fit(X_train, y_train)

    # The MAE is in whatever units y_test was passed in.
    mae = mean_absolute_error(y_test, mlp.predict(X_test))
    print(f"Test MAE: {mae:.2f}")

    return mlp

# Main function
def main():
    """Train and evaluate the MLP on the raw dataset, then on the cleaned one.

    Each CSV goes through the same load -> normalize -> train pipeline, and
    ``train_mlp`` prints one MAE line per dataset.  The targets passed to
    ``train_mlp`` are the standardized ones returned by ``normalize_data``.
    """
    dataset_files = (
        "vietnam_housing_dataset.csv",
        "vietnam_housing_dataset_cleaned_data.csv",
    )

    for csv_path in dataset_files:
        # Load the csv file and split it
        splits = load_data(csv_path)
        # Scale features and target
        scaled_splits = normalize_data(*splits)
        # Train the MLP model using Scikit-Learn
        train_mlp(*scaled_splits, hidden_layers=(128, 64), max_iter=500)
# Entry point: call main() only when this module is executed as the main
# program, not when it is imported from another module.
if __name__ == "__main__":
    main()

Test MAE: 0.65
Test MAE: 0.68
