# In [None]:

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Dataset CSV paths, resolved relative to the working directory.
# filename1 is not referenced by the code shown here (presumably the raw,
# uncleaned data -- TODO confirm); filename2 is the file handed to load_data.
filename1, filename2 = (
    "vietnam_housing_dataset.csv",
    "cleaned_vietnam_housing_dataset.csv",
)
def load_data(filename):
    """Load a CSV file and split its numeric columns into train/test sets.

    Only numeric columns are kept, and every row containing a missing value
    in those columns is dropped. The target is the "Price" column when it
    is present after that filtering; otherwise the last remaining numeric
    column is used as the target.

    Args:
        filename: Path of the CSV file (anything ``pandas.read_csv`` accepts).

    Returns:
        The four splits ``X_train, X_test, y_train, y_test`` produced by
        ``train_test_split`` with a 20% test share and ``random_state=42``.

    Raises:
        IndexError: If the file contains no numeric columns, so there is no
            column left to fall back on as the target.
    """
    df = pd.read_csv(filename)
    df = df.select_dtypes(include=[np.number]).dropna()

    # Resolve the target column once and drop exactly that column from the
    # features. Dropping the literal "Price" with errors="ignore" would leave
    # the fallback target inside X and leak the label into the inputs.
    target_col = "Price" if "Price" in df.columns else df.columns[-1]
    y = df[target_col]
    X = df.drop(columns=[target_col])

    return train_test_split(X, y, test_size=0.2, random_state=42)

def train_mlp(X_train, X_test, y_train, y_test, epochs=100, batch_size=32):
    """Build, train, and evaluate an MLP regressor for house price prediction.

    Features are standardized with a scaler fitted on the training split
    only, then fed to a dense network (128 ReLU units, 20% dropout,
    64 ReLU units, 1 linear output) trained with Adam on MSE loss while
    tracking MAE. The test-set MAE is printed after training.

    Args:
        X_train: Training features, 2-D (samples x features).
        X_test: Held-out features with the same columns as ``X_train``.
        y_train: Training targets.
        y_test: Held-out targets.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.

    Returns:
        The trained Keras ``Sequential`` model.

    Note:
        The fitted ``StandardScaler`` is local to this function and is not
        returned; inputs passed to the returned model must be standardized
        the same way by the caller.
    """
    # Fit the scaler on the training split only so that test-set statistics
    # do not influence the preprocessing.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Declare the input shape with an explicit Input object; passing
    # `input_shape=` to the first layer is deprecated in Keras 3.
    model = Sequential([
        keras.Input(shape=(X_train.shape[1],)),
        Dense(128, activation="relu"),
        Dropout(0.2),
        Dense(64, activation="relu"),
        Dense(1),  # Single linear unit: regression output (predicted price)
    ])

    model.compile(optimizer="adam", loss="mse", metrics=["mae"])

    # NOTE: the per-epoch validation metrics are computed on the test split.
    # Nothing here selects a model based on them, but they are not an
    # independent validation signal.
    model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
    )

    # evaluate() returns [loss, mae]; only the MAE is reported.
    _, mae = model.evaluate(X_test, y_test)
    print(f"Test MAE: {mae:.2f}")

    return model

# Read the preprocessed dataset from DoAn.ipynb and split it into
# training and test sets.
X_train, X_test, y_train, y_test = load_data(filename=filename2)

# Fit the MLP regressor for 50 epochs with mini-batches of 32 samples.
mlp_model = train_mlp(
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    epochs=50,
    batch_size=32,
)


# :