In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
#from sklearn.preprocessing import StandardScaler
# Load the dataset
df = pd.read_csv('AmesHousingdataset.csv')
# Separate features (X) and target (y)
X = df.drop("SalePrice", axis=1)  # Replace 'SalePrice' with the actual target column name
y = df["SalePrice"]
# Identify numerical and categorical columns
numerical_features = X.select_dtypes(include=["int64", "float64"]).columns
categorical_features = X.select_dtypes(include=["object"]).columns
# Preprocessing for numerical and categorical features
preprocessor = ColumnTransformer([
    ("num", SimpleImputer(strategy="mean"), numerical_features),  # Fill missing values with the mean
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)])  # One-hot encode categorical features])
# Apply preprocessing
X_preprocessed = preprocessor.fit_transform(X)
# Scale the features
scaler = StandardScaler(with_mean=False)
X_scaled = scaler.fit_transform(X_preprocessed)
# Log-transform the target to handle skewness
y = np.log1p(y)

# Split the dataset
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
# Define the neural network
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),  # Add dropout to prevent overfitting
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1)  # Output layer for regression
])
# Compile the model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))
# Evaluate the model
loss, mae = model.evaluate(X_val, y_val)
print(f"Validation MAE: {mae}")
# Predict house prices
predictions = model.predict(X_val)
print(f"prediction: {predictions}")
# Convert predictions back to the original scale
predictions_original_scale = np.expm1(predictions)
print(f"originalprediction: {predictions_original_scale}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - loss: 9962.5830 - mae: 73.3344 - val_loss: 39.8884 - val_mae: 6.2820
Epoch 2/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 725.7778 - mae: 21.4853 - val_loss: 1.3226 - val_mae: 1.0262
Epoch 3/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 319.9469 - mae: 14.0953 - val_loss: 25.3324 - val_mae: 5.0118
Epoch 4/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 161.5774 - mae: 9.8660 - val_loss: 16.6785 - val_mae: 4.0608
Epoch 5/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 91.6001 - mae: 7.3287 - val_loss: 4.8440 - val_mae: 2.1553
Epoch 6/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 57.9284 - mae: 5.7928 - val_loss: 7.1265 - val_mae: 2.6323
Epoch 7/50
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step