In [33]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [34]:
# preprocessing 
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import OneHotEncoder
from feature_engine.encoding import RareLabelEncoder
from sklearn.compose import TransformedTargetRegressor
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector as selector
from category_encoders.ordinal import OrdinalEncoder
from sklearn.feature_selection import VarianceThreshold
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn import cross_decomposition
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.impute import KNNImputer

# modeling
from sklearn.model_selection import train_test_split, KFold, RepeatedKFold, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
import os

In [35]:
# Load the apartments CSV and, in the same step, drop the Arabic district-name
# column and the post ID column.
apartments = (
    pd.read_csv("../data/apartments_sale_riyadh_cleaned.csv")
    .drop(columns=["district", "id"])
)

In [44]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split repeatable.
train, test = train_test_split(apartments, test_size=0.2, random_state=123)

# Features: every column except the "price" target, with all null values
# given the same np.nan encoding.
X_train = train.drop(columns="price").fillna(value=np.nan)
X_test = test.drop(columns="price").fillna(value=np.nan)

# Targets
y_train = train["price"]
y_test = test["price"]

In [45]:
# Data preprocessing

# One-hot encoding for the categorical columns; categories not seen while
# fitting are ignored instead of raising an error.
dummy_encoder = OneHotEncoder(handle_unknown="ignore")

# KNN imputation (5 neighbours) for values stored as np.nan.
knn_imp = KNNImputer(missing_values=np.nan, n_neighbors=5)

# Route columns by dtype: object columns go to the encoder, numeric columns to
# the imputer, and any column matched by neither selector is passed through.
preprocessor_minimal = ColumnTransformer(
    transformers=[
        ("dummy_encod", dummy_encoder, selector(dtype_include="object")),
        ("knn_impute", knn_imp, selector(dtype_include="number")),
    ],
    remainder="passthrough",
)

In [46]:
# Fit the preprocessing steps on the training features only, then apply the
# already-fitted transformer to the test features.
X_train_preprocessed = preprocessor_minimal.fit_transform(X=X_train)
X_test_preprocessed = preprocessor_minimal.transform(X=X_test)

In [47]:
# The preprocessor may hand back a SciPy sparse matrix (one-hot output) or an
# already-dense array. Densify only when a `toarray` method is present, so this
# cell works in both cases and can be re-run without raising AttributeError.
if hasattr(X_train_preprocessed, "toarray"):
    X_train_preprocessed = X_train_preprocessed.toarray()

# np.asarray accepts a pandas Series as well as an ndarray, so (unlike
# `.values`) this line stays valid when the cell is executed more than once.
y_train = np.asarray(y_train)

# Seed TensorFlow so weight initialisation and batch shuffling are repeatable
# from run to run (same seed value as the train/test split).
tf.random.set_seed(123)

# Define the model architecture: two 64-unit ReLU hidden layers followed by a
# single linear output unit for the price regression.
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train_preprocessed.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
])

# Compile the model: Adam optimizer, mean squared error as both loss and metric
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])

# Train the model for 10 epochs in mini-batches of 32 rows
model.fit(X_train_preprocessed, y_train, epochs=10, batch_size=32)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f7ff2ff0190>