In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder

In [2]:
data = pd.read_csv("data.csv")
data.info()
if not isinstance(data, pd.DataFrame):
    raise TypeError("Expected 'data' to be a pandas DataFrame")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4600 entries, 0 to 4599
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           4600 non-null   object 
 1   price          4600 non-null   float64
 2   bedrooms       4600 non-null   float64
 3   bathrooms      4600 non-null   float64
 4   sqft_living    4600 non-null   int64  
 5   sqft_lot       4600 non-null   int64  
 6   floors         4600 non-null   float64
 7   waterfront     4600 non-null   int64  
 8   view           4600 non-null   int64  
 9   condition      4600 non-null   int64  
 10  sqft_above     4600 non-null   int64  
 11  sqft_basement  4600 non-null   int64  
 12  yr_built       4600 non-null   int64  
 13  yr_renovated   4600 non-null   int64  
 14  street         4600 non-null   object 
 15  city           4600 non-null   object 
 16  statezip       4600 non-null   object 
 17  country        4600 non-null   object 
dtypes: float

In [3]:
# Setting up fields
columns = data.columns.tolist()
features = np.array([col for col in columns if col != 'price'])

# Splitting data
x_features = data[features]
x_target = data['price']


# Handle missing values
x_features = x_features.ffill()
x_target = x_target.fillna(x_target.mean())

# Identify categorical columns
categorical_cols = x_features.select_dtypes(include=['object', 'category']).columns.tolist()

# Identify date columns and convert them to numerical
date_cols = x_features.select_dtypes(include=['datetime', 'datetime64']).columns.tolist()
for date_col in date_cols:
    x_features[date_col] = pd.to_datetime(x_features[date_col]).astype(int) / 10**9  # Convert to seconds since epoch

# Apply OneHotEncoder to categorical columns
ohe = OneHotEncoder(sparse_output=False, drop='first')  # Dropping the first category to avoid multicollinearity
encoded_categorical = ohe.fit_transform(x_features[categorical_cols])

# Convert encoded columns to DataFrame and set proper column names
encoded_categorical_df = pd.DataFrame(encoded_categorical, columns=ohe.get_feature_names_out(categorical_cols))

# Drop original categorical columns and concatenate the encoded columns
x_features = x_features.drop(columns=categorical_cols)
x_features = pd.concat([x_features, encoded_categorical_df], axis=1)

In [4]:
# Split data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x_features, x_target, test_size=0.3, random_state=5)

In [5]:
# Scale the data 
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Build the neural network model
model = Sequential([
    Dense(64, activation='relu', input_shape=(x_train.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(1)  # Output layer
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(x_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(x_test)
ann_accuracy = r2_score(y_test, y_pred)
print("ANN Regression R^2 Score: {:.2f}".format(ann_accuracy))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - loss: 1046192062464.0000 - val_loss: 470758260736.0000
Epoch 2/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 767692701696.0000 - val_loss: 470754557952.0000
Epoch 3/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 839780466688.0000 - val_loss: 470734405632.0000
Epoch 4/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 576737640448.0000 - val_loss: 470686400512.0000
Epoch 5/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 722307776512.0000 - val_loss: 470599434240.0000
Epoch 6/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 482741452800.0000 - val_loss: 470466592768.0000
Epoch 7/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1222383108096.0000 - val_loss: 470277259264.0000
Epoch 8/50
[1m81