In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [14]:
df = pd.read_csv('housing.csv')

In [15]:
# Check for missing values
missing_values = df.isnull().sum()
print("Missing values per column:")
print(missing_values)

Missing values per column:
longitude               0
latitude                0
housing_median_age      0
total_rooms             0
total_bedrooms        207
population              0
households              0
median_income           0
median_house_value      0
ocean_proximity         0
dtype: int64


In [16]:
# Handle missing values
# Fill numeric columns with median for 'total_bedrooms'
df['total_bedrooms'] = df['total_bedrooms'].fillna(df['total_bedrooms'].median())

# Fill other numeric columns with mean
numeric_cols = df.select_dtypes(include=[np.number]).columns.difference(['total_bedrooms'])
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())

# Fill non-numeric columns with mode
non_numeric_cols = df.select_dtypes(exclude=[np.number]).columns
for col in non_numeric_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

In [17]:
# Check for missing values
missing_values = df.isnull().sum()
print("Missing values per column:")
print(missing_values)

Missing values per column:
longitude             0
latitude              0
housing_median_age    0
total_rooms           0
total_bedrooms        0
population            0
households            0
median_income         0
median_house_value    0
ocean_proximity       0
dtype: int64


In [18]:
# Encode categorical feature 'ocean_proximity'
if 'ocean_proximity' in df.columns:
    le = LabelEncoder()
    df['ocean_proximity'] = le.fit_transform(df['ocean_proximity'])

In [19]:
# Normalize the dataset
scaler = StandardScaler()
# Assuming 'median_house_value' is the target
X = df.drop('median_house_value', axis=1).values
y = df['median_house_value'].values

X = scaler.fit_transform(X)

In [20]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the neural network model
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))  # Assuming a regression problem

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [21]:
# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=10, validation_split=0.2)





Epoch 1/20
[1m1321/1321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 55650017280.0000 - val_loss: 48610959360.0000
Epoch 2/20
[1m1321/1321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - loss: 41960656896.0000 - val_loss: 22861537280.0000
Epoch 3/20
[1m1321/1321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 18651734016.0000 - val_loss: 12678508544.0000
Epoch 4/20
[1m1321/1321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 11917352960.0000 - val_loss: 10176109568.0000
Epoch 5/20
[1m1321/1321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 9436599296.0000 - val_loss: 8604810240.0000
Epoch 6/20
[1m1321/1321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 7979508224.0000 - val_loss: 7522275840.0000
Epoch 7/20
[1m1321/1321[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 7156505600.0000 - val_loss: 6739545600.0000
Epoch 8/20


<keras.src.callbacks.history.History at 0x78cb02df32b0>

In [22]:
# Evaluate the model's performance
y_pred_train = model.predict(X_train).flatten()
y_pred_test = model.predict(X_test).flatten()

# Calculate performance metrics
mse_train = mean_squared_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)

mse_test = mean_squared_error(y_test, y_pred_test)
r2_test = r2_score(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)

print(f'Training MSE: {mse_train:.4f}')
print(f'Training R2: {r2_train:.4f}')
print(f'Training MAE: {mae_train:.4f}')
print(f'Test MSE: {mse_test:.4f}')
print(f'Test R2: {r2_test:.4f}')
print(f'Test MAE: {mae_test:.4f}')

[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m129/129[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Training MSE: 4618113802.1888
Training R2: 0.6545
Training MAE: 49063.2381
Test MSE: 4812555928.4886
Test R2: 0.6327
Test MAE: 50158.2744
