In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn.model_selection import train_test_split
from torch import nn
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load the raw housing table and discard every row that has a missing value.
df = pd.read_csv("housing.csv")
df = df.dropna()

# One-hot encode the categorical column. get_dummies returns a new frame, so
# `df` is not modified. No imputation step is needed afterwards: dropna()
# above has already removed every row that contained a NaN.
df_cleaned = pd.get_dummies(df, columns=["ocean_proximity"], dtype=int)

# Model inputs. Only three of the one-hot columns are selected here.
X = df_cleaned[
    [
        "longitude",
        "latitude",
        "housing_median_age",
        "total_rooms",
        "total_bedrooms",
        "population",
        "households",
        "median_house_value",
        "ocean_proximity_<1H OCEAN",
        "ocean_proximity_INLAND",
        "ocean_proximity_NEAR BAY",
    ]
]
# Regression target.
y = df["median_income"]
print(y.shape, X.shape)

# Choose the train/test row positions BEFORE fitting the scaler. Fitting the
# scaler on all rows would let test-set means/variances influence the
# preprocessing (data leakage) and make the test loss look better than it is.
train_idx, test_idx = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to every row.
scaler = StandardScaler()
scaler.fit(X.iloc[train_idx])
X_scaled = scaler.transform(X)

# Convert to float32 tensors for PyTorch.
X = torch.tensor(X_scaled, dtype=torch.float32)
y = torch.tensor(y.to_numpy(), dtype=torch.float32)

# Index with explicit int64 tensors so the lookup does not depend on the
# platform's default NumPy integer width.
train_idx = torch.as_tensor(train_idx, dtype=torch.long)
test_idx = torch.as_tensor(test_idx, dtype=torch.long)
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]


class Housing(nn.Module):
    """Three-layer fully connected regressor: Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        in_features: Width of each input row. Defaults to 11, the number of
            feature columns selected in this notebook.
        hidden1: Width of the first hidden layer (default 13).
        hidden2: Width of the second hidden layer (default 20).
        out_features: Width of the output (default 1, a single regression value).

    Calling ``Housing()`` with no arguments builds exactly the 11 -> 13 -> 20 -> 1
    network; the parameters only exist so the model does not have to be edited
    when the feature list changes.
    """

    def __init__(self, in_features=11, hidden1=13, hidden2=20, out_features=1):
        super().__init__()
        # Attribute names are kept as-is: they are the keys of the state_dict.
        self.Layer1 = nn.Linear(in_features=in_features, out_features=hidden1)
        self.Layer2 = nn.Linear(in_features=hidden1, out_features=hidden2)
        self.Layer3 = nn.Linear(in_features=hidden2, out_features=out_features)
        self.ReLU = nn.ReLU()

    def forward(self, X):
        """Map a batch of shape (N, in_features) to predictions of shape (N, out_features)."""
        hidden = self.ReLU(self.Layer1(X))
        hidden = self.ReLU(self.Layer2(hidden))
        # No activation on the output layer: this is a regression head.
        return self.Layer3(hidden)


# Seed before constructing the model so the initial weights are repeatable.
torch.manual_seed(42)
model24 = Housing()

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model24.parameters(), lr=0.001)

epochs = 2000

for epoch in range(epochs):
    # Full-batch training step on the whole training split.
    model24.train()
    optimizer.zero_grad()
    y_preds = torch.squeeze(model24(X_train))
    loss = loss_fn(y_preds, y_train)
    loss.backward()
    optimizer.step()

    # Score the held-out split without recording gradients.
    model24.eval()
    with torch.inference_mode():
        y_test_preds = torch.squeeze(model24(X_test))
        test_loss = loss_fn(y_test_preds, y_test)

    # Report on every 20th epoch only.
    if epoch % 20:
        continue
    print(
        f"Epoch: {epoch} | Training Loss: {loss.item()} | Testing Loss: {test_loss.item()}"
    )

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
from sklearn.model_selection import train_test_split
from torch import nn
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load dataset
df = pd.read_csv("housing.csv")

# Handle missing values: drop incomplete rows (simple approach).
df = df.dropna()

# One-hot encoding for the categorical column. get_dummies returns a new
# frame, so `df` is left untouched. Nothing remains to impute because
# dropna() above removed every row that contained a NaN.
df_cleaned = pd.get_dummies(df, columns=["ocean_proximity"], dtype=int)

# Feature selection
X = df_cleaned[
    [
        "longitude",
        "latitude",
        "housing_median_age",
        "total_rooms",
        "total_bedrooms",
        "population",
        "households",
        "median_house_value",
        "ocean_proximity_<1H OCEAN",
        "ocean_proximity_INLAND",
        "ocean_proximity_NEAR BAY",
    ]
]

# Target variable as a column vector (StandardScaler works on 2-D input).
y = df["median_income"].values.reshape(-1, 1)

# Choose the train/test row positions BEFORE fitting any scaler. Fitting on
# all rows would let test-set statistics shape the preprocessing (data
# leakage) and bias the reported test loss.
train_idx, test_idx = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42
)

# Feature scaling: statistics come from the training rows only, then the same
# transform is applied to every row.
scaler = StandardScaler()
scaler.fit(X.iloc[train_idx])
X_scaled = scaler.transform(X)

# Target scaling, fitted the same way. Keep y_scaler around: its
# inverse_transform maps predictions back to the original income units.
y_scaler = StandardScaler()
y_scaler.fit(y[train_idx])
y_scaled = y_scaler.transform(y)

# Convert to tensors exactly once. squeeze(1) drops only the column axis, so
# y has shape (N,) to match the squeezed model output.
X = torch.tensor(X_scaled, dtype=torch.float32)
y = torch.tensor(y_scaled, dtype=torch.float32).squeeze(1)

# Split dataset using the positions chosen above (explicit int64 indices).
train_idx = torch.as_tensor(train_idx, dtype=torch.long)
test_idx = torch.as_tensor(test_idx, dtype=torch.long)
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y[train_idx], y[test_idx]


# -------------------------------
# UPDATE: Neural Network with ReLU
# -------------------------------
class Housing(nn.Module):
    """Three-layer fully connected regressor with ReLU between the layers.

    Args:
        in_features: Width of each input row. Defaults to 11, the number of
            feature columns selected in this notebook.
        hidden1: Width of the first hidden layer (default 13).
        hidden2: Width of the second hidden layer (default 20).
        out_features: Width of the output (default 1, a single regression value).

    ``Housing()`` with no arguments builds the same 11 -> 13 -> 20 -> 1 network
    as before; the parameters let the model follow changes to the feature list
    without editing the class.
    """

    def __init__(self, in_features=11, hidden1=13, hidden2=20, out_features=1):
        super().__init__()
        # Attribute names are kept as-is: they are the keys of the state_dict.
        self.Layer1 = nn.Linear(in_features=in_features, out_features=hidden1)
        self.Layer2 = nn.Linear(in_features=hidden1, out_features=hidden2)
        self.Layer3 = nn.Linear(in_features=hidden2, out_features=out_features)
        self.ReLU = nn.ReLU()  # Non-linearity shared by both hidden layers

    def forward(self, X):
        """Map a batch of shape (N, in_features) to predictions of shape (N, out_features)."""
        # ReLU after each hidden layer; the output layer stays linear because
        # this is a regression head.
        X = self.ReLU(self.Layer1(X))
        X = self.ReLU(self.Layer2(X))
        return self.Layer3(X)


# Reproducibility: seed first so the freshly built model starts from the same weights
torch.manual_seed(42)

model24 = Housing()

# Regression objective: mean squared error
loss_fn = nn.MSELoss()

# Adam with a lowered learning rate (0.0005) for stability
optimizer = torch.optim.Adam(model24.parameters(), lr=0.0005)

epochs = 4000

for epoch in range(epochs):
    # ---- training step (full batch) ----
    model24.train()
    optimizer.zero_grad()

    # Forward pass; squeeze so predictions line up with the 1-D targets
    y_preds = torch.squeeze(model24(X_train))

    # MSELoss takes predictions first, targets second
    loss = loss_fn(y_preds, y_train)

    loss.backward()
    optimizer.step()

    # ---- evaluation on the held-out split, gradients disabled ----
    model24.eval()
    with torch.inference_mode():
        y_test_preds = torch.squeeze(model24(X_test))
        test_loss = loss_fn(y_test_preds, y_test)

    # Only every 20th epoch is reported
    if epoch % 20:
        continue
    print(
        f"Epoch: {epoch} | Training Loss: {loss.item()} | Testing Loss: {test_loss.item()}"
    )

  y = torch.tensor(np.array(y), dtype=torch.float32)


Epoch: 0 | Training Loss: 1.0470306873321533 | Testing Loss: 0.85666424036026
Epoch: 20 | Training Loss: 0.9907930493354797 | Testing Loss: 0.80319744348526
Epoch: 40 | Training Loss: 0.9292519092559814 | Testing Loss: 0.7436577081680298
Epoch: 60 | Training Loss: 0.8458612561225891 | Testing Loss: 0.6658174395561218
Epoch: 80 | Training Loss: 0.7423845529556274 | Testing Loss: 0.5733669996261597
Epoch: 100 | Training Loss: 0.6363466382026672 | Testing Loss: 0.48168012499809265
Epoch: 120 | Training Loss: 0.5407956838607788 | Testing Loss: 0.4026115834712982
Epoch: 140 | Training Loss: 0.465734601020813 | Testing Loss: 0.344007283449173
Epoch: 160 | Training Loss: 0.41112014651298523 | Testing Loss: 0.3053702712059021
Epoch: 180 | Training Loss: 0.3743809461593628 | Testing Loss: 0.2821577489376068
Epoch: 200 | Training Loss: 0.34981459379196167 | Testing Loss: 0.2677740156650543
Epoch: 220 | Training Loss: 0.33183255791664124 | Testing Loss: 0.25736936926841736
Epoch: 240 | Training L

In [None]:
# Summary statistics for df; as the last expression of the cell, the result is
# rendered as the cell output.
df.describe()

In [None]:

# Histogram grid for df's columns: 50 bins per plot on one 20x15 inch figure.
df.hist(figsize=(20, 15), bins=50)
plt.show()

In [None]:
import numpy as np


def split_train_test(data, test_radio, random_state=None):
    """Randomly partition the rows of ``data`` into a train part and a test part.

    Args:
        data: Object supporting ``len()`` and positional ``.iloc`` indexing
            (e.g. a pandas DataFrame).
        test_radio: Fraction of rows, between 0 and 1 inclusive, that goes to
            the test part. The count is truncated: ``int(len(data) * test_radio)``.
            (The parameter name is kept as spelled because callers pass it by
            keyword.)
        random_state: Optional seed. When given, the shuffle is reproducible
            and independent of NumPy's global random state. When ``None``
            (default), NumPy's global random state is used, as before.

    Returns:
        Tuple ``(train, test)`` of ``data.iloc[...]`` selections. Together they
        contain every row exactly once.

    Raises:
        ValueError: If ``test_radio`` is outside ``[0, 1]``.
    """
    if not 0 <= test_radio <= 1:
        raise ValueError(f"test_radio must be within [0, 1], got {test_radio!r}")

    n_rows = len(data)
    if random_state is None:
        # Legacy path: honours any np.random.seed(...) set by the caller.
        shuffled_indices = np.random.permutation(n_rows)
    else:
        shuffled_indices = np.random.default_rng(random_state).permutation(n_rows)

    test_set_size = int(n_rows * test_radio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]





In [None]:
# split_train_test returns (train rows, test rows), so after this line X holds
# the training subset of df and y holds the test subset — not features/target.
# NOTE(review): these names overwrite the X / y tensors built in the training
# cells above. Consider train_set / test_set (and matching print labels) once
# it is confirmed that no later cell relies on the current names.
X, y = split_train_test(data=df, test_radio=0.2)

print(f"Shape of the X: {X.shape}\n")
print(f"Shape of y: {y.shape}")




In [None]:
from zlib import crc32


def test_set_check(identifier, test_ratio):
    return crc32(np.int64(identifier)) & 0xFFFFFFFF < test_ratio * 2**32


def split_train_test_by_id(data, test_ratio, id_column):
    ids = data[id_column]