In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from itertools import product
import os

# === Step 1: Load and preprocess data ===
data = pd.read_csv("housing.csv")

# Integer-encode the categorical column. The three original codes are kept
# unchanged; every other category found in the file gets its own new code.
# Without this, unlisted categories become NaN under .map() and are then
# turned into 0 by fillna(0), which makes them indistinguishable from
# "NEAR BAY".
proximity_codes = {
    "NEAR BAY": 0,
    "<1H OCEAN": 1,
    "INLAND": 2,
}
unlisted = sorted(set(data["ocean_proximity"].dropna()) - set(proximity_codes))
for code, category in enumerate(unlisted, start=len(proximity_codes)):
    proximity_codes[category] = code
data["ocean_proximity"] = data["ocean_proximity"].map(proximity_codes)

# Any NaN still present is a genuinely missing value; impute it with 0.
data = data.fillna(0)

# Keep the columns up to and including the target; any column positioned
# after 'median_house_value' in the file is discarded here.
col_index = data.columns.get_loc('median_house_value')
data = data.iloc[:, :col_index + 1]

# Features are every remaining column except the target; the target is kept
# as a column vector of shape (n_samples, 1).
X_raw = data.drop(columns=['median_house_value']).values
y_raw = data['median_house_value'].values.reshape(-1, 1)

# === Step 2: Normalize features ===
# Only the features are min-max scaled; the target stays in its raw units.
x_scaler = MinMaxScaler()
X_scaled = x_scaler.fit_transform(X_raw)

X = torch.tensor(X_scaled, dtype=torch.float32)
y = torch.tensor(y_raw, dtype=torch.float32)

input_dim = X.shape[1]
print(f"Input dimension: {input_dim}")

# === Step 3: Define Neural Network architectures ===
class ForwardModel(nn.Module):
    """Feed-forward regressor f: x -> y.

    Two hidden linear layers, each followed by an activation, then a
    single-unit linear output.

    Args:
        input_dim: Number of input features.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        activation: Name of an activation class in ``torch.nn``
            (e.g. ``"ReLU"``); it is instantiated with no arguments.

    Raises:
        AttributeError: If ``activation`` is not an attribute of ``torch.nn``.
    """
    def __init__(self, input_dim: int, hidden1: int, hidden2: int, activation: str):
        super().__init__()
        act_cls = getattr(nn, activation)
        # One activation instance per position: a single shared instance
        # would tie the parameters of learnable activations (e.g. PReLU)
        # across both hidden layers.
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden1),
            act_cls(),
            nn.Linear(hidden1, hidden2),
            act_cls(),
            nn.Linear(hidden2, 1)
        )

    def forward(self, x):
        """Apply the network to ``x``; the output's last dimension is 1."""
        return self.net(x)

class InversionModel(nn.Module):
    """Feed-forward inverse map g: y -> x.

    Mirrors the forward model's layer widths in reverse order
    (1 -> hidden2 -> hidden1 -> input_dim) and ends with a sigmoid so every
    output lies in [0, 1].

    Args:
        input_dim: Number of features to generate (size of the output).
        hidden1: Width of the second hidden layer.
        hidden2: Width of the first hidden layer.
        activation: Name of an activation class in ``torch.nn``
            (e.g. ``"ReLU"``); it is instantiated with no arguments.

    Raises:
        AttributeError: If ``activation`` is not an attribute of ``torch.nn``.
    """
    def __init__(self, input_dim: int, hidden1: int, hidden2: int, activation: str):
        super().__init__()
        act_cls = getattr(nn, activation)
        # One activation instance per position: a single shared instance
        # would tie the parameters of learnable activations (e.g. PReLU)
        # across both hidden layers.
        self.net = nn.Sequential(
            nn.Linear(1, hidden2),
            act_cls(),
            nn.Linear(hidden2, hidden1),
            act_cls(),
            nn.Linear(hidden1, input_dim),
            nn.Sigmoid()  # ensures outputs in [0,1]
        )

    def forward(self, y):
        """Apply the network to ``y``; the output's last dimension is ``input_dim``."""
        return self.net(y)

# === Step 4: Define Grid Search space ===
# Candidate values per hyperparameter; key order matches the order in which
# the values are unpacked by the search loop.
param_grid = dict(
    hidden1=[32, 64, 128],
    hidden2=[16, 32, 64],
    activation=['ReLU', 'LeakyReLU'],
    lr=[0.01, 0.001, 0.0005],
    epochs=[300, 500],
)

# Accumulator for one entry per evaluated configuration.
results = []
# Mean squared error is the training objective.
criterion = nn.MSELoss()

# === Step 5: Grid Search on Forward Model ===
# Every configuration is trained full-batch on (X, y) and scored by MAPE on
# that same data.
# NOTE(review): there is no held-out split, so the score reflects training
# fit rather than generalization.
y_true = y.numpy()  # give scikit-learn a NumPy array rather than a tensor
for hidden1, hidden2, activation, lr, epochs in product(
    param_grid['hidden1'], param_grid['hidden2'],
    param_grid['activation'], param_grid['lr'], param_grid['epochs']
):
    model = ForwardModel(input_dim, hidden1, hidden2, activation)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    model.train()  # the mode never changes inside the loop, so set it once
    for epoch in range(epochs):
        optimizer.zero_grad()
        y_pred = model(X)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        y_eval = model(X).numpy()
    mape = mean_absolute_percentage_error(y_true, y_eval)
    # epochs is part of the grid, so it is recorded and reported; it is
    # appended after mape so that fields 0-4 keep their positions
    # (hidden1, hidden2, activation, lr, mape).
    results.append((hidden1, hidden2, activation, lr, mape, epochs))
    print(f"Params: h1={hidden1}, h2={hidden2}, act={activation}, lr={lr}, epochs={epochs}, MAPE={mape:.5f}")

# === Step 6: Pick the best configuration ===
# Lowest MAPE wins (field 4 of each result tuple).
best_params = min(results, key=lambda x: x[4])
hidden1, hidden2, activation, lr = best_params[:4]
best_epochs = best_params[5]
print(f"\n Best Parameters:")
print(f"Hidden1={hidden1}, Hidden2={hidden2}, Activation={activation}, LR={lr}, Epochs={best_epochs}, MAPE={best_params[4]:.6f}")

# === Step 7: Train symmetric Forward & Inverse models jointly ===
# f (x -> y) and g (y -> x) are built with the configuration selected by the
# grid search and trained full-batch. Each model has its own Adam optimizer,
# both using the same learning rate.
f = ForwardModel(input_dim, hidden1, hidden2, activation)
g = InversionModel(input_dim, hidden1, hidden2, activation)
optimizer_f = optim.Adam(f.parameters(), lr=lr)
optimizer_g = optim.Adam(g.parameters(), lr=lr)

lambda_cycle = 0.1  # weight of the x -> y -> x reconstruction term in the loss
epochs = 1000
for epoch in range(epochs):
    optimizer_f.zero_grad()
    optimizer_g.zero_grad()

    # forward mapping
    y_pred = f(X)

    # add noise for inversion regularization
    # NOTE(review): y is built from the raw target column with no scaling, so
    # this noise (std 0.005) is expressed in the target's original units --
    # confirm that magnitude is what was intended.
    noise = torch.randn_like(y) * 0.005
    x_hat = g(y + noise)

    # reconstruction
    y_recon = f(x_hat)  # y -> x -> y round trip
    x_recon = g(y_pred)  # x -> y -> x round trip

    # joint loss: forward fit + y round-trip error + weighted x round-trip error
    loss = criterion(y_pred, y) + criterion(y_recon, y) + lambda_cycle * criterion(x_recon, X)
    # Every term depends on f, and the two round-trip terms also depend on g,
    # so this single backward pass produces gradients for both models.
    loss.backward()
    optimizer_f.step()
    optimizer_g.step()

    # Report progress every 100 epochs.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{epochs}] - Joint Loss: {loss.item():.6f}")

print("Joint training complete.")

# === Step 8: Inverse generation ===
# Sweep 1000 evenly spaced targets across the observed range of y, map each
# one through g, and undo the feature scaling to obtain feature rows.
with torch.no_grad():
    y_min = y.min().item()
    y_max = y.max().item()
    y_new = torch.linspace(y_min, y_max, 1000).unsqueeze(1)
    # Generate scaled features and keep them inside the scaler's [0, 1] range.
    X_gen_scaled = np.clip(g(y_new).detach().numpy(), 0, 1)
    X_gen_real = x_scaler.inverse_transform(X_gen_scaled)

# Feature column labels: every column of data except the target.
cols = data.columns.drop('median_house_value')
df_gen = pd.DataFrame(X_gen_real, columns=cols).assign(
    target_y=y_new.numpy().flatten()
)

# Write the generated rows as cycle 0, creating the output folder if needed.
os.makedirs("generated_cycles", exist_ok=True)
df_gen.to_csv("generated_cycles/generated_cycle_0.csv", index=False)
print("Initial inverse generation complete.")

# === Step 9: Iterative regeneration ===
# Each cycle pushes the current targets through g -> clamp -> f three times
# and saves the resulting features and targets.
#
# The state is carried from one cycle to the next. If y_cycle were reset to
# y_new at the top of every cycle, each cycle would repeat the same
# deterministic computation and every output file would be identical.
num_cycles = 5
os.makedirs("generated_cycles", exist_ok=True)  # do not rely on an earlier step
y_cycle = y_new.clone()
for cycle in range(1, num_cycles + 1):
    with torch.no_grad():
        for _ in range(3):  # inner refinement loop
            # clamp keeps the generated features inside the scaler's [0, 1]
            # range before they are fed back into f
            X_cycle_scaled = torch.clamp(g(y_cycle), 0, 1)
            y_cycle = f(X_cycle_scaled)

        # Already clamped and gradient-free, so convert directly.
        X_cycle_scaled = X_cycle_scaled.numpy()
        X_cycle_real = x_scaler.inverse_transform(X_cycle_scaled)
        y_cycle_real = y_cycle.numpy()

    df_cycle = pd.DataFrame(X_cycle_real, columns=cols)
    df_cycle['target_y'] = y_cycle_real.flatten()
    df_cycle.to_csv(f"generated_cycles/generated_cycle_{cycle}.csv", index=False)
    print(f"Cycle {cycle} saved ")

print(" All regeneration cycles complete.")

Input dimension: 8
Params: h1=32, h2=16, act=ReLU, lr=0.01, MAPE=0.56898
Params: h1=32, h2=16, act=ReLU, lr=0.01, MAPE=0.57875
Params: h1=32, h2=16, act=ReLU, lr=0.001, MAPE=0.99918
Params: h1=32, h2=16, act=ReLU, lr=0.001, MAPE=0.99691
Params: h1=32, h2=16, act=ReLU, lr=0.0005, MAPE=0.99987
Params: h1=32, h2=16, act=ReLU, lr=0.0005, MAPE=0.99971
Params: h1=32, h2=16, act=LeakyReLU, lr=0.01, MAPE=0.56728
Params: h1=32, h2=16, act=LeakyReLU, lr=0.01, MAPE=0.56634
Params: h1=32, h2=16, act=LeakyReLU, lr=0.001, MAPE=0.99916
Params: h1=32, h2=16, act=LeakyReLU, lr=0.001, MAPE=0.99882
Params: h1=32, h2=16, act=LeakyReLU, lr=0.0005, MAPE=0.99991
Params: h1=32, h2=16, act=LeakyReLU, lr=0.0005, MAPE=0.99944
Params: h1=32, h2=32, act=ReLU, lr=0.01, MAPE=0.44523
Params: h1=32, h2=32, act=ReLU, lr=0.01, MAPE=0.59201
Params: h1=32, h2=32, act=ReLU, lr=0.001, MAPE=0.99884
Params: h1=32, h2=32, act=ReLU, lr=0.001, MAPE=0.99368
Params: h1=32, h2=32, act=ReLU, lr=0.0005, MAPE=0.99983
Params: h1=32, h2