In [1]:
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
import torch
import torch.nn as nn

class Standardizer(nn.Module):
    """Column-wise z-score normalizer whose statistics are module buffers.

    ``fit`` computes the mean and standard deviation over ``dim=0`` of the
    given tensor; ``forward`` applies ``(X - mean) / std`` and ``inverse``
    undoes it. ``mean`` and ``std`` are registered buffers, so once fitted
    they are part of ``state_dict()``.

    The standard deviation uses torch's default unbiased estimator (divide by
    ``n - 1``), so values differ slightly from scikit-learn's
    ``StandardScaler``, which divides by ``n``.

    Args:
        eps: Lower bound applied to ``std`` so constant columns do not cause
            a division by zero.

    Usage::

        x_norm = Standardizer().fit(X_train)
        y_norm = Standardizer().fit(y_train)

        Xtr = x_norm(X_train)
        Xte = x_norm(X_test)
        ytr = y_norm(y_train)
        yte = y_norm(y_test)

        # At inference:
        with torch.no_grad():
            y_pred = y_norm.inverse(model(Xte))
    """

    def __init__(self, eps=1e-8):
        super().__init__()
        # Placeholders until fit() or load_state_dict() supplies real tensors.
        self.register_buffer("mean", None)
        self.register_buffer("std", None)
        self.eps = eps

    def fit(self, X):
        """Compute per-column statistics from ``X`` and return ``self``.

        Raises:
            ValueError: If ``X`` contains no samples along ``dim=0``.
        """
        n_rows = X.shape[0] if X.dim() > 0 else 1
        if n_rows == 0:
            raise ValueError("Standardizer.fit() requires at least one sample")
        with torch.no_grad():
            self.mean = X.mean(dim=0)
            # The unbiased estimator is NaN for a single sample and clamp_min
            # does not repair NaN, so use the population std (0) in that case.
            self.std = X.std(dim=0, unbiased=n_rows > 1).clamp_min(self.eps)
        return self

    def _check_fitted(self):
        """Raise a clear error instead of an opaque ``Tensor - None`` TypeError."""
        if self.mean is None or self.std is None:
            raise RuntimeError(
                "Standardizer has not been fitted; call fit() before transforming data"
            )

    def forward(self, X):
        """Return ``X`` standardized with the fitted statistics."""
        self._check_fitted()
        return (X - self.mean) / self.std

    def inverse(self, Xn):
        """Map standardized values ``Xn`` back to the original scale."""
        self._check_fitted()
        return Xn * self.std + self.mean

    def _load_from_state_dict(self, state_dict, prefix, *args, **kwargs):
        # nn.Module ignores buffers that are currently None while loading, so
        # a fresh instance would reject a fitted instance's "mean"/"std" as
        # unexpected keys. Allocate placeholders matching the saved tensors so
        # the default loading logic can copy the values in.
        for name in ("mean", "std"):
            saved = state_dict.get(prefix + name)
            if saved is not None and getattr(self, name) is None:
                setattr(self, name, torch.empty_like(saved))
        super()._load_from_state_dict(state_dict, prefix, *args, **kwargs)


In [3]:
torch.manual_seed(42)

n_samples = 1000

# Three synthetic features per house: size (m2), number of bedrooms, age.
# torch.randint's upper bound is exclusive. The draw order (size, bedrooms,
# age, noise) determines the generated data under the fixed seed.
size = torch.randint(40, 2000, (n_samples, 1), dtype=torch.float32)
bedrooms = torch.randint(1, 6, (n_samples, 1), dtype=torch.float32)
age = torch.randint(0, 50, (n_samples, 1), dtype=torch.float32)

# Stack into feature matrix X (shape [n_samples, 3])
X = torch.cat([size, bedrooms, age], dim=1)
assert X.shape == (n_samples, 3)


# True weights for each feature (hidden truth)
true_weights = torch.tensor([[0.5],   # per m2
                             [20.0],  # per bedroom
                             [-0.3]]) # per year of age

true_bias = 30.0  # base price (100 000 NOK)
# Generate target y = Xw + b + noise (Gaussian noise with std 10)
noise = torch.randn(n_samples, 1) * 10
y = X @ true_weights + true_bias + noise
assert y.shape == (n_samples, 1)

x_scaler = StandardScaler()
y_scaler = StandardScaler()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
assert len(X_train) + len(X_test) == n_samples
device = "cuda" if torch.cuda.is_available() else "cpu"

# Fit the scalers on the training split only, then reuse those statistics for
# the test split so no test-set information leaks into preprocessing.
Xtr = torch.from_numpy(x_scaler.fit_transform(X_train.numpy())).float().to(device)
Xte = torch.from_numpy(x_scaler.transform(X_test.numpy())).float().to(device)

ytr = torch.from_numpy(y_scaler.fit_transform(y_train.numpy())).float().to(device)
yte = torch.from_numpy(y_scaler.transform(y_test.numpy())).float().to(device)

In [4]:
import torch.nn as nn

class LinRegModel(nn.Module):
    """Linear regression model consisting of a single ``nn.Linear`` layer.

    Args:
        in_features: Number of input features per sample. Defaults to 3.
        out_features: Number of predicted targets per sample. Defaults to 1.
    """

    def __init__(self, in_features=3, out_features=1):
        super().__init__()
        self.linear = nn.Linear(in_features=in_features,
                                out_features=out_features)

    def forward(self, x):
        """Return the affine prediction for ``x`` of shape ``[..., in_features]``."""
        return self.linear(x)

In [5]:
# Training loop: full-batch optimization of the linear model on the
# standardized training data, with a periodic loss report on the test split.
model = LinRegModel().to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

epochs = 100_000
for epoch in range(epochs):
    # The periodic evaluation below switches to eval mode, so re-enable
    # training mode at the start of every iteration.
    model.train()

    # Clear stale gradients, then forward pass, backward pass, update.
    optimizer.zero_grad()
    y_pred = model(Xtr)
    loss = loss_fn(y_pred, ytr)
    loss.backward()
    optimizer.step()

    # Only report every 200th epoch.
    if epoch % 200 != 0:
        continue

    model.eval()
    with torch.no_grad():
        val_loss = loss_fn(model(Xte), yte)
    print(f"Epoch {epoch}, Train loss: {loss.item():.2f}, Val loss: {val_loss.item():.2f}")


Epoch 0, Train loss: 1.13, Val loss: 1.24
Epoch 200, Train loss: 1.12, Val loss: 1.24
Epoch 400, Train loss: 1.11, Val loss: 1.23
Epoch 600, Train loss: 1.10, Val loss: 1.22
Epoch 800, Train loss: 1.10, Val loss: 1.21
Epoch 1000, Train loss: 1.09, Val loss: 1.20
Epoch 1200, Train loss: 1.08, Val loss: 1.19
Epoch 1400, Train loss: 1.07, Val loss: 1.18
Epoch 1600, Train loss: 1.07, Val loss: 1.18
Epoch 1800, Train loss: 1.06, Val loss: 1.17
Epoch 2000, Train loss: 1.05, Val loss: 1.16
Epoch 2200, Train loss: 1.05, Val loss: 1.15
Epoch 2400, Train loss: 1.04, Val loss: 1.14
Epoch 2600, Train loss: 1.03, Val loss: 1.13
Epoch 2800, Train loss: 1.02, Val loss: 1.13
Epoch 3000, Train loss: 1.02, Val loss: 1.12
Epoch 3200, Train loss: 1.01, Val loss: 1.11
Epoch 3400, Train loss: 1.00, Val loss: 1.10
Epoch 3600, Train loss: 1.00, Val loss: 1.09
Epoch 3800, Train loss: 0.99, Val loss: 1.08
Epoch 4000, Train loss: 0.98, Val loss: 1.08
Epoch 4200, Train loss: 0.98, Val loss: 1.07
Epoch 4400, Train

In [6]:
# The model was trained on standardized inputs and targets, so these learned
# parameters are in standardized space. They are NOT directly comparable to
# the original-scale ground truth; the labels say so explicitly.
print(f"Learned weights (standardized space): {model.linear.weight.detach()} | True values (original scale): {true_weights}")
print(f"Learned bias (standardized space): {model.linear.bias.detach()} | True value (original scale): {true_bias}")


Learned weights: tensor([[ 0.9884,  0.0990, -0.0150]], device='cuda:0') | True values: tensor([[ 0.5000],
        [20.0000],
        [-0.3000]])
Learned bias: tensor([-1.0901e-09], device='cuda:0') | True value: 30.0


In [7]:
# Scaler statistics (NumPy arrays) as float32 CPU tensors; the scales are
# clamped away from zero before they are used as divisors.
mx = torch.tensor(x_scaler.mean_, dtype=torch.float32)                    # [3]
sx = torch.tensor(x_scaler.scale_, dtype=torch.float32).clamp_min(1e-8)   # [3]
my = torch.tensor(y_scaler.mean_, dtype=torch.float32)                    # [1]
sy = torch.tensor(y_scaler.scale_, dtype=torch.float32).clamp_min(1e-8)   # [1]

# Parameters learned in standardized space, moved to CPU.
Wn = model.linear.weight.detach().cpu().squeeze(0)   # [3]
bn = model.linear.bias.detach().cpu().squeeze(0)     # []

# Undo the standardization analytically:
#   y = sy * (Wn . (x - mx) / sx + bn) + my
#     = (sy * Wn / sx) . x + sy * (bn - (mx / sx) . Wn) + my
sy_scalar = sy.squeeze(0)
my_scalar = my.squeeze(0)
mean_shift = (mx / sx) @ Wn
W_orig = sy_scalar * (Wn / sx)                       # [3]
b_orig = sy_scalar * (bn - mean_shift) + my_scalar   # []

print("True   W:", true_weights.squeeze().tolist(), " b:", float(true_bias))
print("Learned W:", W_orig.tolist(), " b:", float(b_orig))

# Test-set MSE measured in the original target units.
with torch.no_grad():
    yhat_test_scaled = model(Xte).detach().cpu().numpy()
    yhat_test_orig = torch.from_numpy(y_scaler.inverse_transform(yhat_test_scaled))
    residual = yhat_test_orig - y_test.cpu()
    mse_orig = residual.pow(2).mean().item()
print(f"MSE (original scale): {mse_orig:.3f}")

True   W: [0.5, 20.0, -0.30000001192092896]  b: 30.0
Learned W: [0.500735878944397, 19.950336456298828, -0.29907187819480896]  b: 29.71417236328125
MSE (original scale): 100.692
