In [None]:
import torch
import torch.nn as nn
import torch.functional as F
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np

import numbers

In [None]:
# Read the training set from disk; the "Id" column is used as the row index.
data: pd.DataFrame = pd.read_csv(
    "./data/train.csv",
    index_col="Id",
)

In [None]:
# Keep only the numeric columns of the training frame.
data_numerical = data.select_dtypes(include='number')

# Hand-picked subset of the numeric columns, addressed by position within
# `data_numerical.columns`.
feature_positions = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 22, 24, 26, 27, 28, 29]
selected_columns = data_numerical.columns[feature_positions]

# Build a new frame from the chosen columns and discard every column that
# contains at least one missing value (axis=1 drops columns, not rows).
selected_data = data_numerical[selected_columns].dropna(axis=1)

In [None]:
# Seed PyTorch's RNG so weight initialisation, and therefore the whole
# training run, is repeatable under Restart & Run All.
torch.manual_seed(0)

# Training feature matrix: one row per training record, one column per
# retained feature of `selected_data`.
data_ = torch.tensor(selected_data.values.astype(np.float32))

# Per-column statistics of the TRAINING set. They are kept in `mean` / `std`
# so that any later data can be scaled in exactly the same way.
mean = torch.mean(data_, dim=0)
std = torch.std(data_, dim=0)
std[std == 0] = 1  # constant columns: avoid dividing by zero

data_normalized = (data_ - mean) / std

# The inputs are fixed data, not something being optimised, so they do not
# need gradients; only the network parameters do.
X = data_normalized
# The network is fitted to log(SalePrice); predictions are mapped back to
# prices with exp() at inference time.
y = torch.tensor(data["SalePrice"].values.astype(np.float32)).log()
target = y.view(-1, 1)  # column vector matching the (N, 1) network output

# Small fully connected regressor. Layer sizes are spelled out so that all
# parameters exist before the optimizer is constructed.
my_module = nn.Sequential(
    nn.Linear(X.shape[1], 20),
    nn.ReLU(),
    nn.Linear(20, 15),
    nn.ReLU(),
    nn.Linear(15, 1),
)

# Set up the optimizer and loss function
optimizer = torch.optim.Adam(my_module.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

# Training loop: full-batch gradient descent on the whole training set.
losses = []  # one MSE value per epoch, used for the loss curve
num_epochs = 22000
for epoch in range(num_epochs):
    # Forward pass
    outputs = my_module(X)
    loss = loss_fn(outputs, target)
    loss_value = loss.item()
    losses.append(loss_value)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print intermediate results
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss_value:.4f}')

print('Training finished.')

In [None]:
# Plot the loss curve without its first 500 entries (presumably so the large
# early losses do not dominate the y-axis); the x-axis keeps the true epoch index.
burn_in = 500
tail_losses = losses[burn_in:]
plt.plot(range(burn_in, burn_in + len(tail_losses)), tail_losses)

In [None]:
# Show the dimensions of the normalized training feature tensor.
X.shape

In [None]:
# Read the test set from disk; as with the training data, "Id" is the row index.
test_data: pd.DataFrame = pd.read_csv(
    "./data/test.csv",
    index_col="Id",
)

In [None]:
# List the column labels of the training DataFrame.
data.columns

In [None]:
# First pass at the test features: take the same columns that survived
# feature selection on the training set (never the target column).
data_test = test_data[[c for c in selected_data.columns if c != "SalePrice"]].copy()
data_test_tensor = torch.tensor(data_test.values.astype(np.float32))

# Scale every column with the per-column statistics computed on the training
# set (`mean`, `std`), so test inputs live on the same scale the network was
# trained on. A single scalar mean/std over the whole tensor would mix
# features of very different magnitude, and one NaN would poison every entry.
# Missing values are not imputed in this cell, so NaNs in the test data remain
# NaN here; the following cell rebuilds these variables with imputation.
data_test_normalized = (data_test_tensor - mean) / std

In [None]:
import numpy as np
import torch

# Requires `test_data`, `selected_data` and the training statistics
# `mean` / `std` from the cells above.

# Use exactly the feature columns the network was trained on.
feature_columns = [c for c in selected_data.columns if c != "SalePrice"]
data_test = test_data[feature_columns].copy()

# Impute missing test values with the TRAINING column means, so the fill value
# does not depend on the composition of the test set and becomes 0 after
# normalisation.
data_test = data_test.fillna(selected_data[feature_columns].mean())

# Convert the DataFrame to a PyTorch tensor
data_test_tensor = torch.tensor(data_test.values.astype(np.float32))

assert data_test_tensor.shape[1] == mean.shape[0], (
    "Test features do not match the columns used for the training statistics"
)

# Normalise with the training-set statistics. The network only ever saw inputs
# scaled by these values; recomputing mean/std on the test set would shift and
# rescale its inputs (and overwrite the training statistics). `std` already has
# zero entries replaced by 1, so no division by zero can occur.
data_test_normalized = (data_test_tensor - mean) / std

# Ensure the network will not be fed NaN or infinite values.
assert torch.isfinite(data_test_normalized).all(), "There are NaNs or infs in the normalized data"

In [None]:
# Inference only: disable autograd so no graph is built and no detach() is needed.
with torch.no_grad():
    # The network outputs shape (N, 1); squeeze to a flat vector of N predictions.
    log_price_pred = my_module(data_test_normalized).squeeze(1)

# The network was fitted to log(SalePrice), so exp() maps predictions back to
# prices. Hand pandas an explicit 1-D NumPy array instead of relying on its
# implicit coercion of a 2-D torch tensor.
data_test["SalePrice"] = log_price_pred.exp().numpy()

In [None]:
# Turn the "Id" index back into an ordinary column; reset_index() already
# returns a new DataFrame, leaving `data_test` untouched.
data_submission = data_test.reset_index()

In [None]:
# Write the two submission columns to disk. to_csv() returns None when given a
# path, so there is nothing useful to print.
data_submission[["Id", "SalePrice"]].to_csv("submission.csv", index=False)

In [None]:
# Display the training DataFrame using the notebook's rich representation.
data