In [111]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from tqdm import tqdm
import numpy as np
import pandas as pd
from sklearn import preprocessing
import matplotlib.pyplot as plt
import cv2
import os

In [112]:
def data_preparation(train_data_path: str, test_data_path: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    raw_train_data_df = pd.read_csv(train_data_path, index_col=0)
    train_SalePrice = raw_train_data_df.pop("SalePrice")
    numeric_features = raw_train_data_df.select_dtypes(include="number").columns.tolist()
    train_data_df = pd.get_dummies(raw_train_data_df, dtype="float32")
    train_data_df.fillna(value=0.0, inplace=True)
    train_data_df["SalePrice"] = train_SalePrice.apply(np.log)

    raw_test_data_df = pd.read_csv(test_data_path, index_col=0)
    test_data_df = pd.get_dummies(raw_test_data_df, dtype="float32")
    test_data_df = test_data_df.reindex(columns=train_data_df.columns, fill_value=0)
    test_data_df.fillna(value=0.0, inplace=True)

    standard_scaler = preprocessing.StandardScaler()
    train_data_df[numeric_features] = standard_scaler.fit_transform(train_data_df[numeric_features])
    test_data_df[numeric_features] = standard_scaler.transform(test_data_df[numeric_features])

    return train_data_df, test_data_df

In [113]:
class HouseDataset(torch.utils.data.Dataset):
    def __init__(self, df: pd.DataFrame):
        prices = df.pop("SalePrice").values
        features = df.values
        
        self.features = torch.tensor(features, dtype=torch.float32)
        self.prices = torch.tensor(prices, dtype=torch.float32)
        
    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, idx):
        return self.features[idx], self.prices[idx]

In [114]:
train_path = "./data/train.csv"
test_path = "./data/test.csv"

full_train_dataset_df, test_dataset_df = data_preparation(train_path, test_path)
full_train_dataset = HouseDataset(full_train_dataset_df)

print(type(full_train_dataset))
print(type(full_train_dataset[0]))
print(type(full_train_dataset[0][0]))
print(type(full_train_dataset[0][1]))
print(full_train_dataset[0][1])
print(full_train_dataset[0][0].shape)
print(full_train_dataset[0][1].shape)
print(full_train_dataset[0][0])

<class '__main__.HouseDataset'>
<class 'tuple'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
tensor(12.2477)
torch.Size([287])
torch.Size([])
tensor([ 0.0734,  0.2129, -0.2071,  0.6515, -0.5172,  1.0510,  0.8787,  0.5141,
         0.5754, -0.2887, -0.9446, -0.4593, -0.7934,  1.1619, -0.1202,  0.3703,
         1.1078, -0.2411,  0.7897,  1.2276,  0.1638, -0.2115,  0.9122, -0.9512,
         0.2960,  0.3117,  0.3510, -0.7522,  0.2165, -0.3593, -0.1163, -0.2702,
        -0.0687, -0.0877, -1.5991,  0.1388,  0.0000,  0.0000,  0.0000,  1.0000,
         0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         1.0000,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  1.0000,  1.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0

In [115]:
train_path = "./data/train.csv"
test_path = "./data/test.csv"

full_train_dataset_df, test_dataset_df = data_preparation(train_path, test_path)

full_train_dataset = HouseDataset(full_train_dataset_df)

train_dataset, val_dataset = torch.utils.data.random_split(full_train_dataset, [0.95, 0.05])
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=16 ,shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=16)

test_dataset = HouseDataset(test_dataset_df)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=16)

In [116]:
print(len(full_train_dataset))
print(len(train_dataset))
print(len(val_dataset))
a, b = full_train_dataset[0]
print(a)
print(b)


1460
1387
73
tensor([ 0.0734,  0.2129, -0.2071,  0.6515, -0.5172,  1.0510,  0.8787,  0.5141,
         0.5754, -0.2887, -0.9446, -0.4593, -0.7934,  1.1619, -0.1202,  0.3703,
         1.1078, -0.2411,  0.7897,  1.2276,  0.1638, -0.2115,  0.9122, -0.9512,
         0.2960,  0.3117,  0.3510, -0.7522,  0.2165, -0.3593, -0.1163, -0.2702,
        -0.0687, -0.0877, -1.5991,  0.1388,  0.0000,  0.0000,  0.0000,  1.0000,
         0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         1.0000,  0.0000,  0.0000,  0.0000,  1.0000,  1.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  1.0000,  1.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  

In [117]:
class HouseNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(287, 200)
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(200, 200)
        self.dropout2 = nn.Dropout(0.2)
        self.fc3 = nn.Linear(200, 1)
        self.activation = nn.ReLU()
        
    def forward(self, x):
        x = self.dropout1(self.activation(self.fc1(x)))
        x = self.dropout2(self.activation(self.fc2(x)))
        x = self.fc3(x)
        return x
    

In [118]:
model = HouseNetwork()
loss_fun = nn.MSELoss()
#optimizer = torch.optim.SGD(model.parameters(), lr = 0.0001, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
epochs = 10000

model.train()
# Цикл обучения
for epoch in range(epochs):
    epoch_loss = 0.0
    for house, price in train_dataloader:
        optimizer.zero_grad()
        prediction = model(house)
        prediction = prediction.squeeze()
        loss = torch.sqrt(loss_fun(prediction, price))
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}')

Epoch 1/10000, Loss: 266.6138
Epoch 2/10000, Loss: 87.0918
Epoch 3/10000, Loss: 86.0419
Epoch 4/10000, Loss: 82.7271
Epoch 5/10000, Loss: 78.6317
Epoch 6/10000, Loss: 78.6383
Epoch 7/10000, Loss: 82.8603
Epoch 8/10000, Loss: 81.1836
Epoch 9/10000, Loss: 73.9973
Epoch 10/10000, Loss: 74.8295
Epoch 11/10000, Loss: 75.4542
Epoch 12/10000, Loss: 75.7228
Epoch 13/10000, Loss: 73.7525
Epoch 14/10000, Loss: 76.5819
Epoch 15/10000, Loss: 77.2193
Epoch 16/10000, Loss: 73.2065
Epoch 17/10000, Loss: 74.2983
Epoch 18/10000, Loss: 72.0229
Epoch 19/10000, Loss: 74.4093
Epoch 20/10000, Loss: 72.3207
Epoch 21/10000, Loss: 70.5494
Epoch 22/10000, Loss: 73.2126
Epoch 23/10000, Loss: 71.7530
Epoch 24/10000, Loss: 69.3165
Epoch 25/10000, Loss: 69.0209
Epoch 26/10000, Loss: 74.1402
Epoch 27/10000, Loss: 72.3817
Epoch 28/10000, Loss: 69.2338
Epoch 29/10000, Loss: 70.0240
Epoch 30/10000, Loss: 70.1953
Epoch 31/10000, Loss: 68.4208
Epoch 32/10000, Loss: 67.9261
Epoch 33/10000, Loss: 69.7585
Epoch 34/10000, Lo

In [None]:
print(model)

In [None]:
print(train_dataset[0][0])

In [None]:
model.eval()
print(np.exp(val_dataset[10][1]).item())
print(np.exp(model(val_dataset[10][0]).item()))

In [None]:
torch.save(model, 'model.pt')

In [None]:
Id = [id + 1461 for id in range(len(test_dataset))]
print(Id)

In [110]:
Id = [id + 1461 for id in range(len(test_dataset))]
print(Id)
SalePrice = [np.exp(model(test_dataset[i][0]).item()) for i in range(len(test_dataset))]
print(SalePrice)

submission = pd.DataFrame({'Id': Id, 'SalePrice': SalePrice})
print(submission)    
submission.to_csv('submission.csv', index=False)

[1461, 1462, 1463, 1464, 1465, 1466, 1467, 1468, 1469, 1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479, 1480, 1481, 1482, 1483, 1484, 1485, 1486, 1487, 1488, 1489, 1490, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508, 1509, 1510, 1511, 1512, 1513, 1514, 1515, 1516, 1517, 1518, 1519, 1520, 1521, 1522, 1523, 1524, 1525, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1533, 1534, 1535, 1536, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545, 1546, 1547, 1548, 1549, 1550, 1551, 1552, 1553, 1554, 1555, 1556, 1557, 1558, 1559, 1560, 1561, 1562, 1563, 1564, 1565, 1566, 1567, 1568, 1569, 1570, 1571, 1572, 1573, 1574, 1575, 1576, 1577, 1578, 1579, 1580, 1581, 1582, 1583, 1584, 1585, 1586, 1587, 1588, 1589, 1590, 1591, 1592, 1593, 1594, 1595, 1596, 1597, 1598, 1599, 1600, 1601, 1602, 1603, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1622, 1623, 1624, 1625, 1626, 162