In [10]:
import torch
from torch import nn, optim
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import csv

In [11]:
device = torch.device("cuda")
data = pd.read_csv('train_data.csv')
data_dropped = data.drop(columns=['HallwayType','HeatingType','AptManageType','SubwayStation', 'TimeToBusStop', 'TimeToSubway'])
pd_HallwayTypepd = pd.get_dummies(data['HallwayType'],  dtype=float)
pd_HeatingType = pd.get_dummies(data['HeatingType'],  dtype=float)
pd_AptManageType = pd.get_dummies(data['AptManageType'],  dtype=float)
pd_SubwayStation = pd.get_dummies(data['SubwayStation'],  dtype=float)
pd_TimeToSubway = pd.get_dummies(data['TimeToSubway'],  dtype=float)
pd_TimeToBusStop = pd.get_dummies(data['TimeToBusStop'],  dtype=float)
data_encoded = pd.concat([pd_HallwayTypepd,pd_HeatingType ,pd_AptManageType,pd_SubwayStation,pd_TimeToSubway,pd_TimeToBusStop , data_dropped], axis=1, join='outer')
X = data_encoded.drop(columns=['SalePrice']).values
Y = data_encoded['SalePrice'].values

[[ 0.  0.  1. ...  5.  6.  9.]
 [ 1.  0.  0. ...  3. 12.  4.]
 [ 1.  0.  0. ...  3. 12.  4.]
 ...
 [ 0.  0.  1. ... 10.  9. 10.]
 [ 1.  0.  0. ...  3.  7. 11.]
 [ 0.  0.  1. ... 10.  9. 10.]]
[141592  51327  48672 ... 357522 312389 393805]
(4124, 33)
(4124,)


In [12]:
test_data = pd.read_csv('test_data.csv')
test_data_dropped = test_data.drop(columns=['HallwayType','HeatingType','AptManageType','SubwayStation', 'TimeToBusStop', 'TimeToSubway'])
pd_HallwayTypepd = pd.get_dummies(test_data['HallwayType'],  dtype=float)
pd_HeatingType = pd.get_dummies(test_data['HeatingType'],  dtype=float)
pd_AptManageType = pd.get_dummies(test_data['AptManageType'],  dtype=float)
pd_SubwayStation = pd.get_dummies(test_data['SubwayStation'],  dtype=float)
pd_TimeToSubway = pd.get_dummies(test_data['TimeToSubway'],  dtype=float)
pd_TimeToBusStop = pd.get_dummies(test_data['TimeToBusStop'],  dtype=float)
test_data_encoded = pd.concat([pd_HallwayTypepd,pd_HeatingType ,pd_AptManageType,pd_SubwayStation,pd_TimeToSubway,pd_TimeToBusStop , test_data_dropped], axis=1, join='outer')

test_data = test_data_encoded.astype('float32')
test_data = torch.from_numpy(test_data.values[:,:])

tensor([[ 0.,  1.,  0.,  ...,  4., 14., 17.],
        [ 0.,  0.,  1.,  ...,  9., 14., 17.],
        [ 0.,  0.,  1.,  ...,  5.,  9.,  5.],
        ...,
        [ 0.,  1.,  0.,  ...,  4., 14., 17.],
        [ 0.,  0.,  1.,  ...,  8.,  7.,  9.],
        [ 0.,  1.,  0.,  ...,  4., 14., 17.]])


In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

tensor_list = []
for price in y_train:
    if price <= 100000:
        tensor_list.append(torch.tensor([1.0, 0.0, 0.0]))
    elif price > 350000:
        tensor_list.append(torch.tensor([0.0, 0.0, 1.0]))
    else:
        tensor_list.append(torch.tensor([0.0, 1.0, 0.0]))

y_train = torch.stack(tensor_list)


for idx, price in enumerate(y_test):
    if price <= 100000:
        y_test[idx] = 0
    elif price > 350000:
        y_test[idx] = 2
    else:
        y_test[idx] = 1


train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=True)


tensor([[0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        ...,
        [0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.]])
tensor([2., 1., 2., 2., 2., 2., 1., 0., 1., 1., 1., 2., 0., 2., 1., 1., 2., 1.,
        1., 1., 1., 1., 0., 1., 1., 2., 0., 1., 0., 1., 1., 1., 2., 2., 1., 1.,
        1., 1., 0., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 2.,
        1., 2., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 2., 1., 1., 1.,
        1., 1., 2., 0., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
        1., 1., 1., 1., 1., 2., 2., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 2.,
        0., 1., 0., 1., 2., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 2., 1., 2.,
        0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 2., 2., 1., 1., 1., 0.,
        1., 0., 1., 0., 0., 1., 0., 1., 2., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
        1., 1., 1., 1., 0., 2., 0., 1., 1., 2., 1., 2., 1., 0., 1., 1., 1., 1.,
        2., 1., 1., 2., 1., 1., 0., 1., 0., 0., 1., 1.

In [14]:
class PricePredictionModel(nn.Module):

    def __init__(self, num_inputs):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, 64)
        self.act_fn = nn.ReLU()
        self.linear2 = nn.Linear(64, 64)
        self.linear3 = nn.Linear(64, 128)
        self.linear6 = nn.Linear(128, 64)
        self.linear7 = nn.Linear(64, 3)

    def forward(self, x):
        x = self.linear1(x)
        x = self.act_fn(x)
        x = self.linear2(x)
        x = self.act_fn(x)
        x = self.linear3(x)
        x = self.act_fn(x)
        x = self.linear6(x)
        x = self.act_fn(x)
        x = self.linear7(x)
        return x


In [15]:
# Inicjalizacja modelu, funkcji straty i optymizatora
num_inputs = 33
model = PricePredictionModel(num_inputs)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [16]:
# Trening modelu
epochs = 100
model.train()
for epoch in range(epochs):
    for inputs, labels in train_loader:
            inputs = inputs.cuda()
            labels = labels.cuda()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')




Epoch [10/100], Loss: 0.3091
Epoch [20/100], Loss: 0.2170
Epoch [30/100], Loss: 0.2068
Epoch [40/100], Loss: 0.3155
Epoch [50/100], Loss: 0.3107
Epoch [60/100], Loss: 0.5449
Epoch [70/100], Loss: 0.5981
Epoch [80/100], Loss: 0.2535
Epoch [90/100], Loss: 0.4366
Epoch [100/100], Loss: 0.5023


In [17]:
model.eval()
true_preds, num_preds = 0., 0.

with torch.no_grad():
    for data_inputs, data_labels in test_loader:

        data_inputs, data_labels = data_inputs.to(device), data_labels.to(device)
        preds = model(data_inputs)
        class_preds = torch.empty(len(data_labels), dtype=torch.float32, device='cuda')
        for idx, pred in enumerate(preds):
            class_preds[idx] = torch.argmax(pred)

        pred_labels = (preds >= 0.5).long() 

        true_preds += (class_preds == data_labels).sum()
        num_preds += data_labels.shape[0]

acc = true_preds / num_preds
print(f"Accuracy of the model: {100.0*acc:4.2f}%")

Accuracy of the model: 84.12%
