In [1]:
import torch
from torch import nn, optim
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import csv



In [4]:
device = torch.device("cuda")
data = pd.read_csv('train_data.csv')

# Mapowanie wartości
mapping = {'0-5min': 0.0, '0~5min': 0.0, '5min~10min' : 1.0, '10min~15min' : 2.0,  '15min~20min' : 3.0, 'no_bus_stop_nearby' : 4.0}

data['TimeToSubway'] = data['TimeToSubway'].replace(mapping)
data['TimeToBusStop'] = data['TimeToBusStop'].replace(mapping)

# one hot encoding
data_dropped = data.drop(columns=['HallwayType','HeatingType','AptManageType','SubwayStation'])

pd_HallwayTypepd = pd.get_dummies(data['HallwayType'],  dtype=float)
pd_HeatingType = pd.get_dummies(data['HeatingType'],  dtype=float)
pd_AptManageType = pd.get_dummies(data['AptManageType'],  dtype=float)
pd_SubwayStation = pd.get_dummies(data['SubwayStation'],  dtype=float)


data_encoded = pd.concat([pd_HallwayTypepd,pd_HeatingType ,pd_AptManageType,pd_SubwayStation, data_dropped], axis=1, join='outer')
X = data_encoded.drop(columns=['SalePrice']).values
Y = data_encoded['SalePrice'].values
print(X)
print(Y)
print(X.shape)
print(Y.shape)

[[ 0.  0.  1. ...  5.  6.  9.]
 [ 1.  0.  0. ...  3. 12.  4.]
 [ 1.  0.  0. ...  3. 12.  4.]
 ...
 [ 0.  0.  1. ... 10.  9. 10.]
 [ 1.  0.  0. ...  3.  7. 11.]
 [ 0.  0.  1. ... 10.  9. 10.]]
[141592  51327  48672 ... 357522 312389 393805]
(4124, 27)
(4124,)


  data['TimeToSubway'] = data['TimeToSubway'].replace(mapping)
  data['TimeToBusStop'] = data['TimeToBusStop'].replace(mapping)


In [52]:
test = pd.read_csv('test_data.csv')

# Mapowanie wartości

test['TimeToSubway'] = test['TimeToSubway'].replace(mapping)
test['TimeToBusStop'] = test['TimeToBusStop'].replace(mapping)

# one hot encoding
test_dropped = test.drop(columns=['HallwayType','HeatingType','AptManageType','SubwayStation'])
pd_HallwayTypepd = pd.get_dummies(test['HallwayType'],  dtype=float)
pd_HeatingType = pd.get_dummies(test['HeatingType'],  dtype=float)
pd_AptManageType = pd.get_dummies(test['AptManageType'],  dtype=float)
pd_SubwayStation = pd.get_dummies(test['SubwayStation'],  dtype=float)
test_encoded = pd.concat([pd_HallwayTypepd,pd_HeatingType ,pd_AptManageType,pd_SubwayStation, test_dropped], axis=1, join='outer')

test = test_encoded.astype('float32')
test = torch.from_numpy(test.values[:,:])
print(test)

test_loader_test = DataLoader(test, batch_size=64, shuffle=False)
print(test_loader_test)


tensor([[ 0.,  1.,  0.,  ...,  4., 14., 17.],
        [ 0.,  0.,  1.,  ...,  9., 14., 17.],
        [ 0.,  0.,  1.,  ...,  5.,  9.,  5.],
        ...,
        [ 0.,  1.,  0.,  ...,  4., 14., 17.],
        [ 0.,  0.,  1.,  ...,  8.,  7.,  9.],
        [ 0.,  1.,  0.,  ...,  4., 14., 17.]])
<torch.utils.data.dataloader.DataLoader object at 0x76503b029900>


  test['TimeToSubway'] = test['TimeToSubway'].replace(mapping)
  test['TimeToBusStop'] = test['TimeToBusStop'].replace(mapping)


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

tensor_list = []
for price in y_train:
    if price <= 100000:
        tensor_list.append(torch.tensor([1.0, 0.0, 0.0]))
    elif price > 350000:
        tensor_list.append(torch.tensor([0.0, 0.0, 1.0]))
    else:
        tensor_list.append(torch.tensor([0.0, 1.0, 0.0]))

y_train = torch.stack(tensor_list)

# tensor_list = []
for idx, price in enumerate(y_test):
    if price <= 100000:
        y_test[idx] = 0
    elif price > 350000:
        y_test[idx] = 2
        # tensor_list.append(torch.tensor([0.0, 0.0, 1.0]))
    else:
        y_test[idx] = 1
        # tensor_list.append(torch.tensor([0.0, 1.0, 0.0]))

# y_test = torch.stack(tensor_list)
print(y_train)
print(y_test)

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_data, batch_size=64, shuffle=False)
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)


tensor([[0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        ...,
        [0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.]])
tensor([2., 1., 2., 2., 2., 2., 1., 0., 1., 1., 1., 2., 0., 2., 1., 1., 2., 1.,
        1., 1., 1., 1., 0., 1., 1., 2., 0., 1., 0., 1., 1., 1., 2., 2., 1., 1.,
        1., 1., 0., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 2.,
        1., 2., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 2., 1., 1., 1.,
        1., 1., 2., 0., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
        1., 1., 1., 1., 1., 2., 2., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 2.,
        0., 1., 0., 1., 2., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 2., 1., 2.,
        0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 2., 2., 1., 1., 1., 0.,
        1., 0., 1., 0., 0., 1., 0., 1., 2., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
        1., 1., 1., 1., 0., 2., 0., 1., 1., 2., 1., 2., 1., 0., 1., 1., 1., 1.,
        2., 1., 1., 2., 1., 1., 0., 1., 0., 0., 1., 1.

In [78]:
class PricePredictionModel(nn.Module):

    def __init__(self, num_inputs):
        super().__init__()
        self.linear1 = nn.Linear(num_inputs, 64)
        self.act_fn = nn.ReLU()
        self.linear2 = nn.Linear(64, 64)
        self.linear3 = nn.Linear(64, 128)
        # self.linear4 = nn.Linear(128, 256)
        # self.linear5 = nn.Linear(256, 128)
        self.linear6 = nn.Linear(128, 64)
        self.linear7 = nn.Linear(64, 3)

    def forward(self, x):
        # print(x.shape)
        x = self.linear1(x)
        x = self.act_fn(x)
        x = self.linear2(x)
        x = self.act_fn(x)
        x = self.linear3(x)
        x = self.act_fn(x)
        # x = self.linear4(x)
        # x = self.act_fn(x)
        # x = self.linear5(x)
        # x = self.act_fn(x)
        x = self.linear6(x)
        x = self.act_fn(x)
        x = self.linear7(x)
        return x


In [79]:
# Inicjalizacja modelu, funkcji straty i optymizatora
num_inputs = 27
model = PricePredictionModel(num_inputs)
# model.to(device)
model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [84]:
# Trening modelu
epochs = 70
model.train()
for epoch in range(epochs):
    for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            inputs = inputs.cuda()
            labels = labels.cuda()
            outputs = model(inputs)
            # print(labels.shape)
            # print(outputs.shape)
            loss = criterion(outputs, labels) #.view(-1, 1)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')




Epoch [10/70], Loss: 0.3420
Epoch [20/70], Loss: 0.3332
Epoch [30/70], Loss: 0.3383
Epoch [40/70], Loss: 0.2787
Epoch [50/70], Loss: 0.2813
Epoch [60/70], Loss: 0.3224
Epoch [70/70], Loss: 0.2942


In [85]:


model.eval() # Set model to eval mode
true_preds, num_preds = 0., 0.

with torch.no_grad(): # Deactivate gradients for the following code
    for data_inputs, data_labels in test_loader:
        # Determine prediction of model on dev set
        data_inputs, data_labels = data_inputs.to(device), data_labels.to(device)
        preds = model(data_inputs)
        class_preds = torch.empty(len(data_labels), dtype=torch.float32, device='cuda')
        for idx, pred in enumerate(preds):
            class_preds[idx] = torch.argmax(pred)
        # preds = preds.squeeze(dim=1)
        # preds = torch.sigmoid(preds) # Sigmoid to map predictions between 0 and 1
        pred_labels = (preds >= 0.5).long() # Binarize predictions to 0 and 1

        # Keep records of predictions for the accuracy metric (true_preds=TP+TN, num_preds=TP+TN+FP+FN)
        true_preds += (class_preds == data_labels).sum()
        num_preds += data_labels.shape[0]

acc = true_preds / num_preds
print(f"Accuracy of the model: {100.0*acc:4.2f}%")

Accuracy of the model: 83.52%


In [86]:
model.eval()
true_preds, num_preds = 0., 0.
total_loss = 0
with open('wyniki.csv', 'w') as f:
  write = csv.writer(f)
  with torch.no_grad():
      
      for data_inputs in test_loader_test:
        # Determine prediction of model on dev set
        data_inputs = data_inputs.to(device)
        preds = model(data_inputs)
        class_preds = torch.empty(len(data_inputs), dtype=torch.float32, device='cuda')
        for idx, pred in enumerate(preds):
            class_preds[idx] = torch.argmax(pred)  
            write.writerow([int(class_preds[idx].item())])