In [1]:
import torch
import numpy as np
import torchvision
import torch.nn as nn
import pandas as pd
import torch.optim as optim
import torch.functional as F
import torch.utils.data as data
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("train_data.csv")

In [3]:
df

Unnamed: 0,SalePrice,YearBuilt,Size(sqf),Floor,HallwayType,HeatingType,AptManageType,N_Parkinglot(Ground),N_Parkinglot(Basement),TimeToBusStop,TimeToSubway,N_manager,N_elevators,SubwayStation,N_FacilitiesInApt,N_FacilitiesNearBy(Total),N_SchoolNearBy(Total)
0,141592,2006,814,3,terraced,individual_heating,management_in_trust,111.0,184.0,5min~10min,10min~15min,3.0,0.0,Kyungbuk_uni_hospital,5,6.0,9.0
1,51327,1985,587,8,corridor,individual_heating,self_management,80.0,76.0,0~5min,5min~10min,2.0,2.0,Daegu,3,12.0,4.0
2,48672,1985,587,6,corridor,individual_heating,self_management,80.0,76.0,0~5min,5min~10min,2.0,2.0,Daegu,3,12.0,4.0
3,380530,2006,2056,8,terraced,individual_heating,management_in_trust,249.0,536.0,0~5min,0-5min,5.0,11.0,Sin-nam,5,3.0,7.0
4,78318,1992,644,2,mixed,individual_heating,self_management,142.0,79.0,5min~10min,15min~20min,4.0,8.0,Myung-duk,3,9.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4119,570796,2007,1928,24,terraced,individual_heating,management_in_trust,0.0,1270.0,0~5min,0-5min,14.0,16.0,Kyungbuk_uni_hospital,10,9.0,10.0
4120,307079,2015,644,22,terraced,individual_heating,management_in_trust,102.0,400.0,0~5min,5min~10min,5.0,10.0,Daegu,7,7.0,11.0
4121,357522,2007,868,20,terraced,individual_heating,management_in_trust,0.0,1270.0,0~5min,0-5min,14.0,16.0,Kyungbuk_uni_hospital,10,9.0,10.0
4122,312389,1978,1327,1,corridor,individual_heating,self_management,87.0,0.0,0~5min,0-5min,1.0,4.0,Kyungbuk_uni_hospital,3,7.0,11.0


In [4]:
categorical_columns = ["HallwayType", "SubwayStation"]

print(df["TimeToBusStop"].unique())
print(df["TimeToSubway"].unique())

['5min~10min' '0~5min' '10min~15min']
['10min~15min' '5min~10min' '0-5min' '15min~20min' 'no_bus_stop_nearby']


In [5]:
i = 0.0
dataType = "TimeToBusStop"
df.loc[df[dataType] == '0~5min', dataType] = np.float64(2.0)
df.loc[df[dataType] == '5min~10min', dataType] = np.float64(1.0)
df.loc[df[dataType] == '10min~15min', dataType] = np.float64(0.0)
print(df[dataType].unique())

i = 0.0
dataType = "TimeToSubway"
df.loc[df[dataType] == 'no_bus_stop_nearby', dataType] = np.float64(0.0)
df.loc[df[dataType] == '0-5min', dataType] = np.float64(4.0)
df.loc[df[dataType] == '5min~10min', dataType] = np.float64(3.0)
df.loc[df[dataType] == '10min~15min', dataType] = np.float64(2.0)
df.loc[df[dataType] == '15min~20min', dataType] = np.float64(1.0)
print(df[dataType].unique())

print(type(df[dataType].unique()[0]))

[1.0 2.0 0.0]
[2.0 3.0 4.0 1.0 0.0]
<class 'numpy.float64'>


In [6]:
df.HeatingType = (df.HeatingType==df["HeatingType"].unique()[0]).astype(int)
df.AptManageType = (df.AptManageType==df["AptManageType"].unique()[0]).astype(int)

In [7]:
categorical_values = pd.get_dummies(df[categorical_columns])
categorical_values.head()

Unnamed: 0,HallwayType_corridor,HallwayType_mixed,HallwayType_terraced,SubwayStation_Bangoge,SubwayStation_Banwoldang,SubwayStation_Chil-sung-market,SubwayStation_Daegu,SubwayStation_Kyungbuk_uni_hospital,SubwayStation_Myung-duk,SubwayStation_Sin-nam,SubwayStation_no_subway_nearby
0,0,0,1,0,0,0,0,1,0,0,0
1,1,0,0,0,0,0,1,0,0,0,0
2,1,0,0,0,0,0,1,0,0,0,0
3,0,0,1,0,0,0,0,0,0,1,0
4,0,1,0,0,0,0,0,0,1,0,0


In [8]:
df.drop(columns=categorical_columns,inplace=True)

In [9]:
train_indices = np.random.rand(len(df))>0.3

In [10]:
df

Unnamed: 0,SalePrice,YearBuilt,Size(sqf),Floor,HeatingType,AptManageType,N_Parkinglot(Ground),N_Parkinglot(Basement),TimeToBusStop,TimeToSubway,N_manager,N_elevators,N_FacilitiesInApt,N_FacilitiesNearBy(Total),N_SchoolNearBy(Total)
0,141592,2006,814,3,1,1,111.0,184.0,1.0,2.0,3.0,0.0,5,6.0,9.0
1,51327,1985,587,8,1,0,80.0,76.0,2.0,3.0,2.0,2.0,3,12.0,4.0
2,48672,1985,587,6,1,0,80.0,76.0,2.0,3.0,2.0,2.0,3,12.0,4.0
3,380530,2006,2056,8,1,1,249.0,536.0,2.0,4.0,5.0,11.0,5,3.0,7.0
4,78318,1992,644,2,1,0,142.0,79.0,1.0,1.0,4.0,8.0,3,9.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4119,570796,2007,1928,24,1,1,0.0,1270.0,2.0,4.0,14.0,16.0,10,9.0,10.0
4120,307079,2015,644,22,1,1,102.0,400.0,2.0,3.0,5.0,10.0,7,7.0,11.0
4121,357522,2007,868,20,1,1,0.0,1270.0,2.0,4.0,14.0,16.0,10,9.0,10.0
4122,312389,1978,1327,1,1,0,87.0,0.0,2.0,4.0,1.0,4.0,3,7.0,11.0


In [11]:
for column in df.columns:
    print(type(df[column].unique()[0]))

<class 'numpy.int64'>
<class 'numpy.int64'>
<class 'numpy.int64'>
<class 'numpy.int64'>
<class 'numpy.int32'>
<class 'numpy.int32'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.float64'>
<class 'numpy.int64'>
<class 'numpy.float64'>
<class 'numpy.float64'>


In [18]:
print(df.values[train_indices,1:])

numerical_data = torch.from_numpy(df.values[train_indices,1:]).float()
categorical_data = torch.from_numpy(categorical_values.values[train_indices]).float()
targets = torch.from_numpy(df.values[train_indices,-1]).float()

test_numerical_data = torch.from_numpy(df.values[~train_indices,:-1]).float()
test_categorical_data = torch.from_numpy(categorical_values.values[~train_indices]).float()
test_targets = torch.from_numpy(df.values[~train_indices,-1]).float()

[[1985 587 8 ... 3 12.0 4.0]
 [2006 2056 8 ... 5 3.0 7.0]
 [1992 644 2 ... 3 9.0 14.0]
 ...
 [2007 1928 24 ... 10 9.0 10.0]
 [2015 644 22 ... 7 7.0 11.0]
 [2007 868 13 ... 10 9.0 10.0]]


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

In [None]:
# Policzenie liczby wejść do sieci
inputs_num = len(df.columns)
print(inputs_num)

16


In [None]:
train=df.sample(frac=0.8,random_state=20) #random state is a seed value
test=df.drop(train.index)

train_dataset = data.TensorDataset(torch.from_numpy(train[:,:-1]).float(),torch.from_numpy(train[:,-1]).float())
test_dataset = data.TensorDataset(torch.from_numpy(test[:,:-1]).float(),torch.from_numpy(test[:,-1]).float())

print(train_dataset)

InvalidIndexError: (slice(None, None, None), slice(None, -1, None))

In [None]:
class AffordabilityEstim(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(AffordabilityEstim, self).__init__()
        self.lin1 =nn.Linear(num_inputs, 48)  # 28 x 28 = 784
        self.bn1 = nn.BatchNorm1d(48)
        self.act1 =nn.ReLU()
        self.lin2 =nn.Linear(48, 24)
        self.bn2 = nn.BatchNorm1d(24)
        self.act2 =nn.ReLU()
        self.lin3 =nn.Linear(24, num_outputs)


    def forward(self, x):
        x = self.lin1(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.lin2(x)
        x = self.bn2(x)
        x = self.act2(x)
        x = self.lin3(x)
        return x

In [None]:
model = AffordabilityEstim(inputs_num, 1)
print(model)

AffordabilityEstim(
  (lin1): Linear(in_features=16, out_features=48, bias=True)
  (bn1): BatchNorm1d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU()
  (lin2): Linear(in_features=48, out_features=24, bias=True)
  (bn2): BatchNorm1d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act2): ReLU()
  (lin3): Linear(in_features=24, out_features=1, bias=True)
)


In [None]:
df

Unnamed: 0_level_0,YearBuilt,Size(sqf),Floor,HallwayType,HeatingType,AptManageType,N_Parkinglot(Ground),N_Parkinglot(Basement),TimeToBusStop,TimeToSubway,N_manager,N_elevators,SubwayStation,N_FacilitiesInApt,N_FacilitiesNearBy(Total),N_SchoolNearBy(Total)
SalePrice,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
141592,2006,814,3,0.0,1.0,0.0,111.0,184.0,0.0,0.0,3.0,0.0,0.0,5,6.0,9.0
51327,1985,587,8,1.0,1.0,1.0,80.0,76.0,1.0,1.0,2.0,2.0,1.0,3,12.0,4.0
48672,1985,587,6,1.0,1.0,1.0,80.0,76.0,1.0,1.0,2.0,2.0,1.0,3,12.0,4.0
380530,2006,2056,8,0.0,1.0,0.0,249.0,536.0,1.0,2.0,5.0,11.0,2.0,5,3.0,7.0
78318,1992,644,2,2.0,1.0,1.0,142.0,79.0,0.0,3.0,4.0,8.0,3.0,3,9.0,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
570796,2007,1928,24,0.0,1.0,0.0,0.0,1270.0,1.0,2.0,14.0,16.0,0.0,10,9.0,10.0
307079,2015,644,22,0.0,1.0,0.0,102.0,400.0,1.0,1.0,5.0,10.0,1.0,7,7.0,11.0
357522,2007,868,20,0.0,1.0,0.0,0.0,1270.0,1.0,2.0,14.0,16.0,0.0,10,9.0,10.0
312389,1978,1327,1,1.0,1.0,1.0,87.0,0.0,1.0,2.0,1.0,4.0,0.0,3,7.0,11.0


In [None]:
def classify(x):
    # CHEAP
    if x <= 100000:
        return 0
    # AVERAGE
    if x > 100000 and x <= 350000:
        return 1
    # EXPENSIVE 
    if x > 350000:
        return 2

In [None]:
loss_fn = nn.MSELoss()
opt = optim.SGD(model.parameters(), lr=0.001, momentum=0.5)

In [None]:
# epochs = 2

# for epoch in range(epochs):
#     for i, batch in enumerate(data_loader, 0):

#         inputs, labels = batch

#         # training steps for normal model
#         opt.zero_grad()
#         outputs = model(inputs).squeeze()
#         loss = loss_fn(outputs, labels)
#         loss.backward()
#         opt.step()
