In [16]:
# This is file for train, prediction

import pandas as pd

dirpath = 'C:/Python/Used-Car-Price-Regression-DACON/'

train = pd.read_csv('data/modified_train.csv')
test = pd.read_csv('data/modified_test.csv')

In [17]:
train.head()

Unnamed: 0,title,odometer,location,isimported,engine,transmission,fuel,paint,year,brand,target
0,147,18277,0,0,3,0,0,0,2016,36,13665000
1,93,10,0,2,3,0,0,10,2019,36,33015000
2,55,83091,0,0,4,0,0,0,2012,31,9915000
3,122,91524,0,0,3,0,0,6,2007,6,3815000
4,116,94177,0,0,4,0,0,0,2010,36,7385000


In [18]:
test.head()

Unnamed: 0,title,odometer,location,isimported,engine,transmission,fuel,paint,year,brand
0,14,1234,1,2,3,0,0,11,2017,34
1,88,29938,1,0,3,0,0,11,2013,14
2,29,87501,0,0,3,0,0,10,2012,34
3,91,180894,0,1,4,0,0,6,2001,36
4,17,104814,0,0,3,0,0,11,2000,36


In [19]:
# train data for scaler fitting

Y = train[ ['target'] ].values
X = train[ ['title', 'odometer', 'location', 'isimported', 'engine', 'transmission', 'fuel', 'paint', 'year', 'brand' ] ].values

In [20]:
from sklearn.preprocessing import MinMaxScaler

scalerX = MinMaxScaler()
scalerX.fit(X)

scalerY = MinMaxScaler()
scalerY.fit(Y)

In [21]:
import torch

X = test[ ['title', 'odometer', 'location', 'isimported', 'engine', 'transmission', 'fuel', 'paint', 'year', 'brand' ] ].values

X = scalerX.transform(X)
Y = torch.zeros(436, 1)

In [22]:
from torch import nn

class Regressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Linear(10, 64, bias=False),
            nn.BatchNorm1d(64, eps=1e-05, momentum=0.1),
            nn.ReLU()
        )

        self.layer2 = nn.Sequential(
            nn.Linear(64, 128, bias=False),
            nn.BatchNorm1d(128, eps=1e-05, momentum=0.1),
            nn.ReLU()
        )

        self.layer3 = nn.Sequential(
            nn.Linear(128, 256, bias=False),
            nn.BatchNorm1d(256, eps=1e-05, momentum=0.1),
            nn.ReLU()
        )

        self.layer4 = nn.Linear(256, 1, bias=False)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        return x

In [23]:
from torch.utils.data import Dataset

class TensorData(Dataset):

    def __init__(self, x_data, y_data):
        self.x_data = torch.FloatTensor(x_data)
        self.y_data = torch.FloatTensor(y_data)
        self.len = self.y_data.shape[0]

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

In [24]:
from torch.utils.data import DataLoader

testsets = TensorData(X, Y)
testloader = DataLoader(testsets, batch_size=16)

print(test)

     title  odometer  location  isimported  engine  transmission  fuel  paint  \
0       14      1234         1           2       3             0     0     11   
1       88     29938         1           0       3             0     0     11   
2       29     87501         0           0       3             0     0     10   
3       91    180894         0           1       4             0     0      6   
4       17    104814         0           0       3             0     0     11   
..     ...       ...       ...         ...     ...           ...   ...    ...   
431    133     78175         0           0       4             0     0      3   
432    182    129223         0           0       4             0     0      0   
433    167    100943         0           0       3             0     0     10   
434     22     81463         0           0       3             0     0      7   
435     91       646         0           0       4             0     0     16   

     year  brand  
0    201

In [25]:
def predict(model, testloader):
    model.eval()  #evaluation 과정에서 사용하지 않아야 하는 layer들을 알아서 off 시키도록 하는 함수
    model_pred = []

    with torch.no_grad():  #파라미터 업데이트 안하기 때문에 no_grad 사용
        for i, data in enumerate(testloader, 0):
            inputs, values = data
            outputs = model(inputs)

            model_pred.extend(outputs.tolist())
    return model_pred

In [26]:
checkpoint = torch.load(dirpath + "best_model/" + "0.023705_best_model.pth")
model = Regressor()
model.load_state_dict(checkpoint)

pred = predict(model, testloader)
pred = scalerY.inverse_transform(pred)

In [27]:
print(pred)

[[ 3.45089869e+07]
 [ 2.95404928e+06]
 [ 8.45951301e+06]
 [ 1.42932454e+06]
 [ 4.33424685e+06]
 [ 2.89354960e+06]
 [ 1.91812533e+06]
 [ 8.23304230e+06]
 [ 1.48766196e+07]
 [ 3.90506539e+06]
 [ 3.77966267e+07]
 [ 7.09151966e+06]
 [ 3.05141383e+06]
 [ 3.24612982e+06]
 [ 4.72257805e+06]
 [ 3.06500113e+06]
 [ 8.47492108e+05]
 [-1.24108006e+06]
 [ 2.68849696e+06]
 [ 1.99626084e+06]
 [ 1.07740618e+06]
 [ 2.86502646e+06]
 [ 2.44334064e+06]
 [ 1.22945142e+07]
 [ 3.32088062e+06]
 [ 3.38908357e+06]
 [ 1.86903647e+06]
 [ 8.69435066e+05]
 [ 1.45722065e+06]
 [ 2.27117234e+06]
 [-2.06054529e+06]
 [ 9.07350618e+06]
 [ 2.90647030e+06]
 [ 4.83959425e+06]
 [ 4.93347223e+06]
 [ 3.79881960e+06]
 [ 3.06610554e+06]
 [-8.20770781e+05]
 [ 3.01486137e+06]
 [ 5.28047858e+06]
 [ 2.13507230e+06]
 [ 4.11581410e+06]
 [ 2.38510997e+06]
 [ 1.12152993e+07]
 [ 5.82357955e+06]
 [ 8.07493228e+06]
 [ 9.04515833e+06]
 [ 3.57925543e+07]
 [ 2.40202959e+06]
 [ 3.68610789e+06]
 [ 1.02598985e+07]
 [ 3.07225712e+06]
 [ 2.0094358

In [28]:
submission = pd.read_csv( dirpath + "sample_submission.csv")
submission['target'] = pred

submission.to_csv( dirpath + "submit.csv", index=False)