In [1]:
from tqdm import tqdm
import wandb
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,RobustScaler,MinMaxScaler,MaxAbsScaler,Normalizer
import torch,torchvision
from torch.nn import *
from torch.optim import *
import cv2
PROJECT_NAME = 'House-Prices-Advanced-Regression-Techniques-V8-Competition'
device = 'cuda'

In [2]:
data = pd.read_csv('./data/train.csv')

In [3]:
data.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [4]:
data = data['SalePrice']

In [5]:
data = torch.from_numpy(np.array(data.tolist())).view(1,-1)

In [6]:
data_input = data[:3,:-1]

In [7]:
data_target = data[:3,1:]

In [8]:
data_input

tensor([[208500, 181500, 223500,  ..., 210000, 266500, 142125]])

In [9]:
data_target

tensor([[181500, 223500, 140000,  ..., 266500, 142125, 147500]])

In [10]:

class Model(Module):
    def __init__(self,hidden=256):
        super().__init__()
        self.hidden = hidden
        self.lstm1 = LSTMCell(1,hidden).to(device)
        self.lstm2 = LSTMCell(hidden,hidden).to(device)
        self.lstm3 = LSTMCell(hidden,hidden).to(device)
        self.linear1 = Linear(hidden,1).to(device)
    
    def forward(self,X,future=0):
        preds = []
        batch_size = X.size(0)
        h_t1 = torch.zeros(batch_size,self.hidden,dtype=torch.float32)
        c_t1 = torch.zeros(batch_size,self.hidden,dtype=torch.float32)
        h_t2 = torch.zeros(batch_size,self.hidden,dtype=torch.float32)
        c_t2 = torch.zeros(batch_size,self.hidden,dtype=torch.float32)
        h_t3 = torch.zeros(batch_size,self.hidden,dtype=torch.float32)
        c_t3 = torch.zeros(batch_size,self.hidden,dtype=torch.float32)
        h_t1 = h_t1.to(device)
        c_t1 = c_t1.to(device)
        h_t2 = h_t2.to(device)
        c_t2 = c_t2.to(device)
        h_t3 = h_t3.to(device)
        c_t3 = c_t3.to(device)
        for X_batch in X.split(1,dim=1):
            X_batch = X_batch.to(device)
            h_t1,c_t1 = self.lstm1(X_batch,(h_t1,c_t1))
            h_t1 = h_t1.to(device)
            c_t1 = c_t1.to(device)
            h_t2,c_t2 = self.lstm2(h_t1,(h_t2,c_t2))
            h_t2 = h_t2.to(device)
            c_t2 = c_t2.to(device)
            h_t3,c_t3 = self.lstm3(h_t2,(h_t3,c_t3))
            h_t3 = h_t3.to(device)
            c_t3 = c_t3.to(device)
            pred = self.linear1(h_t3)
            preds.append(pred)
        for i in range(future):
            h_t1,c_t1 = self.lstm1(X_batch,(h_t1,c_t1))
            h_t1 = h_t1.to(device)
            c_t1 = c_t1.to(device)
            h_t2,c_t2 = self.lstm2(h_t1,(h_t2,c_t2))
            h_t2 = h_t2.to(device)
            c_t2 = c_t2.to(device)
            h_t3,c_t3 = self.lstm3(h_t2,(h_t3,c_t3))
            h_t3 = h_t3.to(device)
            c_t3 = c_t3.to(device)
            pred = self.linear1(h_t3)
            preds.append(pred)
        preds = torch.cat(preds,dim=1)
        return preds

In [11]:
model = Model()
optimizer = LBFGS(model.parameters(),lr=0.8)
criterion = MSELoss()

In [12]:
epochs = 100

In [13]:
wandb.init(project=PROJECT_NAME,name='test')
for _ in tqdm(range(epochs)):
    def closure():
        optimizer.zero_grad()
        preds = model(data_input.float())
        loss = criterion(preds.float().to(device),data_target.float().to(device))
        wandb.log({'Loss':loss.item()})
        loss.backward()
        return loss
    optimizer.step(closure)
    with torch.no_grad():
        future = 100
        preds = model(data_input.float(),future)
        loss = criterion(preds[:,:-future].float().to(device),data_target.float().to(device))
        wandb.log({'Val Loss':loss.item()})
        preds = preds.cpu().detach().numpy()
    plt.figure(figsize=(12,7))
    batch_size = data_input.shape[1]
    def draw(y):
        plt.plot(np.arange(batch_size),data_target.cpu().view(-1),'b')
        plt.plot(np.arange(batch_size,batch_size+future),y[batch_size:],'r')
    draw(preds[0])
    plt.savefig('./img.png')
    plt.close()
    wandb.log({'Img':wandb.Image(cv2.imread('./img.png'))})
wandb.watch(model)
wandb.finish()

[34m[1mwandb[0m: W&B API key is configured (use `wandb login --relogin` to force relogin)


100%|██████████| 100/100 [11:25<00:00,  6.85s/it]


VBox(children=(Label(value=' 2.95MB of 2.98MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.98996398321…

0,1
Loss,39016321024.0
_runtime,692.0
_timestamp,1629823687.0
_step,797.0
Val Loss,39016165376.0


0,1
Loss,████████████████████████▇▇▇▇▇▇▆▆▆▆▅▄▄▃▂▁
_runtime,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
_timestamp,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Val Loss,███████████████████████████████████▇▇▆▄▁


In [14]:
preds

array([[ 7.0073304, 17.38561  , 29.394089 , ..., 55.576523 , 55.576523 ,
        55.576523 ]], dtype=float32)

In [15]:
test_data = pd.read_csv('./data/test.csv')

In [16]:
len(test_data)

1459

In [17]:
test_data

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,...,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,...,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,...,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,...,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,...,144,0,,,,0,1,2010,WD,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2915,160,RM,21.0,1936,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,6,2006,WD,Normal
1455,2916,160,RM,21.0,1894,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,4,2006,WD,Abnorml
1456,2917,20,RL,160.0,20000,Pave,,Reg,Lvl,AllPub,...,0,0,,,,0,9,2006,WD,Abnorml
1457,2918,85,RL,62.0,10441,Pave,,Reg,Lvl,AllPub,...,0,0,,MnPrv,Shed,700,7,2006,WD,Normal


In [18]:
ids = test_data['Id']

In [19]:
with torch.no_grad():
    future = 1459
    preds = model(data_input.float(),future)
    preds = preds.cpu().detach().numpy()

In [20]:
pd.DataFrame({'Id':ids,'SalePrice':preds[:,:-future].reshape(-1)}).to_csv('./submission.csv',index=False)

In [21]:
preprocessings = [StandardScaler,RobustScaler,MinMaxScaler,MaxAbsScaler,Normalizer]

In [22]:
# for pre in preprocessings:
#     wandb.init(project=PROJECT_NAME,name=f'{pre}')
#     for _ in tqdm(range(epochs)):
#         def closure():
#             optimizer.zero_grad()
#             preds = model(data_input.float())
#             loss = criterion(preds.float().to(device),data_target.float().to(device))
#             wandb.log({'Loss':loss.item()})
#             loss.backward()
#             return loss
#         optimizer.step(closure)
#         with torch.no_grad():
#             future = 100
#             preds = model(data_input.float(),future)
#             loss = criterion(preds[:,:-future].float().to(device),data_target.float().to(device))
#             wandb.log({'Val Loss':loss.item()})
#             preds = preds.cpu().detach().numpy()
#         plt.figure(figsize=(12,7))
#         batch_size = data_input.shape[1]
#         def draw(y):
#             plt.plot(np.arange(batch_size),data_target.cpu().view(-1),'b')
#             plt.plot(np.arange(batch_size,batch_size+future),y[batch_size:],'r')
#         draw(preds[0])
#         plt.savefig('./img.png')
#         plt.close()
#         wandb.log({'Img':wandb.Image(cv2.imread('./img.png'))})
#     wandb.watch(model)
#     wandb.finish()

Problem at: <ipython-input-22-4aa60aac94b2> 2 <module>


KeyboardInterrupt: 