In [1]:
## Load the data

In [2]:
import pandas as pd

In [3]:
data = pd.read_csv('./data.csv')

In [4]:
## Clean the data

In [5]:
data.columns

Index(['date', 'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
       'floors', 'waterfront', 'view', 'condition', 'sqft_above',
       'sqft_basement', 'yr_built', 'yr_renovated', 'street', 'city',
       'statezip', 'country'],
      dtype='object')

In [6]:
data.drop(['sqft_living','sqft_lot','waterfront','view','condition','sqft_above','sqft_basement','street','city','statezip','country'],axis=1,inplace=True)

In [7]:
data.drop('date',axis=1,inplace=True)

In [8]:
data.head()

       price  bedrooms  bathrooms  floors  yr_built  yr_renovated
0   313000.0       3.0       1.50     1.5      1955          2005
1  2384000.0       5.0       2.50     2.0      1921             0
2   342000.0       3.0       2.00     1.0      1966             0
3   420000.0       3.0       2.25     1.0      1963             0
4   550000.0       4.0       2.50     1.0      1976          1992

In [9]:
## Feature Enginnering

In [10]:
def fe(data,col):
    print(len(data))
    max_no = data[col].quantile(0.99)
    min_no = data[col].quantile(0.05)
    data = data[data[col] > min_no]
    data = data[data[col] < max_no]
    print(len(data))
    return data

In [11]:
for col in list(data.columns):
    print(col)
    data = fe(data,'price')

In [12]:
data.head()

      price  bedrooms  bathrooms  floors  yr_built  yr_renovated
2  342000.0       3.0       2.00     1.0      1966             0
3  420000.0       3.0       2.25     1.0      1963             0
4  550000.0       4.0       2.50     1.0      1976          1992
5  490000.0       2.0       1.00     1.0      1938          1994
6  335000.0       2.0       2.00     1.0      1976             0

In [13]:
X = data.drop('price',axis=1)
y = data['price']

In [14]:
from sklearn.model_selection import train_test_split

In [15]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)

In [16]:
len(X_train),len(X_test)

(2367, 789)

In [17]:
## Modelling

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim

In [19]:
class BaseLine_Model(nn.Module):
    def __init__(self,input_shape,output_shape):
        super().__init__()
        self.fc1 = nn.Linear(input_shape,32)
        self.fc2 = nn.Linear(32,64)
        self.fc3 = nn.Linear(64,128)
        self.fc4 = nn.Linear(128,64)
        self.fc5 = nn.Linear(64,output_shape)
    
    def forward(self,X):
        preds = self.fc1(X)
        preds = self.fc2(preds)
        preds = self.fc3(preds)
        preds = self.fc4(preds)
        preds = self.fc5(preds)
        return preds

In [20]:
EPOCHS = 100

In [21]:
import wandb

In [22]:
BATCH_SIZE = 32

In [23]:
PROJECT_NAME = 'House-Price-Pred'

In [24]:
from tqdm import tqdm

In [25]:
device = torch.device('cuda')

In [26]:
model = BaseLine_Model(5,1).to(device)

In [27]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.1)

In [28]:
def get_loss(criterion,X,y,model):
    preds = model(X.float().to(device))
    loss = criterion(preds,y)
    return loss.item()
def get_accuracy(X,y,model):
    correct = 0
    total = 0
    for i in range(len(X)):
        pred = model(X[i].float().to(device))
        pred.to(device)
        if pred[0] == y[i]:
            correct += 1
        total += 1
    if correct == 0:
        correct += 1
    return round(correct/total,3)

In [29]:
import numpy as np

In [30]:
X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

In [31]:
get_accuracy(X_test,y_test,model)

0.001

In [32]:
wandb.init(project=PROJECT_NAME,name='baseline')
for _ in tqdm(range(EPOCHS)):
    for i in range(0,len(X_train),BATCH_SIZE):
        X_batch = X_train[i:i+BATCH_SIZE].to(device)
        y_batch = y_train[i:i+BATCH_SIZE].to(device)
        model.to(device)
        preds = model(X_batch.float())
        preds.to(device)
        loss = criterion(preds.float(),y_batch.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    wandb.log({'loss':loss.item(),'val_loss':get_loss(criterion,X_test,y_test,model),'accuracy':get_accuracy(X_train,y_train,model),'val_accuracy':get_accuracy(X_test,y_test,model)})

In [33]:
def get_loss(criterion,X,y,model):
    preds = model(X.float().to(device))
    preds.to(device)
    loss = criterion(preds,y)
    return loss.item()
def get_accuracy(X,y,model):
    correct = 0
    total = 0
    for i in range(len(X)):
        pred = model(X[i].float().to(device))
        pred.to(device)
        if pred[0] == y[i]:
            correct += 1
        total += 1
    if correct == 0:
        correct += 1
    return round(correct/total,3)

In [34]:
import numpy as np

In [35]:
X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

In [36]:
get_accuracy(X_test,y_test,model)

0.001

In [37]:
wandb.init(project=PROJECT_NAME,name='baseline')
for _ in tqdm(range(EPOCHS)):
    for i in range(0,len(X_train),BATCH_SIZE):
        X_batch = X_train[i:i+BATCH_SIZE].to(device)
        y_batch = y_train[i:i+BATCH_SIZE].to(device)
        model.to(device)
        preds = model(X_batch.float())
        preds.to(device)
        loss = criterion(preds.float(),y_batch.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    wandb.log({'loss':loss.item(),'val_loss':get_loss(criterion,X_test,y_test,model),'accuracy':get_accuracy(X_train,y_train,model),'val_accuracy':get_accuracy(X_test,y_test,model)})

In [38]:
def get_loss(criterion,X,y,model):
    preds = model(X.float().to(device))
    preds.to(device)
    y.to(device)
    loss = criterion(preds,y)
    return loss.item()
def get_accuracy(X,y,model):
    correct = 0
    total = 0
    for i in range(len(X)):
        pred = model(X[i].float().to(device))
        pred.to(device)
        if pred[0] == y[i]:
            correct += 1
        total += 1
    if correct == 0:
        correct += 1
    return round(correct/total,3)

In [39]:
import numpy as np

In [40]:
X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

In [41]:
get_accuracy(X_test,y_test,model)

0.001

In [42]:
wandb.init(project=PROJECT_NAME,name='baseline')
for _ in tqdm(range(EPOCHS)):
    for i in range(0,len(X_train),BATCH_SIZE):
        X_batch = X_train[i:i+BATCH_SIZE].to(device)
        y_batch = y_train[i:i+BATCH_SIZE].to(device)
        model.to(device)
        preds = model(X_batch.float())
        preds.to(device)
        loss = criterion(preds.float(),y_batch.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    wandb.log({'loss':loss.item(),'val_loss':get_loss(criterion,X_test,y_test,model),'accuracy':get_accuracy(X_train,y_train,model),'val_accuracy':get_accuracy(X_test,y_test,model)})

In [43]:
def get_loss(criterion,X,y,model):
    preds = model(X.float().to(device))
    preds.to(device)
    y.to(device)
    criterion.to(device)
    loss = criterion(preds,y)
    return loss.item()
def get_accuracy(X,y,model):
    correct = 0
    total = 0
    for i in range(len(X)):
        pred = model(X[i].float().to(device))
        pred.to(device)
        if pred[0] == y[i]:
            correct += 1
        total += 1
    if correct == 0:
        correct += 1
    return round(correct/total,3)

In [44]:
import numpy as np

In [45]:
X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

In [46]:
get_accuracy(X_test,y_test,model)

0.001

In [47]:
wandb.init(project=PROJECT_NAME,name='baseline')
for _ in tqdm(range(EPOCHS)):
    for i in range(0,len(X_train),BATCH_SIZE):
        X_batch = X_train[i:i+BATCH_SIZE].to(device)
        y_batch = y_train[i:i+BATCH_SIZE].to(device)
        model.to(device)
        preds = model(X_batch.float())
        preds.to(device)
        loss = criterion(preds.float(),y_batch.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    wandb.log({'loss':loss.item(),'val_loss':get_loss(criterion,X_test,y_test,model),'accuracy':get_accuracy(X_train,y_train,model),'val_accuracy':get_accuracy(X_test,y_test,model)})

In [48]:
def get_loss(criterion,X,y,model):
    preds = model(X.float().to(device))
    preds = preds.to(device)
    y = y.to(device)
#     criterion.to(device)
    loss = criterion(preds,y)
    return loss.item()
def get_accuracy(X,y,model):
    correct = 0
    total = 0
    for i in range(len(X)):
        pred = model(X[i].float().to(device))
        pred.to(device)
        if pred[0] == y[i]:
            correct += 1
        total += 1
    if correct == 0:
        correct += 1
    return round(correct/total,3)

In [49]:
import numpy as np

In [50]:
X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

In [51]:
get_accuracy(X_test,y_test,model)

0.001

In [52]:
wandb.init(project=PROJECT_NAME,name='baseline')
for _ in tqdm(range(EPOCHS)):
    for i in range(0,len(X_train),BATCH_SIZE):
        X_batch = X_train[i:i+BATCH_SIZE].to(device)
        y_batch = y_train[i:i+BATCH_SIZE].to(device)
        model.to(device)
        preds = model(X_batch.float())
        preds.to(device)
        loss = criterion(preds.float(),y_batch.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    wandb.log({'loss':loss.item(),'val_loss':get_loss(criterion,X_test,y_test,model),'accuracy':get_accuracy(X_train,y_train,model),'val_accuracy':get_accuracy(X_test,y_test,model)})

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

In [53]:
preds

tensor([[133076.4688],
        [176768.4688],
        [196184.4688],
        [378557.4688],
        [409696.9688],
        [161556.4688],
        [219646.4688],
        [149074.4688],
        [413855.9688],
        [234650.4688],
        [386601.4688],
        [328135.9688],
        [362034.4688],
        [397402.2188],
        [344394.9688],
        [351608.7188],
        [206810.4688],
        [ 77250.4609],
        [357326.7188],
        [129638.4609],
        [154028.4688],
        [333123.7188],
        [397767.9688],
        [199576.4688],
        [105760.4609],
        [383003.4688],
        [323816.2188],
        [213506.4688],
        [147956.4688],
        [283816.4688],
        [367235.7188]], device='cuda:0', grad_fn=<AddmmBackward>)

In [54]:
y_batch

tensor([365000., 450000., 660000., 455000., 562000., 575000., 367500., 441000.,
        330000., 450000., 409316., 784000., 715000., 925000., 361000., 589900.,
        860000., 400000., 396675., 347000., 342000., 330000., 740000., 533000.,
        930000., 549000., 690000., 410000., 732000., 471000., 480000.],
       device='cuda:0', dtype=torch.float64)

In [55]:
class BaseLine_Model(nn.Module):
    def __init__(self,input_shape,output_shape):
        super().__init__()
        self.fc1 = nn.Linear(input_shape,32)
        self.fc2 = nn.Linear(32,64)
        self.fc3 = nn.Linear(64,128)
        self.fc4 = nn.Linear(128,64)
        self.fc5 = nn.Linear(64,output_shape)
    
    def forward(self,X):
        preds = nn.ReLU(self.fc1(X))
        preds = nn.ReLU(self.fc2(preds))
        preds = nn.ReLU(self.fc3(preds))
        preds = nn.ReLU(self.fc4(preds))
        preds = nn.ReLU(self.fc5(preds))
        return preds

In [56]:
EPOCHS = 100

In [57]:
import wandb

In [58]:
BATCH_SIZE = 32

In [59]:
PROJECT_NAME = 'House-Price-Pred'

In [60]:
from tqdm import tqdm

In [61]:
device = torch.device('cuda')

In [62]:
model = BaseLine_Model(5,1).to(device)

In [63]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.1)

In [64]:
def get_loss(criterion,X,y,model):
    preds = model(X.float().to(device))
    preds = preds.to(device)
    y = y.to(device)
#     criterion.to(device)
    loss = criterion(preds,y)
    return loss.item()
def get_accuracy(X,y,model):
    correct = 0
    total = 0
    for i in range(len(X)):
        pred = model(X[i].float().to(device))
        pred.to(device)
        if pred[0] == y[i]:
            correct += 1
        total += 1
    if correct == 0:
        correct += 1
    return round(correct/total,3)

In [65]:
import numpy as np

In [66]:
X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

In [67]:
get_accuracy(X_test,y_test,model)

In [68]:
## Load the data

In [69]:
import pandas as pd

In [70]:
data = pd.read_csv('./data.csv')

In [71]:
## Clean the data

In [72]:
data.columns

Index(['date', 'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
       'floors', 'waterfront', 'view', 'condition', 'sqft_above',
       'sqft_basement', 'yr_built', 'yr_renovated', 'street', 'city',
       'statezip', 'country'],
      dtype='object')

In [73]:
data.drop(['sqft_living','sqft_lot','waterfront','view','condition','sqft_above','sqft_basement','street','city','statezip','country'],axis=1,inplace=True)

In [74]:
data.drop('date',axis=1,inplace=True)

In [75]:
data.head()

       price  bedrooms  bathrooms  floors  yr_built  yr_renovated
0   313000.0       3.0       1.50     1.5      1955          2005
1  2384000.0       5.0       2.50     2.0      1921             0
2   342000.0       3.0       2.00     1.0      1966             0
3   420000.0       3.0       2.25     1.0      1963             0
4   550000.0       4.0       2.50     1.0      1976          1992

In [76]:
## Feature Enginnering

In [77]:
def fe(data,col):
    print(len(data))
    max_no = data[col].quantile(0.99)
    min_no = data[col].quantile(0.05)
    data = data[data[col] > min_no]
    data = data[data[col] < max_no]
    print(len(data))
    return data

In [78]:
for col in list(data.columns):
    print(col)
    data = fe(data,'price')

In [79]:
data.head()

      price  bedrooms  bathrooms  floors  yr_built  yr_renovated
2  342000.0       3.0       2.00     1.0      1966             0
3  420000.0       3.0       2.25     1.0      1963             0
4  550000.0       4.0       2.50     1.0      1976          1992
5  490000.0       2.0       1.00     1.0      1938          1994
6  335000.0       2.0       2.00     1.0      1976             0

In [80]:
X = data.drop('price',axis=1)
y = data['price']

In [81]:
from sklearn.model_selection import train_test_split

In [82]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)

In [83]:
len(X_train),len(X_test)

(2367, 789)

In [84]:
## Modelling

In [85]:
import torch
import torch.nn as nn
import torch.optim as optim

In [86]:
class BaseLine_Model(nn.Module):
    def __init__(self,input_shape,output_shape):
        super().__init__()
        self.fc1 = nn.Linear(input_shape,32)
        self.fc2 = nn.Linear(32,64)
        self.fc3 = nn.Linear(64,128)
        self.fc4 = nn.Linear(128,64)
        self.fc5 = nn.Linear(64,output_shape)
    
    def forward(self,X):
        preds = self.fc1(X)
        preds = nn.ReLU(preds)
        preds = self.fc2(preds)
        preds = nn.ReLU(preds)
        preds = self.fc3(preds)
        preds = nn.ReLU(preds)
        preds = self.fc4(preds)
        preds = nn.ReLU(preds)
        preds = self.fc5(preds)
        return preds

In [87]:
EPOCHS = 100

In [88]:
import wandb

In [89]:
BATCH_SIZE = 32

In [90]:
PROJECT_NAME = 'House-Price-Pred'

In [91]:
from tqdm import tqdm

In [92]:
device = torch.device('cuda')

In [93]:
model = BaseLine_Model(5,1).to(device)

In [94]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.1)

In [95]:
def get_loss(criterion,X,y,model):
    preds = model(X.float().to(device))
    preds = preds.to(device)
    y = y.to(device)
#     criterion.to(device)
    loss = criterion(preds,y)
    return loss.item()
def get_accuracy(X,y,model):
    correct = 0
    total = 0
    for i in range(len(X)):
        pred = model(X[i].float().to(device))
        pred.to(device)
        if pred[0] == y[i]:
            correct += 1
        total += 1
    if correct == 0:
        correct += 1
    return round(correct/total,3)

In [96]:
import numpy as np

In [97]:
X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

In [98]:
get_accuracy(X_test,y_test,model)

In [99]:
import torch.nn.functional as F

In [100]:
class BaseLine_Model(nn.Module):
    def __init__(self,input_shape,output_shape):
        super().__init__()
        self.fc1 = nn.Linear(input_shape,32)
        self.fc2 = nn.Linear(32,64)
        self.fc3 = nn.Linear(64,128)
        self.fc4 = nn.Linear(128,64)
        self.fc5 = nn.Linear(64,output_shape)
    
    def forward(self,X):
        preds = self.fc1(X)
        preds = F.relu(preds)
        preds = self.fc2(preds)
        preds = F.relu(preds)
        preds = self.fc3(preds)
        preds = F.relu(preds)
        preds = self.fc4(preds)
        preds = F.relu(preds)
        preds = self.fc5(preds)
        return preds

In [101]:
EPOCHS = 100

In [102]:
import wandb

In [103]:
BATCH_SIZE = 32

In [104]:
PROJECT_NAME = 'House-Price-Pred'

In [105]:
from tqdm import tqdm

In [106]:
device = torch.device('cuda')

In [107]:
model = BaseLine_Model(5,1).to(device)

In [108]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=0.1)

In [109]:
def get_loss(criterion,X,y,model):
    preds = model(X.float().to(device))
    preds = preds.to(device)
    y = y.to(device)
#     criterion.to(device)
    loss = criterion(preds,y)
    return loss.item()
def get_accuracy(X,y,model):
    correct = 0
    total = 0
    for i in range(len(X)):
        pred = model(X[i].float().to(device))
        pred.to(device)
        if pred[0] == y[i]:
            correct += 1
        total += 1
    if correct == 0:
        correct += 1
    return round(correct/total,3)

In [110]:
import numpy as np

In [111]:
X_train = torch.from_numpy(np.array(X_train))
y_train = torch.from_numpy(np.array(y_train))
X_test = torch.from_numpy(np.array(X_test))
y_test = torch.from_numpy(np.array(y_test))

In [112]:
get_accuracy(X_test,y_test,model)

0.001

In [113]:
wandb.init(project=PROJECT_NAME,name='baseline')
for _ in tqdm(range(EPOCHS)):
    for i in range(0,len(X_train),BATCH_SIZE):
        X_batch = X_train[i:i+BATCH_SIZE].to(device)
        y_batch = y_train[i:i+BATCH_SIZE].to(device)
        model.to(device)
        preds = model(X_batch.float())
        preds.to(device)
        loss = criterion(preds.float(),y_batch.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    wandb.log({'loss':loss.item(),'val_loss':get_loss(criterion,X_test,y_test,model),'accuracy':get_accuracy(X_train,y_train,model),'val_accuracy':get_accuracy(X_test,y_test,model)})