In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
data=pd.read_csv('data.csv')

In [54]:
data.head()

Unnamed: 0,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated,street,city,statezip,country
0,2014-05-02 00:00:00,313000.0,3.0,1.5,1340,7912,1.5,0,0,3,1340,0,1955,2005,18810 Densmore Ave N,Shoreline,WA 98133,USA
1,2014-05-02 00:00:00,2384000.0,5.0,2.5,3650,9050,2.0,0,4,5,3370,280,1921,0,709 W Blaine St,Seattle,WA 98119,USA
2,2014-05-02 00:00:00,342000.0,3.0,2.0,1930,11947,1.0,0,0,4,1930,0,1966,0,26206-26214 143rd Ave SE,Kent,WA 98042,USA
3,2014-05-02 00:00:00,420000.0,3.0,2.25,2000,8030,1.0,0,0,4,1000,1000,1963,0,857 170th Pl NE,Bellevue,WA 98008,USA
4,2014-05-02 00:00:00,550000.0,4.0,2.5,1940,10500,1.0,0,0,4,1140,800,1976,1992,9105 170th Ave NE,Redmond,WA 98052,USA


In [55]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4600 entries, 0 to 4599
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           4600 non-null   object 
 1   price          4600 non-null   float64
 2   bedrooms       4600 non-null   float64
 3   bathrooms      4600 non-null   float64
 4   sqft_living    4600 non-null   int64  
 5   sqft_lot       4600 non-null   int64  
 6   floors         4600 non-null   float64
 7   waterfront     4600 non-null   int64  
 8   view           4600 non-null   int64  
 9   condition      4600 non-null   int64  
 10  sqft_above     4600 non-null   int64  
 11  sqft_basement  4600 non-null   int64  
 12  yr_built       4600 non-null   int64  
 13  yr_renovated   4600 non-null   int64  
 14  street         4600 non-null   object 
 15  city           4600 non-null   object 
 16  statezip       4600 non-null   object 
 17  country        4600 non-null   object 
dtypes: float

In [56]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch
from torch import nn, optim
import numpy as np

In [129]:
features = ['bedrooms', 'bathrooms', 'sqft_living',  'floors', 'view', 'sqft_above', 'sqft_basement']
X = data[features].values
y = data['price'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)


In [155]:
class HousePriceModel(nn.Module):
    def __init__(self):
        super(HousePriceModel, self).__init__()
        super(HousePriceModel, self).__init__()
        self.fc1 = nn.Linear(len(features), 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        self.fc4 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = torch.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = torch.relu(self.bn3(self.fc3(x)))
        x = self.fc4(x)
        return x


model = HousePriceModel()


In [156]:
list(model.parameters())

[Parameter containing:
 tensor([[ 0.2871,  0.2781,  0.0498,  0.0306,  0.2403, -0.1030,  0.0745],
         [ 0.0770, -0.2729, -0.2917,  0.1980,  0.2402, -0.1910,  0.2804],
         [-0.1580, -0.1090,  0.3737, -0.1425, -0.0933,  0.2777,  0.2617],
         [-0.1531, -0.2961, -0.1429, -0.2410, -0.2940, -0.0016, -0.1798],
         [-0.0146, -0.3399,  0.2059, -0.1676,  0.0516,  0.0645, -0.1202],
         [ 0.3050, -0.2313,  0.3478,  0.1488,  0.1242, -0.3536, -0.0761],
         [ 0.2226,  0.3057,  0.0605,  0.0094,  0.0084, -0.0084,  0.1341],
         [-0.1550,  0.2129,  0.0840,  0.0326,  0.0660,  0.3470,  0.2721],
         [-0.1549,  0.0833,  0.2807, -0.1116, -0.1545,  0.0957,  0.2708],
         [-0.0777, -0.2682,  0.2703,  0.1244, -0.3415,  0.1902, -0.0070],
         [ 0.1893, -0.0967,  0.1701, -0.3400,  0.3659,  0.0126, -0.2327],
         [-0.3633,  0.1570,  0.0021, -0.2817,  0.3330, -0.2116, -0.0337],
         [ 0.1537,  0.3058, -0.2741, -0.0806, -0.1135,  0.1611, -0.3532],
         [-0.16

In [157]:
X_train

tensor([[ 0.6705,  0.4369,  0.6610,  ..., -0.3062,  1.1052, -0.6796],
        [ 0.6705,  1.0749,  1.6520,  ..., -0.3062,  2.2112, -0.6796],
        [ 0.6705,  0.4369,  0.7027,  ..., -0.3062,  1.1517, -0.6796],
        ...,
        [-0.4301, -1.4771, -1.0290,  ..., -0.3062, -0.9671, -0.3352],
        [-1.5307, -1.4771, -1.2794,  ..., -0.3062, -1.0603, -0.6796],
        [-1.5307, -1.4771, -0.6743,  ..., -0.3062, -1.1185,  0.6766]])

In [158]:
y_train

tensor([[685000.],
        [857000.],
        [675000.],
        ...,
        [290000.],
        [600000.],
        [475000.]])

In [159]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [171]:
num_epochs = 200
for epoch in range(num_epochs):
    model.train()
     
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    model.eval()
    with torch.inference_mode():
      test_pred = model(X_test)
      test_loss = criterion(test_pred,y_test)
    if epoch % 100 == 0:
      print(f"Epoch: {epoch} | Loss: {loss:.5f}| Test loss: {test_loss:.5f}")

Epoch: 0 | Loss: 181934620672.00000| Test loss: 1137125228544.00000
Epoch: 100 | Loss: 178129010688.00000| Test loss: 1131341545472.00000


In [172]:
with torch.inference_mode():
      test_pred = model(X_test)
      test_loss = criterion(test_pred,y_test).sum()

avg_loss=test_loss/len(X_test)

In [175]:
print("RMSE loss:",torch.sqrt(avg_loss).detach().numpy())

RMSE loss: 35006.934
