In [19]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [33]:
mean_std = pd.read_csv("../data_processing/mean_std.csv", index_col = 0)

In [24]:
checkpoint = torch.load("../test/Brian/nov22_test01/final/checkpoint_final.tar")
demo_df = pd.read_csv("../split_datasets/live_demo.csv")

test_df = pd.read_csv("../split_datasets/test.csv")
test_df = test_df.drop(columns="id")
test_df = test_df.drop(columns="last_scraped")
test_df = test_df.drop(columns="amenities")
demo_df = demo_df.drop(columns="amenities")

In [25]:
demo_df

Unnamed: 0,host_is_superhost,price,latitude,longitude,property_type,room_type,accommodates,bathrooms,bedrooms,beds,...,amenities_count,facilities_Free parking on premises,facilities_Gym,facilities_Hot tub,facilities_Pool,facilities_count,house_rules_Suitable for events,house_rules_Pets allowed,house_rules_Smoking allowed,house_rules_count
0,0,-0.881203,-0.896103,-0.061014,1.182045,1.329375,-1.165452,0.652325,-0.353343,-0.6507,...,0.293298,0,0,0,0,-0.901841,0,0,0,-0.410572
1,0,0.145206,-0.333142,0.222818,1.182045,-0.686927,-0.597957,-0.436242,-0.353343,-0.6507,...,-0.427262,0,0,0,0,-0.901841,0,0,0,-0.410572
2,0,-1.042496,1.851645,1.795044,-0.986725,1.329375,-0.597957,-0.436242,-0.353343,-0.6507,...,-0.427262,0,0,0,0,-0.901841,0,0,0,-0.410572
3,1,-0.617269,2.36093,0.983627,-0.444532,-0.686927,1.104526,0.652325,0.953672,0.389812,...,0.293298,0,0,0,0,-0.901841,0,0,0,-0.410572
4,1,0.541107,-0.491634,-1.055418,1.182045,-0.686927,-0.597957,-0.436242,-0.353343,-0.6507,...,0.293298,0,0,0,0,-0.901841,0,0,0,-0.410572
5,0,-0.455976,-0.276603,-2.584298,1.182045,-0.686927,-0.597957,-0.436242,-0.353343,-0.6507,...,0.653578,0,0,0,0,-0.901841,0,0,0,-0.410572
6,1,-0.807888,-0.351967,-0.797598,2.266429,-0.686927,-0.597957,-0.436242,-1.660357,-0.6507,...,-0.066982,0,0,0,0,-0.901841,0,0,0,-0.410572
7,0,-0.734573,0.303578,-1.091032,,-0.686927,-0.597957,-0.436242,-0.353343,-0.6507,...,0.653578,0,0,0,0,-0.901841,0,0,0,-0.410572
8,0,3.371063,0.751704,0.044698,0.09766,-0.686927,1.672021,5.006591,2.260687,1.430323,...,-0.066982,0,0,0,0,-0.901841,0,0,0,-0.410572
9,1,-0.236032,-0.684738,0.100713,1.182045,3.345677,-0.597957,-0.436242,-0.353343,-0.6507,...,0.653578,0,0,0,0,-0.901841,0,0,0,-0.410572


In [31]:
def get_error(net, train_loader, threshold):
    train_samples = 0
    train_error = 0
    train_accuracy = [0 for i in threshold]
    for i, data in enumerate(train_loader):
        sample = data["sample"].float()
        label = data["label"].float().view(-1)
        output = net(sample).view(-1)
        error = abs(label-output)*mean_std["std"]["price"]
        for j in range(len(threshold)):
            accurate = error < float(threshold[j])
            train_accuracy[j] += accurate.sum().item()
        train_samples += len(label)
        train_error += error.sum().item()
    avg_train_error = float(train_error)/float(train_samples)
    avg_train_accuracy = [float(a)/float(train_samples) for a in train_accuracy]
    
    return avg_train_error, avg_train_accuracy

In [9]:
class airbnb_net (nn.Module):
    def __init__(self):
        super(airbnb_net, self).__init__()
        self.layer1 = nn.Linear(41, 41*3)
        self.layer2 = nn.Linear(41*3, 41*3)
        self.layer3 = nn.Linear(41*3, 41*2)
        self.layer4 = nn.Linear(41*2, 41*2)
        self.layer5 = nn.Linear(41*2, 41*1)
        self.layer6 = nn.Linear(41*1, 1)
        
        #self.dropout1 = nn.Dropout(p=0.😎
        self.dropout2 = nn.Dropout(p=0.1)
    
    def forward(self, x):
        x = torch.relu(self.layer1(x))
        x = torch.relu(self.layer2(x))
        x = self.dropout2(x)
        x = torch.relu(self.layer3(x))
        x = self.dropout2(x)
        x = torch.relu(self.layer4(x))
        #x = self.dropout2(x)
        x = torch.relu(self.layer5(x))
        return self.layer6(x)

In [35]:
model = airbnb_net()
model.load_state_dict(checkpoint['model_state_dict'])

<All keys matched successfully>

In [26]:
class airbnb_dataset (Dataset):
    def __init__(self, df, purpose):
        self.price = torch.from_numpy(df["price"].to_numpy())
        self.data = torch.from_numpy(df.drop(columns="price").to_numpy())
        self.purpose = purpose
        
    def __len__(self):
        return len(self.price)
    
    def __getitem__(self, index):
        # Returns specific sample as a dict
        if torch.is_tensor(index):
            index = index.tolist()
        sample = self.data[index]
        label = self.price[index]
        return {"sample": sample, "label": label}
#print(train_df)
    

test_ds = airbnb_dataset(test_df, "test")
demo_ds = airbnb_dataset(demo_df, "demo")

demo_loader = DataLoader(demo_ds, batch_size=1000, shuffle=True, num_workers=0)
test_loader = DataLoader(test_ds, batch_size=1000, shuffle=True, num_workers=0)

In [36]:
get_error(model, test_loader, [10, 20, 30])

(17.978561375888553,
 [0.5022661132579552, 0.7245837507752493, 0.8279662229855446])

In [37]:
get_error(model, demo_loader, [10, 20, 30])

(nan, [0.2, 0.4, 0.6])