In [27]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns


In [28]:
df = pd.read_csv("final_airbnb_dataset.csv")

df.columns.tolist()



['id',
 'listing_url',
 'scrape_id',
 'last_scraped',
 'source',
 'name',
 'host_id',
 'host_url',
 'host_since',
 'host_response_time',
 'host_response_rate',
 'host_acceptance_rate',
 'host_is_superhost',
 'host_neighbourhood',
 'host_listings_count',
 'host_total_listings_count',
 'host_verifications',
 'host_has_profile_pic',
 'host_identity_verified',
 'neighbourhood',
 'neighbourhood_cleansed',
 'neighbourhood_group_cleansed',
 'latitude',
 'longitude',
 'property_type',
 'room_type',
 'accommodates',
 'bathrooms',
 'bathrooms_text',
 'bedrooms',
 'beds',
 'amenities',
 'price',
 'minimum_nights',
 'maximum_nights',
 'minimum_minimum_nights',
 'maximum_minimum_nights',
 'minimum_maximum_nights',
 'maximum_maximum_nights',
 'minimum_nights_avg_ntm',
 'maximum_nights_avg_ntm',
 'has_availability',
 'availability_30',
 'availability_60',
 'availability_90',
 'availability_365',
 'calendar_last_scraped',
 'number_of_reviews',
 'number_of_reviews_ltm',
 'number_of_reviews_l30d',
 'rev

In [29]:
features = [
    'distance_score', 'price_score', 'number_of_reviews',
    'availability_365', 'review_scores_rating',
]

df_model = df.dropna(subset=features + ['booked'])

X = df_model[features]
y = df_model['booked'].astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values.reshape(-1, 1), dtype=torch.float32)

In [31]:
class AirbnbModel(nn.Module):
    def __init__(self, input_dim):
        super(AirbnbModel, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

# Initialize model, loss, optimizer
model = AirbnbModel(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [32]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

# Predict probabilities on the full dataset
# Make predictions
with torch.no_grad():
    all_features_tensor = torch.tensor(df_model[features].values, dtype=torch.float32)
    probs = model(all_features_tensor).numpy().flatten()

# Ensure the shapes match
assert len(probs) == len(df_model), "Shape mismatch: probs doesn't align with df_model"

# Assign predictions
df_model = df_model.copy()
df_model['predicted_proba'] = probs

# Confirm it worked
print(df_model.columns)
print(df_model[['predicted_proba']].head())


Epoch 10, Loss: 0.6588
Epoch 20, Loss: 0.6164
Epoch 30, Loss: 0.5743
Epoch 40, Loss: 0.5438
Epoch 50, Loss: 0.5240
Epoch 60, Loss: 0.5091
Epoch 70, Loss: 0.4993
Epoch 80, Loss: 0.4948
Epoch 90, Loss: 0.4918
Epoch 100, Loss: 0.4905
Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name',
       'host_id', 'host_url', 'host_since', 'host_response_time',
       'host_response_rate', 'host_acceptance_rate', 'host_is_superhost',
       'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'latitude',
       'longitude', 'property_type', 'room_type', 'accommodates', 'bathrooms',
       'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price',
       'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
       'maximum_minimum_nights', 'minimum_maximum_nights',
       'maximum_max

In [33]:
print(df_model.columns.tolist())

['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name', 'host_id', 'host_url', 'host_since', 'host_response_time', 'host_response_rate', 'host_acceptance_rate', 'host_is_superhost', 'host_neighbourhood', 'host_listings_count', 'host_total_listings_count', 'host_verifications', 'host_has_profile_pic', 'host_identity_verified', 'neighbourhood', 'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'latitude', 'longitude', 'property_type', 'room_type', 'accommodates', 'bathrooms', 'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price', 'minimum_nights', 'maximum_nights', 'minimum_minimum_nights', 'maximum_minimum_nights', 'minimum_maximum_nights', 'maximum_maximum_nights', 'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm', 'has_availability', 'availability_30', 'availability_60', 'availability_90', 'availability_365', 'calendar_last_scraped', 'number_of_reviews', 'number_of_reviews_ltm', 'number_of_reviews_l30d', 'review_scores_rating', 'review_scores_accuracy', 'rev

In [34]:
top_10 = df_model.sort_values(by='predicted_proba', ascending=False).head(10)
top_10_links = top_10[['id', 'listing_url', 'predicted_proba']]

print("\n🔗 Top 10 Recommended Airbnbs with URLs:\n")
for _, row in top_10_links.iterrows():
    print(f"ID: {row['id']} | Score: {row['predicted_proba']:.2f} | URL: {row['listing_url']}")



🔗 Top 10 Recommended Airbnbs with URLs:

ID: 43388308 | Score: 0.47 | URL: https://www.airbnb.com/rooms/43388308
ID: 1164402494958427969 | Score: 0.47 | URL: https://www.airbnb.com/rooms/1164402494958427969
ID: 42761887 | Score: 0.47 | URL: https://www.airbnb.com/rooms/42761887
ID: 942840241210009215 | Score: 0.47 | URL: https://www.airbnb.com/rooms/942840241210009215
ID: 1089725892764328452 | Score: 0.46 | URL: https://www.airbnb.com/rooms/1089725892764328452
ID: 608065350562145555 | Score: 0.46 | URL: https://www.airbnb.com/rooms/608065350562145555
ID: 858697692672545141 | Score: 0.46 | URL: https://www.airbnb.com/rooms/858697692672545141
ID: 979806126336543822 | Score: 0.46 | URL: https://www.airbnb.com/rooms/979806126336543822
ID: 1137593997555636834 | Score: 0.46 | URL: https://www.airbnb.com/rooms/1137593997555636834
ID: 987725019122308477 | Score: 0.46 | URL: https://www.airbnb.com/rooms/987725019122308477
