In [None]:
import os
import copy
import itertools
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.parameter import Parameter
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel
from torch.nn import Mish
import wandb

In [3]:
listing_path = 'listings_clean.csv'
listing_data = pd.read_csv(listing_path)
print(listing_data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7391 entries, 0 to 7390
Data columns (total 80 columns):
 #   Column                                                                         Non-Null Count  Dtype  
---  ------                                                                         --------------  -----  
 0   host_is_superhost                                                              7391 non-null   float64
 1   host_listings_count                                                            7391 non-null   float64
 2   host_identity_verified                                                         7391 non-null   int64  
 3   latitude                                                                       7391 non-null   float64
 4   longitude                                                                      7391 non-null   float64
 5   accommodates                                                                   7391 non-null   float64
 6   bedrooms                

In [None]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
X = listing_data.drop(columns=['price']).values
y = listing_data['price'].values
# Set the random forest remove less important features
rf = RandomForestRegressor(random_state=42, n_estimators=100)
# Fit the model
rf.fit(X, y)
sfm = SelectFromModel(rf, threshold=0.002, prefit=True)
X_important = sfm.transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_important, y, test_size=0.2, random_state=42)
# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Convert to tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1).to(device)
# Create dataset
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)

In [None]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_features = X_train.shape[1]
output_dim = 1
# Define the HyperNetwork
class HyperNetwork(nn.Module):
    def __init__(self, f_size=3, z_dim=64, out_size=16, in_size=16):
        # Initialize the hypernetwork
        super(HyperNetwork, self).__init__()
        # Set the dimensions of latent vector, filter size, output size and input size
        self.z_dim = z_dim
        self.f_size = f_size
        self.out_size = out_size
        self.in_size = in_size
        # Define the weights and biases for the hypernetwork
        self.w1 = Parameter(torch.fmod(torch.randn((self.z_dim, self.out_size * self.f_size * self.f_size)).cuda(), 2))
        self.b1 = Parameter(torch.fmod(torch.randn((self.out_size * self.f_size * self.f_size)).cuda(), 2))
        self.w2 = Parameter(torch.fmod(torch.randn((self.z_dim, self.in_size * self.z_dim)).cuda(), 2))
        self.b2 = Parameter(torch.fmod(torch.randn((self.in_size * self.z_dim)).cuda(), 2))

    def forward(self, z):
        h_in = torch.matmul(z, self.w2) + self.b2
        h_in = h_in.view(-1, self.z_dim)
        h_final = torch.matmul(h_in, self.w1) + self.b1
        kernel = h_final.view(self.out_size, self.in_size, self.f_size, self.f_size)
        return kernel
        
class Embedding(nn.Module):
    def __init__(self, z_num, z_dim):
        # Initialize the embedding
        super(Embedding, self).__init__()
        # Set the dimensions of latent vector and dimension
        self.z_list = nn.ParameterList()
        self.z_num = z_num
        self.z_dim = z_dim 
        h, k = self.z_num
        # Create the latent vectors
        for i in range(h):
            for j in range(k):
                self.z_list.append(Parameter(torch.fmod(torch.randn(self.z_dim).cuda(), 2)))

    # Forward pass
    def forward(self, hyper_net):
        ww = []
        h, k = self.z_num
        for i in range(h):
            w = []
            for j in range(k):
                w.append(hyper_net(self.z_list[i*k + j]))
            ww.append(torch.cat(w, dim=1))
        return torch.cat(ww, dim=0)
        
class PrimaryNetwork(nn.Module):
    def __init__(self, input_features, z_dim=64, output_dim=1, dropout=0.6):
        super(PrimaryNetwork, self).__init__()
        self.input_features = input_features
        self.z_dim = z_dim
        self.output_dim = output_dim

        # Hypernetworks
        self.hypernet1 = HyperNetwork(f_size=1, z_dim=self.z_dim, out_size=256, in_size=input_features)
        self.hypernet2 = HyperNetwork(f_size=1, z_dim=self.z_dim, out_size=512, in_size=256)

        # Embeddings for the hypernetworks
        self.embedding1 = Embedding((1, 1), z_dim)  
        self.embedding2 = Embedding((1, 1), z_dim)  

        # Output layer
        self.output_layer = nn.Sequential(
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            Mish(),
            nn.Dropout(dropout),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            Mish(),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            Mish(),
            nn.Linear(64, output_dim)
        )

        # Other layers
        self.activation = Mish()
        self.batch_norm1 = nn.BatchNorm1d(256)
        self.batch_norm2 = nn.BatchNorm1d(512)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # Generate weights for the first layer using embedding and hypernetwork
        z1 = self.embedding1(self.hypernet1)
        generated_weights1 = z1.squeeze().view(256, self.input_features)
        x = F.linear(x, generated_weights1)
        x = self.batch_norm1(x)
        x = self.activation(x)
        x = self.dropout(x)

        # Generate weights for the second layer using embedding and hypernetwork
        z2 = self.embedding2(self.hypernet2)
        generated_weights2 = z2.squeeze().view(512, 256)
        x = F.linear(x, generated_weights2)
        x = self.batch_norm2(x)
        x = self.activation(x)
        x = self.dropout(x)

        # Pass through the output layer
        x = self.output_layer(x)
        return x

In [None]:
# Define training 
def train_epoch(model, device, train_loader, optimizer):
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.mse_loss(output, target)
        loss.backward()
        optimizer.step()

def test_epoch(model, device, test_loader):
    model.eval()
    test_loss = 0
    all_targets, all_outputs = [], []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.mse_loss(output, target, reduction='sum').item()
            all_targets.append(target.cpu().numpy())
            all_outputs.append(output.cpu().numpy())
    # Calculate the loss and metrics
    test_loss /= len(test_loader.dataset)
    rmse = np.sqrt(test_loss)
    all_targets = np.concatenate(all_targets)
    all_outputs = np.concatenate(all_outputs)
    r2 = r2_score(all_targets, all_outputs)
    return rmse, r2

In [None]:
# Login into wandb
wandb.login(key="xxx")
os.environ["WANDB_SILENT"] = "true"

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mfrancescogiannuzzo2002-fg[0m ([33mmldlfragian[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Hyperparameters for tuning

In [None]:
# Sweep configuration hyperparameters
sweep_config = {
    "method": "grid",
    "metric": {"name": "rmse", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"values": [0.001, 0.01]},
        "weight_decay": {"values": [1e-7, 1e-6, 1e-3]},
        "dropout": {"values": [0.4, 0.5, 0.6]},
        "batch_size": {"values": [64, 128, 256]}
    },
}

In [None]:
# Initialize the sweep
sweep_id = wandb.sweep(sweep_config, project="grid_tuning_h_MALIS_f")
results = []

def train_sweep(config=None):
        wandb.init(project="grid_search_tuning_h_MALIS_f")
        config = wandb.config
        # Create the data loaders
        train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)
        test_loader = DataLoader(test_data, batch_size=config.batch_size, shuffle=False)
        # Create the model and optimizer
        model = PrimaryNetwork(input_features=X_train.shape[1], z_dim=X_train.shape[1], output_dim=1, dropout = config.dropout).to(device)
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay, alpha=0.9, eps=1e-16)

        for epoch in range(1, 501):
            train_epoch(model, device, train_loader, optimizer)
            
        rmse, r2 = test_epoch(model, device, test_loader)
        wandb.log({"rmse": rmse, "r2": r2})
        # Save the results
        results.append({
            "rmse": rmse,
            "r2": r2,
            "config": {
                "learning_rate": config.learning_rate,
                "weight_decay": config.weight_decay,
                "dropout": config.dropout,
                "batch_size": config.batch_size,
            },
        })

wandb.agent(sweep_id, function=train_sweep)
wandb.finish()

[34m[1mwandb[0m: Agent Starting Run: eb21cj1p with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	weight_decay: 1e-07


In [None]:
# Sort the results
results = sorted(results, key=lambda x: x["rmse"])
print("Top 5:")
for i, res in enumerate(results[:5]):
    print(f"Rank {i+1}: RMSE={res['rmse']},R2={res['r2']}, Config={res['config']}")

Top 5:
Rank 1: RMSE=97.50132495068956,R2=0.755596867669451, Config={'learning_rate': 0.001, 'weight_decay': 1e-06, 'dropout': 0.4, 'batch_size': 128}
Rank 2: RMSE=97.5669556294437,R2=0.7552677303550801, Config={'learning_rate': 0.001, 'weight_decay': 1e-07, 'dropout': 0.5, 'batch_size': 128}
Rank 3: RMSE=97.8157394386255,R2=0.7540180694612296, Config={'learning_rate': 0.01, 'weight_decay': 1e-07, 'dropout': 0.5, 'batch_size': 64}
Rank 4: RMSE=98.15831696457059,R2=0.7522920574605144, Config={'learning_rate': 0.001, 'weight_decay': 1e-07, 'dropout': 0.4, 'batch_size': 256}
Rank 5: RMSE=98.34853368037119,R2=0.7513310854949058, Config={'learning_rate': 0.001, 'weight_decay': 1e-07, 'dropout': 0.6, 'batch_size': 256}


In [None]:
best_results = []
# Retrain the top 5 configurations for 3000 epochs
for i, best_config in enumerate(results[:5]):
    wandb.init(project="hypernetwork_h_best_regression_final", name=f"config_{i+1}", settings=wandb.Settings(silent="true"))
    print(f"Retraining configuration {i + 1} for 3000 epochs: {best_config['config']}")
    config = best_config["config"]
    # Create the data loaders   
    train_loader = DataLoader(train_data, batch_size=config["batch_size"], shuffle=True)
    test_loader = DataLoader(test_data, batch_size=config["batch_size"], shuffle=False)
    # Create the model
    model = PrimaryNetwork(input_features=X_train.shape[1],z_dim=X_train.shape[1],output_dim=1,dropout=config["dropout"]).to(device)
    # Create the optimizer
    optimizer = optim.RMSprop(model.parameters(),lr=config["learning_rate"],weight_decay=config["weight_decay"],alpha=0.9,eps=1e-16,)
    best_rmse = float("inf")
    best_r2 = 0
    # Train the model
    for epoch in range(1, 3001):
        train_epoch(model, device, train_loader, optimizer)
        rmse, r2 = test_epoch(model, device, train_loader)
        if epoch % 50 == 0: wandb.log({"epoch": epoch, "train_rmse": rmse, "train_r2": r2})
        rmse, r2 = test_epoch(model, device, test_loader)
        if epoch % 50 == 0: wandb.log({"epoch": epoch, "test_rmse": rmse, "test_r2": r2})
        if rmse < best_rmse:
            best_rmse = rmse
            best_r2 = r2

        if epoch % 500 == 0:
            print(f"Epoch {epoch}: RMSE={rmse:.4f}, Best RMSE={best_rmse:.4f}")
    # Save the best results
    best_results.append({
        "best_rmse": best_rmse,
        "best_r2": best_r2,
        "config": config,
    })
    wandb.finish()
# Sort the best results
best_results = sorted(best_results, key=lambda x: x["best_rmse"])
print("\nBest configuration after retraining:")
print(f"RMSE={best_results[0]['best_rmse']}, R2={best_results[0]['best_r2']}, Config={best_results[0]['config']}")

Retraining configuration 1 for 3000 epochs: {'learning_rate': 0.001, 'weight_decay': 1e-06, 'dropout': 0.4, 'batch_size': 128}
Epoch 500: RMSE=101.1098, Best RMSE=97.7130
Epoch 1000: RMSE=103.4338, Best RMSE=97.7130
Epoch 1500: RMSE=103.7526, Best RMSE=97.7130
Epoch 2000: RMSE=105.4699, Best RMSE=97.7130
Epoch 2500: RMSE=102.1510, Best RMSE=97.7130
Epoch 3000: RMSE=102.5139, Best RMSE=97.7130


Retraining configuration 2 for 3000 epochs: {'learning_rate': 0.001, 'weight_decay': 1e-07, 'dropout': 0.5, 'batch_size': 128}
Epoch 500: RMSE=98.8612, Best RMSE=97.6722
Epoch 1000: RMSE=101.5494, Best RMSE=96.2735
Epoch 1500: RMSE=100.6393, Best RMSE=96.2735
Epoch 2000: RMSE=99.8567, Best RMSE=96.2735
Epoch 2500: RMSE=101.6593, Best RMSE=96.2735
Epoch 3000: RMSE=100.3760, Best RMSE=96.2735


Retraining configuration 3 for 3000 epochs: {'learning_rate': 0.01, 'weight_decay': 1e-07, 'dropout': 0.5, 'batch_size': 64}
Epoch 500: RMSE=100.7804, Best RMSE=96.2578
Epoch 1000: RMSE=103.4216, Best RMSE=96.0764
Epoch 1500: RMSE=102.0489, Best RMSE=96.0764
Epoch 2000: RMSE=102.8755, Best RMSE=96.0764
Epoch 2500: RMSE=103.3083, Best RMSE=96.0764
Epoch 3000: RMSE=101.0357, Best RMSE=96.0764


Retraining configuration 4 for 3000 epochs: {'learning_rate': 0.001, 'weight_decay': 1e-07, 'dropout': 0.4, 'batch_size': 256}
Epoch 500: RMSE=98.9728, Best RMSE=97.7416
Epoch 1000: RMSE=98.8732, Best RMSE=96.5584
Epoch 1500: RMSE=98.7185, Best RMSE=96.1015
Epoch 2000: RMSE=99.2445, Best RMSE=96.1015
Epoch 2500: RMSE=98.0827, Best RMSE=96.1015
Epoch 3000: RMSE=101.7492, Best RMSE=96.1015


Retraining configuration 5 for 3000 epochs: {'learning_rate': 0.001, 'weight_decay': 1e-07, 'dropout': 0.6, 'batch_size': 256}
Epoch 500: RMSE=102.4346, Best RMSE=100.2917
Epoch 1000: RMSE=102.8862, Best RMSE=99.3349
Epoch 1500: RMSE=102.6182, Best RMSE=98.5931
Epoch 2000: RMSE=102.5645, Best RMSE=98.2791
Epoch 2500: RMSE=99.0138, Best RMSE=97.6974
Epoch 3000: RMSE=100.8779, Best RMSE=96.7540



Best configuration after retraining:
RMSE=96.07640015470915, R2=0.7626882875010563, Config={'learning_rate': 0.01, 'weight_decay': 1e-07, 'dropout': 0.5, 'batch_size': 64}
