In [86]:
import torch
from tqdm import tqdm
from torch import nn, optim
from torch.nn import init
import pandas as pd
import pandas as pd
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

data = pd.read_csv("../../data2/data.csv").drop(columns = ["id", "source", 
                                                       "latitude", "longitude"])

In [98]:
class TabularFFNNSimple(nn.Module):
    def __init__(self, input_size, output_size, dropout_prob=0.18):
        super(TabularFFNNSimple, self).__init__()
        hidden_size = 32
        self.ffnn = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size // 2),
            nn.Dropout(0.01),
            nn.ReLU(),
            nn.Linear(hidden_size // 2, output_size)
        )
        
        for m in self.ffnn:
            if isinstance(m, nn.Linear):
                init.xavier_uniform_(m.weight)
                m.bias.data.fill_(0)

    def forward(self, x):
        x = x.float()
        # print(x)
        x = x.view(x.size(0), -1)
        x = self.ffnn(x)
        return x
    
# Split the data into features and target
X = data.drop('price', axis=1)
y = data['price']

# Standardize the features
device = torch.device("cpu")
# Convert to PyTorch tensors
X_tensor = torch.tensor(X.to_numpy(), dtype=torch.float32, device = device)
y_tensor = torch.tensor(y.values, dtype=torch.float32, device = device)


# Split the data into training and combined validation and testing sets
X_train, X_val_test, y_train, y_val_test = train_test_split(X_tensor, y_tensor,
                                                            test_size=0.1, random_state=42)

# Split the combined validation and testing sets
X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# Create DataLoader for training, validation, and testing
train_data = TensorDataset(X_train, y_train)
val_data = TensorDataset(X_val, y_val)
test_data = TensorDataset(X_test, y_test)

batch_size = 256
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Check if the dimensions match the expected input size for the model
input_size = X_train.shape[1]

# Output
# input_size, train_loader, test_loader

model = TabularFFNNSimple(
    input_size = input_size,
    output_size = 1
)
model.to(device)

num_epochs = 300000
train_losses = []
val_losses = []
epochs_suc = [] # to have a reference to it
grad_norms = []

def get_gradient_norm(model):
    total_norm = 0
    for p in model.parameters():
        if p.grad is not None:
            param_norm = p.grad.data.norm(2)
            total_norm += param_norm.item() ** 2
    total_norm = total_norm ** 0.5
    return total_norm

In [102]:
optimizer = optim.Adam(
    model.parameters(), 
    lr=9e-4,
    weight_decay=0.06
)
criterion = torch.nn.MSELoss()
criterion_abs = torch.nn.L1Loss()
criterion = criterion_abs

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min', 
    factor=0.999999, 
    patience=2, 
    verbose=True
)

for epoch in range(num_epochs):
    # Training
    model.train()  # Set the model to training mode
    running_loss = 0.0
    l1_losses = []
    grad_norm = 0
    for tuple_ in train_loader:
        datas, prices = tuple_
        optimizer.zero_grad()
        outputs = model(datas)
        prices_viewed = prices.view(-1, 1).float()
        loss = criterion(outputs, prices_viewed)
        loss.backward()
        grad_norm += get_gradient_norm(model)
        optimizer.step()

        running_loss += loss.item()
        
    grad_norms.append(grad_norm / len(train_loader))
    train_losses.append(running_loss / len(train_loader))  # Average loss for this epoch

    # Validation
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    with torch.no_grad():  # Disable gradient calculation
        for tuple_ in val_loader:
            datas, prices = tuple_
            outputs = model(datas)  # Forward pass
            prices_viewed = prices.view(-1, 1).float()
            loss = criterion(outputs, prices_viewed)  # Compute loss
            val_loss += loss.item()  # Accumulate the loss
            l1_losses.append(criterion_abs(outputs, prices_viewed))

    val_losses.append(val_loss / len(val_loader))  # Average loss for this epoch
    l1_mean_loss = sum(l1_losses) / len(l1_losses)

    epochs_suc.append(epoch)
    scheduler.step(val_losses[-1])
    
    if epoch % 100 == 0:
        tl = f"Training Loss: {int(train_losses[-1])}"
        vl = f"Validation Loss: {int(val_losses[-1])}"
        l1 = f"L1: {int(l1_mean_loss)}"
        dl = f'Epoch {epoch+1}, {tl}, {vl}, {grad_norms[-1]}'
        print(dl)

Epoch 1, Training Loss: 32165, Validation Loss: 42276, 437897.68338317826
Epoch 101, Training Loss: 31279, Validation Loss: 41893, 374602.61641755054
Epoch 201, Training Loss: 31801, Validation Loss: 41937, 483587.3804459777
Epoch 301, Training Loss: 31445, Validation Loss: 41965, 587991.6689905802
Epoch 401, Training Loss: 31279, Validation Loss: 42080, 506938.10011778114
Epoch 501, Training Loss: 30985, Validation Loss: 41876, 536733.7570460455
Epoch 601, Training Loss: 31468, Validation Loss: 41993, 542780.3244692485
Epoch 701, Training Loss: 31576, Validation Loss: 42330, 561904.3480680472
Epoch 801, Training Loss: 31416, Validation Loss: 42203, 501779.37233989174
Epoch 901, Training Loss: 31478, Validation Loss: 41834, 333938.665860009
Epoch 1001, Training Loss: 31486, Validation Loss: 42181, 430222.5523783317
Epoch 1101, Training Loss: 30600, Validation Loss: 41897, 243963.2182260701
Epoch 1201, Training Loss: 30897, Validation Loss: 42032, 526832.4444273484
Epoch 1301, Training 

Epoch 10801, Training Loss: 31629, Validation Loss: 42119, 1032734.7375985411
Epoch 10901, Training Loss: 31605, Validation Loss: 41857, 753019.5123561736
Epoch 11001, Training Loss: 31118, Validation Loss: 41860, 449214.02879497834
Epoch 11101, Training Loss: 31363, Validation Loss: 41582, 789938.5353313305
Epoch 11201, Training Loss: 30741, Validation Loss: 42026, 679258.934755355
Epoch 11301, Training Loss: 31422, Validation Loss: 41665, 716804.3539988119
Epoch 11401, Training Loss: 31079, Validation Loss: 41995, 470441.674722189
Epoch 11501, Training Loss: 31316, Validation Loss: 42294, 545431.4754620875
Epoch 11601, Training Loss: 31352, Validation Loss: 42061, 484076.54099178704
Epoch 11701, Training Loss: 31338, Validation Loss: 41782, 868765.7204533253
Epoch 11801, Training Loss: 30474, Validation Loss: 41887, 943872.5895313093
Epoch 11901, Training Loss: 32067, Validation Loss: 42625, 1131767.0287534175
Epoch 12001, Training Loss: 30703, Validation Loss: 42569, 696511.57475389

Epoch 21501, Training Loss: 31575, Validation Loss: 42414, 936088.0345840035
Epoch 21601, Training Loss: 30778, Validation Loss: 42211, 581719.3969061744
Epoch 21701, Training Loss: 31820, Validation Loss: 42121, 1205134.3162629807
Epoch 21801, Training Loss: 31065, Validation Loss: 42408, 436599.7481660602
Epoch 21901, Training Loss: 30874, Validation Loss: 42288, 626788.901886286
Epoch 22001, Training Loss: 30617, Validation Loss: 42176, 643305.7673697585
Epoch 22101, Training Loss: 30653, Validation Loss: 42486, 612291.8190690164
Epoch 22201, Training Loss: 30903, Validation Loss: 41967, 570269.1611705819
Epoch 22301, Training Loss: 30651, Validation Loss: 41839, 795292.577522894
Epoch 22401, Training Loss: 30860, Validation Loss: 41709, 964765.1804871847
Epoch 22501, Training Loss: 31122, Validation Loss: 42107, 790067.0945843743
Epoch 22601, Training Loss: 30604, Validation Loss: 42293, 526161.7644963367
Epoch 22701, Training Loss: 30825, Validation Loss: 42290, 601649.5889623697


Epoch 32201, Training Loss: 30838, Validation Loss: 42324, 479827.8120811215
Epoch 32301, Training Loss: 30771, Validation Loss: 42355, 594080.6293253623
Epoch 32401, Training Loss: 30936, Validation Loss: 42216, 774688.9549125468
Epoch 32501, Training Loss: 31583, Validation Loss: 42527, 485889.76714849565
Epoch 32601, Training Loss: 31829, Validation Loss: 42209, 665046.8611159164
Epoch 32701, Training Loss: 31790, Validation Loss: 42137, 947680.2656645523
Epoch 32801, Training Loss: 31219, Validation Loss: 42374, 1230582.582260212
Epoch 32901, Training Loss: 30341, Validation Loss: 42572, 715353.9163383471
Epoch 33001, Training Loss: 31149, Validation Loss: 42399, 506650.30385517416
Epoch 33101, Training Loss: 31154, Validation Loss: 42378, 1235226.996771582
Epoch 33201, Training Loss: 30696, Validation Loss: 42684, 491587.24726365553
Epoch 33301, Training Loss: 30481, Validation Loss: 42562, 563558.0814606233
Epoch 33401, Training Loss: 31128, Validation Loss: 42408, 455654.8847529

Epoch 42901, Training Loss: 31209, Validation Loss: 42383, 1144711.0776301045
Epoch 43001, Training Loss: 30891, Validation Loss: 42537, 463873.8777855798
Epoch 43101, Training Loss: 30976, Validation Loss: 42421, 607066.268080538
Epoch 43201, Training Loss: 31039, Validation Loss: 42222, 647428.0312514767
Epoch 43301, Training Loss: 30758, Validation Loss: 42353, 573040.4948218723
Epoch 43401, Training Loss: 30184, Validation Loss: 42325, 449668.87467630516
Epoch 43501, Training Loss: 30774, Validation Loss: 42747, 730612.4392633733
Epoch 43601, Training Loss: 31550, Validation Loss: 42439, 899708.8447182985
Epoch 43701, Training Loss: 31085, Validation Loss: 42553, 784840.1410329266
Epoch 43801, Training Loss: 30638, Validation Loss: 42494, 533202.9416798225
Epoch 43901, Training Loss: 31292, Validation Loss: 42181, 1055689.8320680282
Epoch 44001, Training Loss: 31070, Validation Loss: 42562, 1026163.8894970064
Epoch 44101, Training Loss: 30392, Validation Loss: 42836, 839246.0826751

Epoch 53601, Training Loss: 30587, Validation Loss: 42594, 794502.2248879522
Epoch 53701, Training Loss: 30746, Validation Loss: 42829, 706298.361653214
Epoch 53801, Training Loss: 30681, Validation Loss: 42867, 1135092.7324402337
Epoch 53901, Training Loss: 30298, Validation Loss: 42719, 927603.8639749986
Epoch 54001, Training Loss: 30629, Validation Loss: 42954, 958717.6236856198
Epoch 54101, Training Loss: 30908, Validation Loss: 42795, 659943.7802691488
Epoch 54201, Training Loss: 29990, Validation Loss: 42595, 743012.8471107722
Epoch 54301, Training Loss: 30900, Validation Loss: 42597, 554634.7802218089
Epoch 54401, Training Loss: 30670, Validation Loss: 42726, 733791.6888535953
Epoch 54501, Training Loss: 30119, Validation Loss: 42855, 871451.3782193941
Epoch 54601, Training Loss: 30424, Validation Loss: 42667, 683716.1711564716
Epoch 54701, Training Loss: 31098, Validation Loss: 42588, 673871.7611470901
Epoch 54801, Training Loss: 31273, Validation Loss: 42579, 1428575.688940972

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

train_losses_sampled = train_losses[::10000]  # Select every 1000th value
val_losses_sampled = val_losses[::10000]      # Select every 1000th value

# Generate corresponding epoch numbers, assuming epochs_suc is your list of epoch numbers
epochs_sampled = epochs_suc[::10000]

plt.style.use("ggplot")
plt.title("Overfitted model evaluation")


# Use sampled data for plotting
plt.plot(epochs_sampled, train_losses_sampled, label='Training')
plt.plot(epochs_sampled, val_losses_sampled, label='Validation')

plt.ylabel("Mean Absolute Error")
plt.xlabel("Epoch")
# plt.yscale('log')
plt.xticks(
    range(1, epochs_sampled[-1], int(epochs_sampled[-1] / 8)),
    range(1, epochs_sampled[-1], int(epochs_sampled[-1] / 8)),
    rotation = 25
)
plt.legend()
plt.tight_layout()
# plt.show()
plt.savefig("../../visualizations/overfit_model_evaluation_full_dataset.png", dpi=800)

In [105]:
def model_statistics(model):
    total_neurons = 0
    total_weights = 0
    total_biases = 0
    total_trainable_params = 0

    for layer in model.modules():
        total_weights += torch.numel(layer.weight)

    return {
        "Total Neurons": total_neurons,
        "Total Weights": total_weights,
        "Total Biases": total_biases,
        "Total Trainable Parameters": total_trainable_params
    }

stats = model_statistics(model)
print(stats)


AttributeError: 'TabularFFNNSimple' object has no attribute 'weight'

# Saving. Good results

In [None]:
# class TabularFFNNSimple(nn.Module):
#     def __init__(self, input_size, output_size, dropout_prob=0.4):
#         super(TabularFFNNSimple, self).__init__()
#         hidden_size = 48
#         self.ffnn = nn.Sequential(
#             nn.Linear(input_size, hidden_size),
#             nn.ReLU(),
# #             nn.BatchNorm1d(hidden_size),
# #             nn.Dropout(0.5),
#             nn.Linear(hidden_size, hidden_size),
#             nn.ReLU(),
# #             nn.Dropout(0.5),
#             nn.Linear(hidden_size, output_size)
#         )
        
#         for m in self.ffnn:
#             if isinstance(m, nn.Linear):
#                 init.xavier_uniform_(m.weight)
#                 m.bias.data.fill_(0)

#     def forward(self, x):
#         x = x.float()
#         # print(x)
#         x = x.view(x.size(0), -1)
#         x = self.ffnn(x)
#         return x
    
# # Split the data into features and target
# X = data.drop('price', axis=1)
# y = data['price']

# # Standardize the features
# device = torch.device("cpu")
# # Convert to PyTorch tensors
# X_tensor = torch.tensor(X.to_numpy(), dtype=torch.float32, device = device)
# y_tensor = torch.tensor(y.values, dtype=torch.float32, device = device)


# # Split the data into training and combined validation and testing sets
# X_train, X_val_test, y_train, y_val_test = train_test_split(X_tensor, y_tensor,
#                                                             test_size=0.4, random_state=42)

# # Split the combined validation and testing sets
# X_val, X_test, y_val, y_test = train_test_split(X_val_test, y_val_test, test_size=0.5, random_state=42)

# # Create DataLoader for training, validation, and testing
# train_data = TensorDataset(X_train, y_train)
# val_data = TensorDataset(X_val, y_val)
# test_data = TensorDataset(X_test, y_test)

# batch_size = 256
# train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
# val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)
# test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# # Check if the dimensions match the expected input size for the model
# input_size = X_train.shape[1]

# # Output
# # input_size, train_loader, test_loader

# model = TabularFFNNSimple(
#     input_size = input_size,
#     output_size = 1
# )
# model.to(device)

# num_epochs = 300000
# train_losses = []
# val_losses = []
# epochs_suc = [] # to have a reference to it
# grad_norms = []

# def get_gradient_norm(model):
#     total_norm = 0
#     for p in model.parameters():
#         if p.grad is not None:
#             param_norm = p.grad.data.norm(2)
#             total_norm += param_norm.item() ** 2
#     total_norm = total_norm ** 0.5
#     return total_norm

# optimizer = optim.Adam(
#     model.parameters(), 
#     lr=9e-3,
#     weight_decay=1e-4
# )
# criterion = torch.nn.MSELoss()
# criterion_abs = torch.nn.L1Loss()
# criterion = criterion_abs

# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
#     optimizer, 
#     mode='min', 
#     factor=0.999999, 
#     patience=10, 
#     verbose=True
# )

# for epoch in range(num_epochs):
#     # Training
#     model.train()  # Set the model to training mode
#     running_loss = 0.0
#     l1_losses = []
#     grad_norm = 0
#     for tuple_ in train_loader:
#         datas, prices = tuple_
#         optimizer.zero_grad()
#         outputs = model(datas)
#         prices_viewed = prices.view(-1, 1).float()
#         loss = criterion(outputs, prices_viewed)
#         loss.backward()
#         grad_norm += get_gradient_norm(model)
#         optimizer.step()

#         running_loss += loss.item()
        
#     grad_norms.append(grad_norm / len(train_loader))
#     train_losses.append(running_loss / len(train_loader))  # Average loss for this epoch

#     # Validation
#     model.eval()  # Set the model to evaluation mode
#     val_loss = 0.0
#     with torch.no_grad():  # Disable gradient calculation
#         for tuple_ in val_loader:
#             datas, prices = tuple_
#             outputs = model(datas)  # Forward pass
#             prices_viewed = prices.view(-1, 1).float()
#             loss = criterion(outputs, prices_viewed)  # Compute loss
#             val_loss += loss.item()  # Accumulate the loss
#             l1_losses.append(criterion_abs(outputs, prices_viewed))

#     val_losses.append(val_loss / len(val_loader))  # Average loss for this epoch
#     l1_mean_loss = sum(l1_losses) / len(l1_losses)
#     # Print epoch's summary
#     epochs_suc.append(epoch)
#     scheduler.step(val_losses[-1])
#     if epoch % 100 == 0:
#         tl = f"Training Loss: {int(train_losses[-1])}"
#         vl = f"Validation Loss: {int(val_losses[-1])}"
#         l1 = f"L1: {int(l1_mean_loss)}"
#         dl = f'Epoch {epoch+1}, {tl}, {vl}, {grad_norms[-1]}'
#         print(dl)