In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from analysis.datasets import load_entsoe
from analysis.splits import to_train_validation_test_data
from analysis.transformations import scale_power_data
from tabpfn import TabPFNRegressor
from analysis.transformations import add_interval_index, add_lagged_features
from torchinfo import summary
from analysis.TabPFN_copy import evaluate
import torch

# Info

### model.predict:
The `predict` method returns a dictionary containing different types of outputs based on the `output_type` parameter:
1. `main`
- a dictionary with key statistical outputs (mean, median, mode, quantiles)

2. `full`
- Includes everything from "main", plus
    - logits: returns 5000 log probabilities per prediction. These are more informative than quantiles alone: the logits describe the complete predictive distribution over all 5000 bins, whereas quantiles provide only specific summary points (e.g. the 0.1, 0.2, ..., 0.9 quantiles). These 5000 values can be used to obtain a CDF.
    - criterion: A function to compute various probability-related metrics.


### Criterion methods
- `probs_val["criterion"].borders` returns the borders appropriate for the training data used and not the standard borders from TabPFN
- `probs_val["criterion"].forward` returns the NLL (implemented in TabPFN's `bar_distribution.py` module, class `FullSupportBarDistribution`)
- `probs_val["criterion"].cdf`
- `probs_val["criterion"].pdf`

# Train TabPFN

In [2]:
# Load the ENTSO-E data and run it through the preprocessing helpers.
# Innermost call runs first: scale -> add lagged features -> add interval index.
entsoe = add_interval_index(
    add_lagged_features(
        scale_power_data(load_entsoe())
    )
)

# Drop rows that contain missing values before splitting.
entsoe.dropna(inplace=True)

# The two timestamps are the split boundaries handed to the splitting helper.
train, validation, test = to_train_validation_test_data(
    entsoe,
    "2016-03-31 23:45:00",
    "2016-06-30 23:45:00",
)

Data loaded and transformed successfully. Shape of DataFrame: (78912, 22)
# of training observations: 8640 | 2.74%
# of validation observations: 8736 | 2.77%
# of test observations: 298173 | 94.49%


In [3]:
# Alternative single-feature setup, kept for reference:
# feature_columns = ['power_t-96']
feature_columns = ['ws_10m_loc_mean', 'ws_100m_loc_mean']
target_column = 'power'

# Feature matrix and target vector for the training split
X_train = train[feature_columns]
y_train = train[target_column]

# Feature matrix and target vector for the validation split
X_validation = validation[feature_columns]
y_validation = validation[target_column]

In [4]:
n = 10      # number of validation rows passed to model.predict
n1 = 1000   # number of training rows passed to model.fit
quantiles_custom = np.arange(0.1, 1, 0.1)  # quantile levels requested from predict

# Variant with ignore_pretraining_limits=True, kept for reference:
# model = TabPFNRegressor(device='auto', ignore_pretraining_limits=True, fit_mode='low_memory', random_state=42)
model = TabPFNRegressor(device='auto', fit_mode='low_memory', random_state=42)
model.fit(X_train.head(n1), y_train.head(n1))

# output_type="full" also returns the raw logits and the criterion object
probs_val = model.predict(
    X_validation.head(n),
    output_type="full",
    quantiles=quantiles_custom,
)

logits = probs_val["logits"]
# Borders appropriate for the training data used, not the standard borders from TabPFN
borders_new = probs_val["criterion"].borders
all_quantiles = np.array(probs_val["quantiles"])
y_values = y_validation.head(n)

# Test calculation of nll in forward method

In [None]:
logits = probs_val["logits"]
n_rows = logits.shape[0]  # number of predictions; replaces the previously hard-coded 10

# Convert logits to per-bin probabilities
probabilities = torch.nn.functional.softmax(logits, dim=1)

# Compute cumulative sum over the bins (CDF evaluated bin by bin)
cumulative_probs = torch.cumsum(probabilities, dim=1)

# Define quantile thresholds (0.1 to 0.9) on the same device/dtype as the CDF
quantile_thresholds = torch.tensor(
    [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
).to(device=logits.device, dtype=cumulative_probs.dtype)

# borders_new is already a tensor (see the type check cell further down); as_tensor
# casts/moves it without copy-constructing a tensor from a tensor.
borders_new = torch.as_tensor(borders_new, dtype=torch.float32, device=logits.device)

# One contiguous row of thresholds per prediction. repeat() materialises the rows,
# whereas expand() returns a non-contiguous view.
thresholds_per_row = quantile_thresholds.repeat(n_rows, 1)

# Find the first bin whose cumulative probability reaches each quantile threshold
quantile_indices = torch.searchsorted(cumulative_probs, thresholds_per_row)

# Clamp indices to valid range
quantile_indices = torch.clamp(quantile_indices, 0, borders_new.shape[-1] - 1)

# Border value at each located index. No interpolation inside the bin is done,
# so these values are only a bin-resolution approximation of the quantiles.
quantile_values = borders_new[quantile_indices]

print(quantile_values.shape)  # Should be (n_rows, 9), where 9 is for 0.1 to 0.9 thresholds
# Hand pandas a NumPy array so the display also works for tensors that live on another device
pd.DataFrame(quantile_values.detach().cpu().numpy())  # The border values at each quantile


torch.Size([10, 9])


  borders_new = torch.tensor(borders_new, dtype=torch.float32, device=logits.device)
  quantile_indices = torch.searchsorted(cumulative_probs, quantile_thresholds.unsqueeze(0).expand(10, -1))


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,-3.77483,-3.466965,-3.226929,-3.050339,-2.954472,-2.856125,-2.731798,-2.536625,-2.207819
1,-3.785307,-3.4788,-3.238273,-3.052653,-2.953873,-2.855042,-2.728276,-2.531377,-2.19995
2,-3.778476,-3.472312,-3.234933,-3.056653,-2.959017,-2.863,-2.744166,-2.542425,-2.209263
3,-3.782006,-3.47406,-3.234391,-3.053262,-2.956259,-2.85731,-2.731798,-2.534912,-2.203537
4,-3.788583,-3.486416,-3.245193,-3.056653,-2.955706,-2.855042,-2.724775,-2.52644,-2.195717
5,-3.779741,-3.475297,-3.232218,-3.047366,-2.949797,-2.850641,-2.724255,-2.530744,-2.202745
6,-3.775386,-3.471776,-3.233281,-3.049742,-2.952693,-2.854484,-2.732399,-2.537819,-2.208515
7,-3.773643,-3.466965,-3.223161,-3.042635,-2.948617,-2.852293,-2.728276,-2.53433,-2.206457
8,-3.779741,-3.469991,-3.226929,-3.045739,-2.95096,-2.853377,-2.727683,-2.53316,-2.202745
9,-3.77722,-3.468249,-3.219162,-3.040346,-2.94697,-2.848331,-2.720032,-2.528846,-2.201345


In [73]:
# Earlier variant with hand-typed border values:
#y_test = np.mean([-5.6534467, -5.637224])
# Synthetic target: the midpoint between borders 1000 and 1001, i.e. a value inside bin 1000.
# Used below to compare a manual NLL computation with criterion.forward.
y_test = np.mean([borders_new[1001], borders_new[1000]])
y_test

-3.9753823

In [74]:
# Manual NLL for a target inside bin 1000:
# density = probability mass of the bin divided by the bin width.
bin_width_1000 = borders_new[1001] - borders_new[1000]
prob_test = probabilities[:, 1000] / bin_width_1000

# torch.log keeps the computation in torch instead of routing a tensor through a NumPy ufunc
nll_test = -torch.log(prob_test)
nll_test

tensor([1.9394, 1.9255, 1.9395, 1.9304, 1.9294, 1.9356, 1.9443, 1.9450, 1.9340,
        1.9421])

In [75]:
# Reference NLL from TabPFN's criterion for the same synthetic target, repeated once per
# prediction row (row count taken from logits instead of a hard-coded 10).
probs_val["criterion"].forward(logits, torch.tensor([y_test] * logits.shape[0]))

tensor([1.9394, 1.9255, 1.9395, 1.9304, 1.9294, 1.9356, 1.9443, 1.9450, 1.9340,
        1.9421])

In [62]:
# Scratch check: exponentiate -6.7893.
# NOTE(review): presumably a log-value copied from another output; its origin is not shown here.
np.exp(-6.7893)

0.0011257565279984315

In [50]:
# Display `probabilities` as a table.
# NOTE(review): `probabilities` is bound to different objects in different cells
# (softmax output vs. quantile levels); what is shown depends on execution order.
pd.DataFrame(probabilities)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4990,4991,4992,4993,4994,4995,4996,4997,4998,4999
0,0.0,0.002146,0.000276,0.000128,0.010622,7.4e-05,4e-05,5.1e-05,4.4e-05,3.9e-05,...,0.000153,0.000165,0.000169,0.000157,0.000169,0.000217,0.000404,0.000488,0.000932,0.010792
1,0.0,0.002267,0.00029,0.000135,0.010605,7.8e-05,4.3e-05,5.4e-05,4.6e-05,4.2e-05,...,0.000156,0.00017,0.000173,0.000162,0.000173,0.000222,0.00042,0.000502,0.000953,0.010901
2,0.0,0.002278,0.000287,0.000132,0.010685,7.5e-05,4.2e-05,5.2e-05,4.5e-05,4e-05,...,0.000153,0.000166,0.00017,0.000158,0.00017,0.000218,0.000409,0.00049,0.000937,0.010868
3,0.0,0.002235,0.000286,0.000133,0.010592,7.7e-05,4.2e-05,5.3e-05,4.6e-05,4.1e-05,...,0.000155,0.000168,0.000172,0.00016,0.000172,0.00022,0.000414,0.000496,0.000944,0.01083
4,0.0,0.002408,0.000301,0.000139,0.010693,8e-05,4.4e-05,5.6e-05,4.7e-05,4.3e-05,...,0.000158,0.000173,0.000176,0.000165,0.000176,0.000224,0.000431,0.000509,0.000968,0.011082
5,0.0,0.002213,0.000282,0.000131,0.010609,7.6e-05,4.1e-05,5.2e-05,4.5e-05,4e-05,...,0.000155,0.000168,0.000172,0.00016,0.000172,0.00022,0.000414,0.000497,0.000946,0.010851
6,0.0,0.002187,0.000281,0.000131,0.010603,7.5e-05,4.2e-05,5.2e-05,4.5e-05,4e-05,...,0.000154,0.000167,0.000171,0.000159,0.000171,0.000219,0.000411,0.000493,0.000939,0.010755
7,0.0,0.002146,0.000276,0.000129,0.010602,7.5e-05,4.1e-05,5.1e-05,4.4e-05,4e-05,...,0.000154,0.000166,0.00017,0.000158,0.00017,0.000218,0.000408,0.000492,0.000936,0.010812
8,0.0,0.002234,0.000285,0.000132,0.010583,7.7e-05,4.2e-05,5.3e-05,4.6e-05,4.1e-05,...,0.000155,0.000168,0.000171,0.00016,0.000171,0.000219,0.000413,0.000496,0.000944,0.01085
9,0.0,0.002207,0.000279,0.000129,0.010686,7.5e-05,4.1e-05,5.2e-05,4.4e-05,4e-05,...,0.000154,0.000167,0.000171,0.000159,0.000171,0.000219,0.000411,0.000495,0.000943,0.010979


In [49]:
# Inspect which class implements the criterion returned by predict(output_type="full")
type(probs_val["criterion"])

tabpfn.model.bar_distribution.FullSupportBarDistribution

In [1]:
#probs_val["criterion"].pdf(logits=0.5, y=3)

In [72]:
# Largest bin border of the criterion
probs_val["criterion"].borders.max()

tensor(11.9689)

In [47]:
# Sum of exp(logit) over the first 1296 bins of the first prediction row.
# The recorded output is 0.1000.
lsum = np.exp(probs_val["logits"][0, :1296]).sum()
lsum

tensor(0.1000)

# Compute crps from logits and bins at once using PyTorch. Given that:
- You have a batch of 𝑁 rows
- Each row has 5000 logits.
- Each row has one corresponding 𝑦 value.
- The bin boundaries are fixed across rows.

In [5]:
# Show the container types of the three inputs to the CRPS computation
for inspected in (probs_val["logits"], probs_val["criterion"].borders, y_validation):
    print(type(inspected))

<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'pandas.core.series.Series'>


In [6]:
def compute_crps_pytorch(logits, bin_edges, y_values):
    """
    Computes the CRPS for multiple rows of logits and corresponding y-values using PyTorch.

    The predictive CDF is taken at the upper edge of every bin and compared with
    the step function 1[edge >= y]. The squared difference is summed over the
    bins, weighted by the bin widths.

    Args:
        logits: Tensor of shape (N, B) - unnormalized logits for each row
            (B = 5000 for the TabPFN output used in this notebook).
        bin_edges: Tensor or array-like of shape (B + 1,) - common bin edges for all rows.
        y_values: Tensor or array-like (e.g. pandas Series) of shape (N,) -
            target values for each row.

    Returns:
        Tensor of shape (N,) containing the CRPS values for each row.

    Raises:
        ValueError: if the number of targets does not match the number of logit rows.
    """
    # Convert logits to probabilities using softmax
    probs = torch.softmax(logits, dim=1)  # (N, B)

    # Use the targets that were passed in (they were previously overwritten with a
    # global). Non-tensor inputs go through NumPy so that a label-indexed pandas
    # Series is converted by position.
    if not torch.is_tensor(y_values):
        y_values = torch.as_tensor(np.asarray(y_values))
    y_values = y_values.to(dtype=probs.dtype, device=probs.device).reshape(-1)

    if not torch.is_tensor(bin_edges):
        bin_edges = torch.as_tensor(np.asarray(bin_edges))
    bin_edges = bin_edges.to(dtype=probs.dtype, device=probs.device)

    if y_values.shape[0] != probs.shape[0]:
        raise ValueError(
            f"Expected {probs.shape[0]} target values, got {y_values.shape[0]}"
        )

    # Compute CDF (cumulative sum of probabilities), one value per upper bin edge
    cdf = torch.cumsum(probs, dim=1)  # (N, B)

    # Indicator function (1 if upper bin edge >= y, else 0), broadcast to (N, B)
    upper_edges = bin_edges[1:].unsqueeze(0)  # (1, B)
    indicators = (upper_edges >= y_values.unsqueeze(1)).to(probs.dtype)  # (N, B)

    # Bin widths act as integration weights
    bin_widths = (bin_edges[1:] - bin_edges[:-1]).unsqueeze(0)  # (1, B)

    # CRPS integral for each row
    crps = torch.sum((cdf - indicators) ** 2 * bin_widths, dim=1)  # (N,)

    return crps


In [7]:
# Targets of the first 10 validation rows as a float32 tensor.
# .to_numpy() hands torch a plain array, so the tensor is not built by indexing
# the label-indexed Series position by position.
y_values = torch.tensor(y_validation.head(10).to_numpy(), dtype=torch.float32)

  y_values = torch.tensor(y_validation.head(10), dtype=torch.float32)


In [8]:
# CRPS per validation row, computed from the full set of logits and the fitted bin borders
crps_values = compute_crps_pytorch(probs_val["logits"], borders_new, y_values)
print("CRPS shape:", crps_values.shape)  # Should be (N,)
print("First few CRPS values:", crps_values[:10])

CRPS shape: torch.Size([10])
First few CRPS values: tensor([0.2449, 0.3318, 0.3937, 0.3613, 0.3199, 0.2840, 0.2493, 0.3083, 0.3252,
        0.2212])


  y_values = torch.tensor(y_validation.head(10), dtype=torch.float32)


# Compute NLL using the built-in forward method in TabPFN

In [9]:
# NLL of the true targets of the first 10 validation rows under the predicted distribution.
# The Series is converted with .to_numpy() before it is turned into a tensor.
probs_val["criterion"].forward(
    logits,
    torch.tensor(y_validation.head(10).to_numpy(), dtype=torch.float32),
)

  probs_val["criterion"].forward(logits, torch.tensor(y_validation.head(10), dtype=torch.float32))


tensor([5.1476, 4.5700, 3.8501, 3.1988, 2.5708, 2.5734, 3.1284, 3.3358, 3.5472,
        4.1890])

## Two Ways to Calculate CRPS and NLL

There are two main approaches to computing the Continuous Ranked Probability Score (CRPS) and Negative Log-Likelihood (NLL):

### 1. Using Logits (5000 Bins)
- For each validation data point, we have **5000 logits** (one per bin) and **5001 bin borders** (the edges of the 5000 bins).
- These logits represent the probability distribution in detail.
- We compute CRPS and NLL directly from these values.

### 2. Using the Deciles
- Instead of using all 5000 bins, we only have the **9 deciles** (the 10th, 20th, ..., 90th percentiles).
- From these deciles, we reconstruct the **Cumulative Distribution Function (CDF)** and **Probability Density Function (PDF)**.
- We then compute CRPS (via integration) and NLL (by taking the logarithm of the PDF at the true values).

The first method provides a more detailed probability distribution, while the second is a summarized approximation.


# Test CRPS and NLL output against the CDF and PDF interpolated from the 9 deciles from TabPFN

In [10]:
# Raw quantile output: a list with one array per requested quantile level
probs_val["quantiles"]

[array([-1.8277754, -1.8298945, -1.8344615, -1.8417706, -2.021193 ,
        -2.179114 , -2.1690774, -2.169115 , -2.183083 , -3.0689993],
       dtype=float32),
 array([-1.8217977, -1.8240836, -1.8283467, -1.833042 , -1.8389053,
        -2.124267 , -2.155877 , -2.158474 , -2.1637566, -3.0422564],
       dtype=float32),
 array([-1.81873  , -1.8204918, -1.8241727, -1.8284067, -1.8319852,
        -2.0076122, -2.1447604, -2.151616 , -2.1549172, -3.0161467],
       dtype=float32),
 array([-1.8163261, -1.8180671, -1.8209904, -1.8245995, -1.827521 ,
        -1.8498343, -2.1348133, -2.144406 , -2.1470256, -2.950388 ],
       dtype=float32),
 array([-1.813889 , -1.8158144, -1.8185524, -1.8213444, -1.8235852,
        -1.8375537, -2.122051 , -2.1387231, -2.1414561, -2.182059 ],
       dtype=float32),
 array([-1.8113945, -1.813384 , -1.8162241, -1.8186829, -1.8202038,
        -1.8299109, -2.1030521, -2.1323419, -2.1343899, -2.1611454],
       dtype=float32),
 array([-1.8088547, -1.8107562, -1.81347

In [11]:
# Container type of the quantile output
type(probs_val["quantiles"])

list

In [12]:
# Tabular view: one row per quantile level, one column per validation point
pd.DataFrame(probs_val["quantiles"])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-1.827775,-1.829895,-1.834461,-1.841771,-2.021193,-2.179114,-2.169077,-2.169115,-2.183083,-3.068999
1,-1.821798,-1.824084,-1.828347,-1.833042,-1.838905,-2.124267,-2.155877,-2.158474,-2.163757,-3.042256
2,-1.81873,-1.820492,-1.824173,-1.828407,-1.831985,-2.007612,-2.14476,-2.151616,-2.154917,-3.016147
3,-1.816326,-1.818067,-1.82099,-1.8246,-1.827521,-1.849834,-2.134813,-2.144406,-2.147026,-2.950388
4,-1.813889,-1.815814,-1.818552,-1.821344,-1.823585,-1.837554,-2.122051,-2.138723,-2.141456,-2.182059
5,-1.811394,-1.813384,-1.816224,-1.818683,-1.820204,-1.829911,-2.103052,-2.132342,-2.13439,-2.161145
6,-1.808855,-1.810756,-1.813478,-1.815959,-1.817224,-1.823668,-2.060403,-2.124233,-2.126952,-2.145902
7,-1.805795,-1.807816,-1.810208,-1.812434,-1.813259,-1.817963,-1.966955,-2.112472,-2.116883,-2.132
8,-1.801099,-1.803176,-1.805692,-1.807615,-1.807767,-1.81045,-1.83994,-2.085774,-2.098086,-2.111278


In [13]:
# Transposed view: one row per validation point, one column per quantile level
pd.DataFrame(np.array(probs_val["quantiles"]).T)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,-1.827775,-1.821798,-1.81873,-1.816326,-1.813889,-1.811394,-1.808855,-1.805795,-1.801099
1,-1.829895,-1.824084,-1.820492,-1.818067,-1.815814,-1.813384,-1.810756,-1.807816,-1.803176
2,-1.834461,-1.828347,-1.824173,-1.82099,-1.818552,-1.816224,-1.813478,-1.810208,-1.805692
3,-1.841771,-1.833042,-1.828407,-1.8246,-1.821344,-1.818683,-1.815959,-1.812434,-1.807615
4,-2.021193,-1.838905,-1.831985,-1.827521,-1.823585,-1.820204,-1.817224,-1.813259,-1.807767
5,-2.179114,-2.124267,-2.007612,-1.849834,-1.837554,-1.829911,-1.823668,-1.817963,-1.81045
6,-2.169077,-2.155877,-2.14476,-2.134813,-2.122051,-2.103052,-2.060403,-1.966955,-1.83994
7,-2.169115,-2.158474,-2.151616,-2.144406,-2.138723,-2.132342,-2.124233,-2.112472,-2.085774
8,-2.183083,-2.163757,-2.154917,-2.147026,-2.141456,-2.13439,-2.126952,-2.116883,-2.098086
9,-3.068999,-3.042256,-3.016147,-2.950388,-2.182059,-2.161145,-2.145902,-2.132,-2.111278


In [17]:
# Raw quantile output again.
# NOTE(review): the recorded values differ from the earlier display of the same expression,
# so the two outputs stem from different model runs.
probs_val["quantiles"]

[array([-3.7701688, -3.7668774, -3.7604296, -3.7604551, -3.7620862,
        -3.7699504, -3.7632437, -3.7660859, -3.7688437, -3.7558699],
       dtype=float32),
 array([-3.477859 , -3.4757373, -3.4730644, -3.4753118, -3.472409 ,
        -3.4791975, -3.473359 , -3.473982 , -3.480741 , -3.4652367],
       dtype=float32),
 array([-3.2285626, -3.2234228, -3.2247877, -3.2261848, -3.2156048,
        -3.2244577, -3.2183523, -3.2109957, -3.2221928, -3.2096624],
       dtype=float32),
 array([-3.0612803, -3.0537133, -3.0516655, -3.0515537, -3.0489075,
        -3.052363 , -3.0482128, -3.0430524, -3.045863 , -3.0450547],
       dtype=float32),
 array([-2.96372  , -2.9582756, -2.9565432, -2.9552174, -2.9545746,
        -2.955995 , -2.953977 , -2.9494987, -2.9494128, -2.9519734],
       dtype=float32),
 array([-2.8628237, -2.8587978, -2.8587139, -2.8565624, -2.8562016,
        -2.853871 , -2.855647 , -2.849408 , -2.847637 , -2.8541012],
       dtype=float32),
 array([-2.7359865, -2.736009 , -2.74214

In [10]:
# Quantile levels (0.1 ... 0.9) that belong to the rows of all_quantiles.
# NOTE: this rebinds `probabilities`, which other cells use for the softmax bin probabilities.
probabilities = quantiles_custom

# Initialize lists to store the results
crps_cdf_linear_a = []
crps_hybrid_cdf_a = []
crps_normal_a = []

nll_pdf_linear_a = []
nll_pdf_hybrid_a = []
nll_normal_a = []

# all_quantiles holds one row per quantile level and one column per validation point,
# so the number of points comes from the data instead of a hard-coded 10.
for i in range(all_quantiles.shape[1]):
    quantile_i = all_quantiles[:, i]
    # Explicit positional lookup; plain y_validation[i] on a label-indexed Series
    # relies on a deprecated positional fallback.
    y_i = y_validation.iloc[i]

    # evaluate returns six scores: three CRPS variants followed by three NLL variants.
    # NOTE(review): -20 and 5 are presumably the integration bounds (y_min, y_max) - confirm in evaluate.
    cdf_linear, hybrid_cdf, crps_normal, pdf_linear, pdf_hybrid, nll_normal = evaluate(quantile_i, probabilities, y_i, -20, 5)

    # Append the results to respective lists
    crps_cdf_linear_a.append(cdf_linear)
    crps_hybrid_cdf_a.append(hybrid_cdf)
    crps_normal_a.append(crps_normal)

    nll_pdf_linear_a.append(pdf_linear)
    nll_pdf_hybrid_a.append(pdf_hybrid)
    nll_normal_a.append(nll_normal)


print("crps linear", crps_cdf_linear_a)
print("crps hybrid", crps_hybrid_cdf_a)
print("crps normal", crps_normal_a)

print("NLL linaer", nll_pdf_linear_a)
print("NLL hybrid", nll_pdf_hybrid_a)
print("NLL normal", nll_normal_a)

# Calculate the mean values
mean_crps_cdf_linear = np.mean(crps_cdf_linear_a)
mean_crps_hybrid_cdf_a = np.mean(crps_hybrid_cdf_a)
mean_crps_normal_a = np.mean(crps_normal_a)

mean_nll_pdf_linear_a = np.mean(nll_pdf_linear_a)
mean_nll_pdf_hybrid_a = np.mean(nll_pdf_hybrid_a)
mean_nll_normal_a = np.mean(nll_normal_a)

# Print the results
print(f"Mean CRPS for CDF Linear interpolation: {mean_crps_cdf_linear:.4f}")
print(f"Mean CRPS for Hybrid CDF interpolation: {mean_crps_hybrid_cdf_a:.4f}")
print(f"Mean CRPS for Normal distribution interpolation: {mean_crps_normal_a:.4f}")

print(f"Mean NLL for PDF Linear interpolation: {mean_nll_pdf_linear_a:.4f}")
print(f"Mean NLL for PDF Hybrid interpolation: {mean_nll_pdf_hybrid_a:.4f}")
print(f"Mean NLL for Normal distribution interpolation: {mean_nll_normal_a:.4f}")


  y_i = y_validation[i]
  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  crps_value, _ = quad(integrand, y_min, y_max)


crps linear [0.2861731450434026, 0.3580988898201175, 0.4179545219940547, 0.4120568875736647, 0.3797003703216708, 0.36190444693967244, 0.30303610451284635, 0.3476314384598009, 0.38321383265115533, 0.2916924175353249]
crps hybrid [2.49166540025684e-15, 1.9075701613682034e-15, 0.4124433976772819, 0.4031903742239475, 0.31477283325851985, 0.2946693203634102, 0.26057055066927476, 0.32155665373096776, 0.36138305994968795, 0.22006106196446967]
crps normal [0.2501050435246061, 0.3396532643296552, 0.41374935592384066, 0.4055244014621639, 0.3130200358165781, 0.29751073223216096, 0.24915259728660774, 0.32162020720862916, 0.3647230514548862, 0.1239660242346708]
NLL linaer [5.202435482758078, 5.202315383419898, 5.202032332545818, 5.201559836111698, 5.181181573858381, 5.18215916045088, 5.183453159210744, 5.183484663046921, 5.18267136974656, 2.0619540484106333]
NLL hybrid [9.424587086291819, 12.04605235826752, 13.640128966945316, 12.069828345915163, 1.6053173154806082, 1.1464077158476758, 6.4756578160

In [11]:
# Side-by-side comparison of the decile-based scores with the scores computed
# from the full set of logits, one row per validation point.
data = {
    'CRPS Linear': crps_cdf_linear_a,
    'CRPS Hybrid': crps_hybrid_cdf_a,
    'CRPS Normal': crps_normal_a,
    'CRPS (5000 quantiles)': crps_values,
    'NLL Linear': nll_pdf_linear_a,
    'NLL Hybrid': nll_pdf_hybrid_a,
    'NLL Normal': nll_normal_a,
    # Targets are converted with .to_numpy() before being turned into a tensor,
    # which avoids positional indexing of the label-indexed Series.
    'NLL (5000 quantiles)': probs_val["criterion"].forward(
        logits,
        torch.tensor(y_validation.head(10).to_numpy(), dtype=torch.float32),
    ),
    'y values': y_values,
    'first quantile': all_quantiles[0,:],
    'last quantile': all_quantiles[-1,:]

}

# Create DataFrame
df = pd.DataFrame(data)
df = df.round(8)
df

  'NLL (5000 quantiles)': probs_val["criterion"].forward(logits, torch.tensor(y_validation.head(10), dtype=torch.float32)),


Unnamed: 0,CRPS Linear,CRPS Hybrid,CRPS Normal,CRPS (5000 quantiles),NLL Linear,NLL Hybrid,NLL Normal,NLL (5000 quantiles),y values,first quantile,last quantile
0,0.286173,0.0,0.250105,0.244875,5.202435,9.424587,300.573703,5.147587,-2.069994,-1.828573,-1.800969
1,0.358099,0.0,0.339653,0.331774,5.202315,12.046052,546.62126,4.569952,-2.161714,-1.830756,-1.803196
2,0.417955,0.412443,0.413749,0.393674,5.202032,13.640129,659.976031,3.85008,-2.239793,-1.835898,-1.805628
3,0.412057,0.40319,0.405524,0.361293,5.20156,12.069828,438.459498,3.198795,-2.236484,-1.844478,-1.807213
4,0.3797,0.314773,0.31302,0.319856,5.181182,1.605317,5.17902,2.570792,-2.237034,-2.210712,-1.807299
5,0.361904,0.294669,0.297511,0.283957,5.182159,1.146408,1.947727,2.57338,-2.317299,-2.193313,-1.811177
6,0.303036,0.260571,0.249153,0.249342,5.183453,6.475658,2.654324,3.128364,-2.394849,-2.170256,-1.848092
7,0.347631,0.321557,0.32162,0.308329,5.183485,9.519523,56.119942,3.335771,-2.474023,-2.169694,-2.083438
8,0.383214,0.361383,0.364723,0.32521,5.182671,8.115324,70.710684,3.547239,-2.522667,-2.18419,-2.096891
9,0.291692,0.220061,0.123966,0.221219,2.061954,3.690944,0.183938,4.189002,-2.588567,-3.0659,-2.107797


In [16]:
# Display the bin borders as a single-column table
pd.DataFrame(borders_new)

Unnamed: 0,0
0,-74.242508
1,-22.184608
2,-19.463432
3,-18.329407
4,-17.415796
...,...
4996,14.278324
4997,15.063480
4998,16.556814
4999,19.246876


In [16]:
# Same borders with a named column, so that rows can be selected by border value
borders_df = pd.DataFrame(borders_new, columns=["border"])
borders_df

Unnamed: 0,border
0,-74.242508
1,-22.184608
2,-19.463432
3,-18.329407
4,-17.415796
...,...
4996,14.278324
4997,15.063480
4998,16.556814
4999,19.246876


In [12]:
# Per-bin probability mass for every prediction row
bin_probs = torch.softmax(logits, dim=1)

# Width of every bin
bin_widths = borders_new[1:] - borders_new[:-1]

# View of bin_widths with one row per prediction; the row count is read from
# bin_probs instead of being hard-coded to 10.
bin_widths_expanded = bin_widths.unsqueeze(0).expand(bin_probs.shape[0], -1)

# Divide the probabilities by the bin widths to obtain a density per bin
adjusted_probabilities = bin_probs / bin_widths_expanded

# Negative log density of row 9 for bins 3341..3345.
# torch.log is used so that no NumPy ufunc is applied to a torch tensor.
-torch.log(adjusted_probabilities[9, 3341:3346])

  - np.log(adjusted_probabilities[9, 3341:3346])


tensor([inf, inf, inf, inf, inf])

In [13]:
# Logit of bin 3343 for prediction row 9; the recorded output is -inf
logits[9, 3343:3344]

tensor([-inf])

In [14]:
# NLL reported by the criterion for prediction row 9.
# The Series is converted with .to_numpy() before it is turned into a tensor.
probs_val["criterion"].forward(logits, torch.tensor(y_validation.head(10).to_numpy()))[9]

  probs_val["criterion"].forward(logits, torch.tensor(y_validation.head(10)))[9]


tensor(4.1890)

In [18]:
# Borders that lie in the closed interval [-2.60, -2.58] (original note on this cell: 3340)
in_window = borders_df['border'].between(-2.60, -2.58)
borders_df.loc[in_window]

Unnamed: 0,border
2219,-2.599931
2220,-2.597499
2221,-2.595043
2222,-2.592509
2223,-2.590126
2224,-2.587731
2225,-2.585031
2226,-2.582548
