<a href="https://colab.research.google.com/github/Q-MAB/MultivariateEconometrics/blob/main/Welkom_bij_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

# Load the CSV file
df = pd.read_csv('repeat_sales_pairs.csv', sep=';', decimal=',')

# Convert 'current_time' to datetime
df['current_time'] = pd.to_datetime(df['current_time'], format='%Y-%m')

# Convert 'log_return' to float
df['log_return'] = df['log_return'].astype(float)

# Create 'x' based on 'time_gap'
df['x'] = np.where(df['time_gap'] <= 6, 1.0 / df['time_gap'], 0.0)

# Drop any NaNs in 'log_return' or 'x', and any zeros
df = df.dropna(subset=['log_return', 'x'])
df = df[(df['log_return'] != 0) & (df['x'] != 0)]

# Prepare data for TensorFlow
log_returns = tf.constant(df['log_return'].values, dtype=tf.float32)
time_gaps = tf.constant(df['time_gap'].values, dtype=tf.float32)
x_values = tf.constant(df['x'].values, dtype=tf.float32)

# Display the first few rows of the processed dataframe
print(df.head())

   identifier id_type combined_id previous_time current_time  log_return  \
2        1027    bgid   bgid_1027       2021-01   2021-05-01    -0.00356   
3        1027    bgid   bgid_1027       2021-05   2021-08-01     0.01767   
5        1027    bgid   bgid_1027       2022-04   2022-05-01     0.03223   
7        1027    bgid   bgid_1027       2022-12   2023-01-01    -0.13256   
9        1027    bgid   bgid_1027       2023-09   2023-12-01     0.02866   

   time_gap  avg_buyer_price_eur_previous  avg_buyer_price_eur_current  age  \
2         4                           563                          561   21   
3         3                           561                          571   21   
5         1                           580                          599   21   
7         1                           588                          515   21   
9         3                           516                          531   21   

       brand bottler  num_trades  vintage_year brand_region  bottle_

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_probability as tfp

# Aliases
tfd = tfp.distributions
tfb = tfp.bijectors
sts = tfp.sts

# 1) LOAD & PREPARE DATA
# -----------------------------------------------------------------------------
df = pd.read_csv(
    'repeat_sales_pairs.csv',
    sep=';',
    decimal=',',
    parse_dates=['current_time'],
    date_parser=lambda s: pd.to_datetime(s, format='%Y-%m')
)

# cast and filter
df['log_return'] = pd.to_numeric(df['log_return'], errors='coerce')
df = df.dropna(subset=['log_return', 'time_gap'])
df = df[df['time_gap'] > 0]

# compute x feature if you still need it — e.g. inverse time gap for short gaps
df['x'] = np.where(df['time_gap'] <= 6, 1.0 / df['time_gap'], 0.0)

# ensure monotonic monthly index
df = df.set_index('current_time').sort_index()
# if you have missing months and want to fill (optional):
# idx = pd.date_range(df.index.min(), df.index.max(), freq='MS')
# df = df.reindex(idx).interpolate()  # or fillna(0), as appropriate

# extract observations
observations = df['log_return'].values.astype(np.float32)
num_timesteps = len(observations)
time_index = df.index  # DatetimeIndex for mapping back later

# 2) BUILD THE STATE-SPACE MODEL
# -----------------------------------------------------------------------------
# learnable observation noise scale with Student's t for robustness
obs_scale = tfp.util.TransformedVariable(
    0.1, tfb.Softplus(), dtype=tf.float32, name='obs_scale'
)
observation_noise = tfd.StudentT(
    df=tf.constant(4.0, dtype=tf.float32),
    loc=0.,
    scale=obs_scale
)

model = sts.LocalLinearTrendStateSpaceModel(
    num_timesteps=num_timesteps,
    level_scale_prior=sts.LogNormalPrior(loc=0., scale=1.),
    slope_scale_prior=sts.LogNormalPrior(loc=0., scale=1.),
    observation_noise=observation_noise,
    initial_state_prior=sts.MultivariateNormalDiag(
        loc=[0., 0.],
        scale_diag=[1., 1.]
    )
)

# 3) SET UP VARIATIONAL INFERENCE
# -----------------------------------------------------------------------------
surrogate_posterior = sts.build_factored_surrogate_posterior(
    model=model
)

# fit via variational inference
optimizer = tf.optimizers.Adam(learning_rate=0.05)

@tf.function(experimental_compile=True)
def run_vi(num_steps):
    return tfp.vi.fit_surrogate_posterior(
        target_log_prob_fn=model.joint_log_prob(observations),
        surrogate_posterior=surrogate_posterior,
        optimizer=optimizer,
        num_steps=num_steps
    )

# 4) TRAIN
# -----------------------------------------------------------------------------
num_vi_steps = 500
loss_history = run_vi(num_vi_steps)

# 5) EXTRACT POSTERIOR MEANS & PLOT βₜ
# -----------------------------------------------------------------------------
# draw samples from the variational posterior
q_samples = surrogate_posterior.sample(400)

# latent state dims: [..., level, slope]
level_samples = q_samples[..., 0]
slope_samples = q_samples[..., 1]

# compute posterior mean of slope at each time
mean_slope = tf.reduce_mean(slope_samples, axis=0).numpy()

# map back to date index & plot
slope_series = pd.Series(mean_slope, index=time_index)

plt.figure(figsize=(10, 4))
slope_series.plot()
plt.title("Posterior Mean of βₜ (Slope) Over Time")
plt.xlabel("Date")
plt.ylabel("βₜ")
plt.tight_layout()
plt.show()

  df = pd.read_csv(


AttributeError: module 'tensorflow_probability.python.sts' has no attribute 'LogNormalPrior'

In [6]:
!pip install tensorflow-probability==0.18.0

Collecting tensorflow-probability==0.18.0
  Downloading tensorflow_probability-0.18.0-py2.py3-none-any.whl.metadata (13 kB)
Downloading tensorflow_probability-0.18.0-py2.py3-none-any.whl (6.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.6/6.6 MB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorflow-probability
  Attempting uninstall: tensorflow-probability
    Found existing installation: tensorflow-probability 0.25.0
    Uninstalling tensorflow-probability-0.25.0:
      Successfully uninstalled tensorflow-probability-0.25.0
Successfully installed tensorflow-probability-0.18.0
