In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the sales data into a pandas DataFrame
data = pd.read_csv('sales_data.csv')

# Input feature (opens) and target (purchases), each kept as a one-column DataFrame
X = data.loc[:, ['Öffnungen']]
y = data.loc[:, ['Käufe']]

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
def log_likelihood(params, observations):
    """
    Accumulate the log likelihood of the HMM over a sequence of observations.

    ``params`` must unpack into exactly ten values (alpha, beta, gamma, delta,
    sigma_alpha, sigma_beta, rho, mu, sigma_rho, sigma_mu); they are passed in
    that order to ``initial_state_probs``.

    The first observation is scored by indexing the initial state
    probabilities with ``obs[0]`` and ``obs[1]``; every later observation is
    scored with ``forward_step``. After each observation the state
    probabilities are replaced by the result of ``backward_step``.

    Returns the sum of the natural logs of those per-observation scores
    (``0`` when ``observations`` is empty).
    """
    alpha, beta, gamma, delta, sigma_alpha, sigma_beta, rho, mu, sigma_rho, sigma_mu = params

    state_probs = initial_state_probs(
        alpha, beta, gamma, delta, sigma_alpha, sigma_beta, rho, mu, sigma_rho, sigma_mu
    )

    # Named `total` so the accumulator does not shadow the function's own name.
    total = 0
    for t in range(len(observations)):
        obs = observations[t]
        likelihood = state_probs[obs[0]][obs[1]] if t == 0 else forward_step(obs, state_probs)
        total += np.log(likelihood)
        state_probs = backward_step(obs, state_probs)

    return total


In [None]:
from scipy.optimize import minimize

# Initial parameter values
alpha = np.zeros((2, 2))
beta = np.zeros((2, 2))
gamma = np.zeros((2, 2))
delta = np.zeros((2, 2))
sigma_alpha = 1.0
sigma_beta = 1.0
rho = 0.5
mu = np.zeros(2)
sigma_rho = 1.0
sigma_mu = 1.0

params = [alpha, beta, gamma, delta, sigma_alpha, sigma_beta, rho, mu, sigma_rho, sigma_mu]

# scipy.optimize.minimize requires a flat 1-D starting vector, but `params`
# mixes 2x2 arrays, a length-2 array and scalars. Remember each parameter's
# shape, flatten everything into one vector, and rebuild the ten parameters
# inside the objective before handing them to log_likelihood.
param_shapes = [np.shape(p) for p in params]
x0 = np.concatenate([np.ravel(p) for p in params]).astype(float)


def unpack_params(flat):
    """Rebuild the ten-parameter list (original shapes) from a flat vector."""
    rebuilt = []
    offset = 0
    for shape in param_shapes:
        size = int(np.prod(shape))  # np.prod(()) == 1, so scalars take one slot
        chunk = flat[offset:offset + size]
        rebuilt.append(chunk.reshape(shape) if shape else chunk[0])
        offset += size
    return rebuilt


# Perform Maximum Likelihood Estimation to fit the model.
# result.x is the flat vector; use unpack_params(result.x) to recover the
# individual parameters.
# NOTE(review): `observations` is not defined in any cell visible here — confirm
# it is created before this cell runs.
result = minimize(
    lambda x: -log_likelihood(unpack_params(x), observations),
    x0,
    method='L-BFGS-B',
)


In [None]:
# Extract the estimated parameters
est_params = result.x
# NOTE(review): the line below is a bare tuple expression, not an assignment —
# nothing is unpacked from est_params; it only evaluates (and, as the last
# expression of the cell, displays) the current values of these names. It looks
# like a truncated unpacking statement — confirm the intended right-hand side.
alpha, beta, gamma, delta, sigma_alpha, sigma_beta, rho,


In [None]:
import numpy as np
from scipy.special import gammaln
from scipy.optimize import minimize

def neg_log_likelihood(params, data):
    """
    Compute the negative log-likelihood of the model.

    Parameters:
    params (sequence): Exactly ten model parameters, in the order
        (alpha, beta, sigma_alpha, sigma_beta, delta, sigma_delta, r, gamma,
        sigma_gamma, pi). ``alpha``, ``beta`` and ``delta`` are indexed
        (``alpha[1]``, ``beta[0]``, ``beta[1]``, ``delta[0]``, ``delta[1]``),
        so they must be indexable. The ``sigma_*`` values are unpacked but not
        used in this function.
    data (tuple): ``(o, y, em, lo, ly)``; each element is indexed as
        ``[i][t]``. The outer length is taken from ``o`` and the number of
        periods for entry ``i`` from ``o[i]``.

    Returns:
    The negated sum, over every ``i`` and every ``t >= 1``, of
    ``log(q) + log(p_open) + log(p_buy) + corr``. Returns ``0`` when there is
    nothing to sum (no entries, or every entry has fewer than two periods).

    Notes:
    * The objective is deterministic: no random noise is drawn inside the
      likelihood, so repeated calls with the same arguments agree and a
      gradient-based optimizer can be used on it.
    * The count term divides by ``o!`` (computed as ``exp(gammaln(o + 1))``),
      not by ``log(o!)``, which is zero for ``o`` equal to 0 or 1.
    * NOTE(review): the helper ``c`` is not defined in the visible cells —
      confirm it is available before this function is called.
    """
    # Extract model parameters from params
    alpha, beta, sigma_alpha, sigma_beta, delta, sigma_delta, r, gamma, sigma_gamma, pi = params

    # Extract observed data
    o, y, em, lo, ly = data

    total = 0

    # Loop over every entry i and every period t that has a predecessor
    for i in range(len(o)):
        for t in range(1, len(o[i])):

            # Transition probability in logit form, from the previous period's covariates
            v = alpha + gamma*em[i][t-1] + beta*np.log(lo[i][t-1])
            exp_v = np.exp(v)
            q = exp_v / (1 + np.sum(exp_v))

            # Observation probabilities for opening and purchase
            phi = 1 / (1 + np.exp(delta[0] + delta[1]*np.log(ly[i][t])))
            lam = np.exp(gamma*em[i][t] + beta[0]*em[i][t]**2 + beta[1]*em[i][t] + alpha[1])
            # gammaln(o + 1) is log(o!), so exponentiate it to divide by o! itself
            o_factorial = np.exp(gammaln(o[i][t]+1))
            p_open = ((em[i][t]**o[i][t]) / o_factorial) * (lam**o[i][t]) * np.exp(-lam)
            p_buy = phi*(1-np.exp(-lam/r)) + (1-phi)*np.exp(-lam/r)*(1+ly[i][t]/r)**(-r)*(1+1/(ly[i][t]+1))

            # Correlation term: mixed difference of c over current and previous values
            f1 = o[i][t]
            f2 = y[i][t]
            f1_1 = o[i][t-1]
            f2_1 = y[i][t-1]
            corr = c(f1, f2, pi) - c(f1_1, f2, pi) - c(f1, f2_1, pi) + c(f1_1, f2_1, pi)

            # Update log-likelihood
            total += np.log(q) + np.log(p_open) + np.log(p_buy) + corr

    # Return negative log-likelihood
    return -total


In [None]:
# State distribution at time t=0
startprob = np.array([0.5, 0.5])

# Transition matrix
transmat = np.array([[0.7, 0.3],
                     [0.3, 0.7]])

# Emission parameters: one two-feature mean vector per hidden state
means = np.array([[0.0, 0.5],
                  [1.0, 0.5]])

# One 2x2 identity covariance matrix per hidden state
covars = np.tile(np.identity(2), (2,1,1))

# NOTE(review): `hmm` is not imported in any visible cell — presumably
# `from hmmlearn import hmm`; confirm it is imported before this cell runs.
model = hmm.GaussianHMM(n_components=2, covariance_type="full")
model.startprob_ = startprob
model.transmat_ = transmat
model.means_ = means
model.covars_ = covars

# Forecast the hidden state for the next 5 steps.
# GaussianHMM.predict only decodes the states of the observations it is given
# (it has no `n_steps` argument and returns a single array), so the forecast is
# built by starting from the last decoded state and propagating the state
# distribution through the transition matrix one step at a time.
# NOTE(review): X as built in the first cell has a single column, while `means`
# describes two features per state — confirm the intended observation matrix.
n_steps = 5
decoded_states = model.predict(X)
state_dist = np.eye(model.n_components)[decoded_states[-1]]
forecast = []
for _ in range(n_steps):
    state_dist = state_dist @ model.transmat_
    forecast.append(int(np.argmax(state_dist)))
predicted_states = np.array(forecast)

print(predicted_states)


In [None]:
# Predictions on the test data.
# model.predict returns a 1-D array of decoded hidden-state indices, which
# cannot be sliced by column. Map each decoded state to that state's emission
# mean to obtain one predicted value per feature and time step.
# Column 0 is treated as opens and column 1 as purchases, matching how later
# cells compare these predictions against test_data[:, 0] and test_data[:, 1].
# NOTE(review): `test_data` is not defined in any visible cell — confirm it is a
# 2-D array whose columns line up with model.means_.
test_states = model.predict(test_data)
test_preds = model.means_[test_states]
test_open_preds = test_preds[:, 0]
test_purchase_preds = test_preds[:, 1]


In [None]:
# Absolute errors per target: opens are compared against column 0 of the test
# data, purchases against column 1
open_abs_errors = np.abs(test_open_preds - test_data[:, 0])
purchase_abs_errors = np.abs(test_purchase_preds - test_data[:, 1])

# Mean absolute error for each target
test_open_mae = np.mean(open_abs_errors)
test_purchase_mae = np.mean(purchase_abs_errors)

print(f'Test MAE für Öffnungen: {test_open_mae:.4f}')
print(f'Test MAE für Käufe: {test_purchase_mae:.4f}')


In [None]:
import matplotlib.pyplot as plt

# Actual-vs-predicted scatter plots, one figure per target (opens, then
# purchases). Each figure also draws a red dashed reference line from (0, 0)
# to (1, 1).
panels = (
    (test_data[:, 0], test_open_preds,
     'Tatsächliche Öffnungen', 'Vorhergesagte Öffnungen',
     'Vorhersagegenauigkeit für Öffnungen'),
    (test_data[:, 1], test_purchase_preds,
     'Tatsächliche Käufe', 'Vorhergesagte Käufe',
     'Vorhersagegenauigkeit für Käufe'),
)

for actual, predicted, x_label, y_label, panel_title in panels:
    plt.scatter(actual, predicted, alpha=0.5)
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(panel_title)
    plt.show()


In [None]:
# New data to predict on
# NOTE(review): this single row has four values; if `model` is the GaussianHMM
# configured earlier with two-feature means, the feature count does not match —
# confirm the expected input layout.
new_data = np.array([[0, 0, 10, 100]])

# Prediction for opens and purchases
new_preds = model.predict(new_data)
# NOTE(review): `new_open` is not assigned in any visible cell and this line is
# a bare name, so the cell looks truncated — confirm the intended statement.
new_open


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the sales data into a pandas DataFrame (rebinds `data`)
data = pd.read_csv('sales_data.csv')

# Input feature (opens) and target (purchases), each kept as a one-column DataFrame
X = data.loc[:, ['Öffnungen']]
y = data.loc[:, ['Käufe']]

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
