In [3]:
from scipy.stats import nbinom, binom, poisson
from statsmodels.discrete.count_model import ZeroInflatedNegativeBinomialP
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import math
import seaborn as sns
from datetime import datetime, timedelta
from scipy.stats import pearsonr
from scipy.stats import kendalltau
from scipy.stats import spearmanr
import os
from scipy.optimize import minimize, Bounds, LinearConstraint, NonlinearConstraint
import inspect

### Relevante Parameter:

array_lambda = np.exp(np.log(0.63*EM_monat) + alpha_o[state] + beta_o[0] * np.log(LO_monat))

Die Öffungszahlen sind mit einer Poissonverteilung modelliert und die Werte folgender Tabellen haben Einfluss:

1. EM_monat
2. LO_monat

folgende Parameter haben einen Einfluss:
1. alpha_o (1 x anzahl_states array, also state abhängig)
2. beta_o (1 x 1 array, state unabhängig)

In [4]:
# EM_monat aus der HY Zahlen der versendeten Mails ziehen
path = r'C:/Users/felix.oechslein/Desktop/Analysen Thalia, Mai/verarbeitete_daten'
filename = 'Pivot_HY_Privat_ohneApple_Langzeitkunde.csv'

df_HY = pd.read_csv(os.path.join(path, filename))

In [5]:
# LO monat
path = r'C:/Users/felix.oechslein/Desktop/Analysen Thalia, Mai/verarbeitete_daten'
filename = 'Pivot_LO_Privat_ohneApple_Langzeitkunde.csv'

df_LO = pd.read_csv(os.path.join(path, filename))

### Datenanpassung wie bei der Berechnung der optimalen Parameter

In [6]:
## Alle DataFrame Werte von Float zu int
df_HY = df_HY.astype(int)
df_LO = df_LO.astype(int)

### Sicherstellen dass alle df dieselben Kunden verwenden

In [7]:
# Entfernen der Zeilen aus df_LO, die nicht in df_HY enthalten sind
df_LO = df_LO[df_LO['iid'].isin(df_HY['iid'])]
df_HY = df_HY[df_HY['iid'].isin(df_LO['iid'])]

In [8]:
len(df_LO)

647

In [9]:
len(df_HY)

647

### Ausreißer bei den Kunden löschen (wie bei den Berechnungen)

In [10]:
kunden_pos = [384, 446, 502, 599, 421, 285, 407, 499, 348, 351, 364, 449, 528, 414]

In [11]:
# Zeilen löschen
df_HY = df_HY.drop(kunden_pos).reset_index(drop = True)
df_LO = df_LO.drop(kunden_pos).reset_index(drop = True)

### Spezifikation des Modells

In [12]:
anzahl_kunden = len(df_HY["iid"])
anzahl_kunden

633

In [13]:
anzahl_monate = len(df_HY.columns[df_HY.columns != 'iid'])
anzahl_monate

15

In [14]:
anzahl_states = 2

### 1. Herauslesen der Parameter aus dem array der gesamten geschätzten Parameter

In [15]:
estimted_params=[  0.        , -18.15053611,   0.        , -58.29432985,
         0.        ,  -2.90055678,   0.        ,  14.34443851,
         0.        ,   0.45459921,   0.        , -15.93902142,
         0.        ,  26.45959452,   0.        ,  44.41306213,
         0.        ,  -7.85441467,   0.        , -11.04343658,
         0.73832154,   1.79246772,  -1.25828191,   8.15200577,
        14.28575254,  -0.76026437,  -1.58603474,  -1.21677622,
        -2.79459761,  -0.25805106,   0.73952882,   0.08644445,
        -0.11602983,   0.81965572]

In [16]:
alpha_size = anzahl_states**2
gamma_size = anzahl_states**2 * 4

alpha = estimted_params[:alpha_size]
gamma = estimted_params[alpha_size:alpha_size+gamma_size]
alpha_o = estimted_params[alpha_size+gamma_size:alpha_size+gamma_size+anzahl_states]
beta_o = estimted_params[alpha_size+gamma_size+anzahl_states:alpha_size+gamma_size+anzahl_states+1]
delta_0 = estimted_params[alpha_size+gamma_size+anzahl_states+1:alpha_size+gamma_size+anzahl_states*2+1]
delta_1 = estimted_params[alpha_size+gamma_size+anzahl_states*2+1:alpha_size+gamma_size+anzahl_states*3+1]
alpha_p = estimted_params[alpha_size+gamma_size+anzahl_states*3+1:alpha_size+gamma_size+anzahl_states*4+1]
beta_p_1 = estimted_params[alpha_size+gamma_size+anzahl_states*4+1:alpha_size+gamma_size+anzahl_states*5+1]
beta_p_2 = estimted_params[alpha_size+gamma_size+anzahl_states*5+1:alpha_size+gamma_size+anzahl_states*6+1]
theta = estimted_params[alpha_size+gamma_size+anzahl_states*6+1:]

In [17]:
alpha_o

[0.73832154, 1.79246772]

In [18]:
beta_o

[-1.25828191]

### Erstellen der erwarteten Öffungen in den jeweiligen Monaten

In [19]:
erwartete_oeffungen = np.zeros((anzahl_kunden, anzahl_states, anzahl_monate))

In [20]:
for kunde in range(0, anzahl_kunden):    
    for state in range(0, anzahl_states):
        for monat in range(1, anzahl_monate +1):
            erwartete_oeffungen[kunde, state-1, monat-1] =\
                    np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o[state] +\
                           beta_o[0] * np.log(df_LO.iloc[kunde][str(monat)]))

In [21]:
np.sum(erwartete_oeffungen[:,0,:])/erwartete_oeffungen[:,0,:].size

2.039700874706104

In [22]:
np.sum(erwartete_oeffungen[:,1,:])/erwartete_oeffungen[:,1,:].size

0.7108150476250417

# 2.Test für 2 states und 1000 Iterationen und bounds

### 1. Herauslesen der Parameter aus dem array der gesamten geschätzten Parameter

In [24]:
estimated_params_2 = [ 0.        ,  3.13291287,  0.        ,  1.60263079,  0.        ,
        0.06988214,  0.        ,  0.79140532,  0.        ,  0.91950281,
        0.        , -1.10907081,  0.        , -3.41801979,  0.        ,
       -3.41917121,  0.        , -4.23594507,  0.        , -3.9000474 ,
        1.10129257, -5.        , -0.79042102,  5.        ,  5.        ,
        4.63925216,  1.95999501, -0.53922688, -1.71813938, -0.86955723,
       -1.45737121,  0.18195139,  1.1       ,  0.90651293]

In [25]:
alpha_size = anzahl_states**2
gamma_size = anzahl_states**2 * 4

alpha_2 = estimated_params_2[:alpha_size]
gamma_2 = estimated_params_2[alpha_size:alpha_size+gamma_size]
alpha_o_2 = estimated_params_2[alpha_size+gamma_size:alpha_size+gamma_size+anzahl_states]
beta_o_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states:alpha_size+gamma_size+anzahl_states+1]
delta_0_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states+1:alpha_size+gamma_size+anzahl_states*2+1]
delta_1_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*2+1:alpha_size+gamma_size+anzahl_states*3+1]
alpha_p_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*3+1:alpha_size+gamma_size+anzahl_states*4+1]
beta_p_1_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*4+1:alpha_size+gamma_size+anzahl_states*5+1]
beta_p_2_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*5+1:alpha_size+gamma_size+anzahl_states*6+1]
theta_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*6+1:]

In [26]:
alpha_o_2

[1.10129257, -5.0]

In [27]:
beta_o_2

[-0.79042102]

### Berechnung erwartetete Öffungen

In [28]:
erwartete_oeffungen_2 = np.zeros((anzahl_kunden, anzahl_states, anzahl_monate))

In [29]:
for kunde in range(0, anzahl_kunden):    
    for state in range(0, anzahl_states):
        for monat in range(1, anzahl_monate +1):
            erwartete_oeffungen_2[kunde, state, monat-1] =\
                    np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o_2[state] +\
                           beta_o_2[0] * np.log(df_LO.iloc[kunde][str(monat)]))

In [30]:
np.sum(erwartete_oeffungen_2[:,0,:])/erwartete_oeffungen_2[:,0,:].size

1.1296895071887876

In [31]:
np.sum(erwartete_oeffungen_2[:,1,:])/erwartete_oeffungen_2[:,1,:].size

0.0025304712027657333

# 3. Test für 2 states und 5000 Iterationen

### 1. Herauslesen der Parameter aus dem array der gesamten geschätzten Parameter

In [45]:
estimated_params_3 = [ 0.00000000e+00, -1.55403743e+07,  0.00000000e+00, -1.99376641e+07,
        0.00000000e+00, -1.20984080e+05,  0.00000000e+00, -2.60670922e+07,
        0.00000000e+00,  1.65105433e+04,  0.00000000e+00, -2.37657250e+07,
        0.00000000e+00,  2.31826307e+07,  0.00000000e+00,  4.36397955e+07,
        0.00000000e+00, -7.68220378e+06,  0.00000000e+00, -1.03216260e+07,
       -2.39595796e+03,  1.78690685e+03,  2.21360679e+03,  2.55189594e+04,
       -3.45447527e+04,  2.85540133e+04,  3.51966973e+06, -2.12287490e+04,
        2.73574014e+05,  1.57217226e+04, -2.29452252e+05, -7.77517142e+03,
        4.70717259e+04, -3.44150531e+03]

In [46]:
alpha_3 = estimated_params_3[:alpha_size]
gamma_3 = estimated_params_3[alpha_size:alpha_size+gamma_size]

alpha_o_3 = estimated_params_3[alpha_size+gamma_size:alpha_size+gamma_size+anzahl_states]
beta_o_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states:alpha_size+gamma_size+anzahl_states+1]
delta_0_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states+1:alpha_size+gamma_size+anzahl_states*2+1]
delta_1_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*2+1:alpha_size+gamma_size+anzahl_states*3+1]
alpha_p_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*3+1:alpha_size+gamma_size+anzahl_states*4+1]
beta_p_1_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*4+1:alpha_size+gamma_size+anzahl_states*5+1]
beta_p_2_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*5+1:alpha_size+gamma_size+anzahl_states*6+1]
theta_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*6+1:]

In [47]:
alpha_o_3

[-2395.95796, 1786.90685]

In [48]:
beta_o_3

[2213.60679]

### Erstellen der erwarteten Öffungen in den jeweiligen Monaten

In [37]:
erwartete_oeffungen_3 = np.zeros((anzahl_kunden, anzahl_states, anzahl_monate))

In [38]:
for kunde in range(0, anzahl_kunden):    
    for state in range(0, anzahl_states):
        for monat in range(1, anzahl_monate +1):
            erwartete_oeffungen_3[kunde, state, monat-1] =\
                    np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o_3[state] +\
                           beta_o_3[0] * np.log(df_LO.iloc[kunde][str(monat)]))

  np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o_3[state] +\


In [39]:
np.sum(erwartete_oeffungen_3[:,0,:])/erwartete_oeffungen_3[:,0,:].size

inf

In [40]:
np.sum(erwartete_oeffungen_3[:,1,:])/erwartete_oeffungen_3[:,1,:].size

inf

# 4. Test für 2 states und 1000 Iterationen mit ounds + 100 iterationen mit zusätzlichen bounds

In [51]:
estimated_params_4 = [ 0.        ,  3.13291287,  0.        ,  1.60263079,  0.        ,
        0.06988214,  0.        ,  0.79140532,  0.        ,  0.91950281,
        0.        , -1.10907081,  0.        , -3.41801979,  0.        ,
       -3.41917121,  0.        , -4.23594507,  0.        , -3.9000474 ,
        1.10129257, -5.        , -0.79042102,  5.        ,  5.        ,
        4.63925216,  1.95999501, -0.53922688, -1.71813938, -0.86955723,
       -1.45737121,  0.18195139,  1.1       ,  0.90651293]

In [52]:
alpha_4 = estimated_params_4[:alpha_size]
gamma_4 = estimated_params_4[alpha_size:alpha_size+gamma_size]

alpha_o_4 = estimated_params_4[alpha_size+gamma_size:alpha_size+gamma_size+anzahl_states]
beta_o_4 = estimated_params_4[alpha_size+gamma_size+anzahl_states:alpha_size+gamma_size+anzahl_states+1]
delta_0_4 = estimated_params_4[alpha_size+gamma_size+anzahl_states+1:alpha_size+gamma_size+anzahl_states*2+1]
delta_1_4 = estimated_params_4[alpha_size+gamma_size+anzahl_states*2+1:alpha_size+gamma_size+anzahl_states*3+1]
alpha_p_4 = estimated_params_4[alpha_size+gamma_size+anzahl_states*3+1:alpha_size+gamma_size+anzahl_states*4+1]
beta_p_1_4 = estimated_params_4[alpha_size+gamma_size+anzahl_states*4+1:alpha_size+gamma_size+anzahl_states*5+1]
beta_p_2_4 = estimated_params_4[alpha_size+gamma_size+anzahl_states*5+1:alpha_size+gamma_size+anzahl_states*6+1]
theta_4 = estimated_params_4[alpha_size+gamma_size+anzahl_states*6+1:]


In [53]:
alpha_o_4

[1.10129257, -5.0]

In [54]:
beta_o_4

[-0.79042102]

### Erstellen der erwarteten Öffungen in den jeweiligen Monaten

In [55]:
erwartete_oeffungen_4 = np.zeros((anzahl_kunden, anzahl_states, anzahl_monate))

In [57]:
for kunde in range(0, anzahl_kunden):    
    for state in range(0, anzahl_states):
        for monat in range(1, anzahl_monate +1):
            erwartete_oeffungen_4[kunde, state, monat-1] =\
                    np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o_4[state] +\
                           beta_o_4[0] * np.log(df_LO.iloc[kunde][str(monat)]))

In [60]:
np.sum(erwartete_oeffungen_4[:,0,:])/erwartete_oeffungen_3[:,0,:].size

1.1296895071887876

In [59]:
np.sum(erwartete_oeffungen_4[:,1,:])/erwartete_oeffungen_3[:,1,:].size

0.0025304712027657333

# 4. Test für 2 states und 200 Iterationen mit bounds

Definiere bounds
bounds =    
[(-3, 3)] * len(alpha) +\
            [(-3, 3)] * len(gamma) +\
            [(-3, 3)] * len(alpha_o) +\
            [(-3, 3)] * len(beta_o) +\
            [(-8, 8)] * len(delta_0) +\
            [(-3, 2)] * len(delta_1) +\
            [(-3, 3)] * len(alpha_p) +\
            [(-3, 3)] * len(beta_p_1) +\
            [(-3, 3)] * len(beta_p_2) +\
            [(0.1, None)] * len(theta)
            
Startwert
x0 = 0

In [61]:
estimated_params_5 = [ 0.        , -2.57542179,  0.        ,  2.62717551,  0.        ,
       -2.98703585,  0.        , -2.31989391,  0.        ,  0.1361796 ,
        0.        ,  0.03734605,  0.        ,  2.16428968,  0.        ,
        3.        ,  0.        ,  1.49658634,  0.        , -1.30925976,
        0.53596929,  1.56559093, -1.28623055,  6.88631043,  6.44953524,
       -1.28842799,  1.61663301, -1.12905537, -1.85205332, -0.38670405,
        0.135329  ,  0.15976614, -0.01763291,  0.85690663]

In [62]:
alpha_5 = estimated_params_5[:alpha_size]
gamma_5 = estimated_params_5[alpha_size:alpha_size+gamma_size]

alpha_o_5 = estimated_params_5[alpha_size+gamma_size:alpha_size+gamma_size+anzahl_states]
beta_o_5 = estimated_params_5[alpha_size+gamma_size+anzahl_states:alpha_size+gamma_size+anzahl_states+1]
delta_0_5 = estimated_params_5[alpha_size+gamma_size+anzahl_states+1:alpha_size+gamma_size+anzahl_states*2+1]
delta_1_5 = estimated_params_5[alpha_size+gamma_size+anzahl_states*2+1:alpha_size+gamma_size+anzahl_states*3+1]
alpha_p_5 = estimated_params_5[alpha_size+gamma_size+anzahl_states*3+1:alpha_size+gamma_size+anzahl_states*4+1]
beta_p_1_5 = estimated_params_5[alpha_size+gamma_size+anzahl_states*4+1:alpha_size+gamma_size+anzahl_states*5+1]
beta_p_2_5 = estimated_params_5[alpha_size+gamma_size+anzahl_states*5+1:alpha_size+gamma_size+anzahl_states*6+1]
theta_5 = estimated_params_5[alpha_size+gamma_size+anzahl_states*6+1:]


In [64]:
alpha_o_5

[0.53596929, 1.56559093]

In [65]:
beta_o_5

[-1.28623055]

### Erstellen der erwarteten Öffungen in den jeweiligen Monaten

In [66]:
erwartete_oeffungen_5 = np.zeros((anzahl_kunden, anzahl_states, anzahl_monate))

In [67]:
for kunde in range(0, anzahl_kunden):    
    for state in range(0, anzahl_states):
        for monat in range(1, anzahl_monate +1):
            erwartete_oeffungen_5[kunde, state, monat-1] =\
                    np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o_5[state] +\
                           beta_o_5[0] * np.log(df_LO.iloc[kunde][str(monat)]))

In [74]:
np.sum(erwartete_oeffungen_5[:,0,:])/erwartete_oeffungen_5[:,0,:].size

0.5780544696366883

In [75]:
np.sum(erwartete_oeffungen_5[:,1,:])/erwartete_oeffungen_5[:,0,:].size

1.6185561127357522