In [1]:
from scipy.stats import nbinom, binom, poisson
from statsmodels.discrete.count_model import ZeroInflatedNegativeBinomialP
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import math
import seaborn as sns
from datetime import datetime, timedelta
from scipy.stats import pearsonr
from scipy.stats import kendalltau
from scipy.stats import spearmanr
import os
from scipy.optimize import minimize, Bounds, LinearConstraint, NonlinearConstraint
import inspect

### Relevante Parameter:

array_lambda = np.exp(np.log(0.63*EM_monat) + alpha_o[state] + beta_o[0] * np.log(LO_monat))

Die Öffungszahlen sind mit einer Poissonverteilung modelliert und die Werte folgender Tabellen haben Einfluss:

1. EM_monat
2. LO_monat

folgende Parameter haben einen Einfluss:
1. alpha_o (1 x anzahl_states array, also state abhängig)
2. beta_o (1 x 1 array, state unabhängig)

In [2]:
# EM_monat aus der HY Zahlen der versendeten Mails ziehen
path = r'C:/Users/felix.oechslein/Desktop/Analysen Thalia, Mai/verarbeitete_daten'
filename = 'Pivot_HY_Privat_ohneApple_Langzeitkunde.csv'

df_HY = pd.read_csv(os.path.join(path, filename))

In [3]:
# LO monat
path = r'C:/Users/felix.oechslein/Desktop/Analysen Thalia, Mai/verarbeitete_daten'
filename = 'Pivot_LO_Privat_ohneApple_Langzeitkunde.csv'

df_LO = pd.read_csv(os.path.join(path, filename))

### Datenanpassung wie bei der Berechnung der optimalen Parameter

In [4]:
## Alle DataFrame Werte von Float zu int
df_HY = df_HY.astype(int)
df_LO = df_LO.astype(int)

### Sicherstellen dass alle df dieselben Kunden verwenden

In [5]:
# Entfernen der Zeilen aus df_LO, die nicht in df_HY enthalten sind
df_LO = df_LO[df_LO['iid'].isin(df_HY['iid'])]
df_HY = df_HY[df_HY['iid'].isin(df_LO['iid'])]

In [6]:
len(df_LO)

647

In [7]:
len(df_HY)

647

### Ausreißer bei den Kunden löschen (wie bei den Berechnungen)

In [8]:
kunden_pos = [384, 446, 502, 599, 421, 285, 407, 499, 348, 351, 364, 449, 528, 414]

In [9]:
# Zeilen löschen
df_HY = df_HY.drop(kunden_pos).reset_index(drop = True)
df_LO = df_LO.drop(kunden_pos).reset_index(drop = True)

### Spezifikation des Modells

In [10]:
anzahl_kunden = len(df_HY["iid"])
anzahl_kunden

633

In [11]:
anzahl_monate = len(df_HY.columns[df_HY.columns != 'iid'])
anzahl_monate

15

In [12]:
anzahl_states = 3

### 1. Herauslesen der Parameter aus dem array der gesamten geschätzten Parameter

In [13]:
estimted_params=[ 0.00000000e+00, -8.42324287e+01,  1.69437089e+01,  0.00000000e+00,
       -1.51114259e+01, -1.27787560e+01,  0.00000000e+00, -4.66039409e+00,
        3.23378060e+01,  0.00000000e+00, -3.42059247e+01, -3.03032101e-01,
        0.00000000e+00, -1.16969644e+01, -4.21223535e+01,  0.00000000e+00,
       -3.23512569e+01,  5.10330353e+00,  0.00000000e+00, -1.34546356e+00,
       -1.45060842e+00,  0.00000000e+00,  1.05242438e+00, -1.51554184e+01,
        0.00000000e+00,  6.50267936e-01,  1.11128688e+00,  0.00000000e+00,
        7.18149941e+01,  5.07482709e-01,  0.00000000e+00,  1.65202863e+01,
        1.50351002e+01,  0.00000000e+00,  9.40601150e+00, -1.66324994e+01,
        0.00000000e+00, -1.05955017e+01, -1.72078495e-01,  0.00000000e+00,
        2.62869932e+01,  2.28266666e+01,  0.00000000e+00, -1.69422693e+00,
        7.29303793e+00,  4.87220599e-01,  7.01288616e-01,  1.24299189e+00,
       -1.36993482e+00,  1.80247785e+01,  2.12346810e+01,  1.55971024e+01,
        0.00000000e+00,  1.72348151e-01, -5.49982853e-01, -2.11311398e-01,
       -3.31995657e+00, -1.50224012e+00,  2.04106841e-01,  1.71997448e+00,
       -8.41039175e-02, -2.74031043e-01, -4.31434986e-01,  1.32125605e-02,
        1.07630364e+00]

In [14]:
alpha_size = anzahl_states**2
gamma_size = anzahl_states**2 * 4

alpha = estimted_params[:alpha_size]
gamma = estimted_params[alpha_size:alpha_size+gamma_size]
alpha_o = estimted_params[alpha_size+gamma_size:alpha_size+gamma_size+anzahl_states]
beta_o = estimted_params[alpha_size+gamma_size+anzahl_states:alpha_size+gamma_size+anzahl_states+1]
delta_0 = estimted_params[alpha_size+gamma_size+anzahl_states+1:alpha_size+gamma_size+anzahl_states*2+1]
delta_1 = estimted_params[alpha_size+gamma_size+anzahl_states*2+1:alpha_size+gamma_size+anzahl_states*3+1]
alpha_p = estimted_params[alpha_size+gamma_size+anzahl_states*3+1:alpha_size+gamma_size+anzahl_states*4+1]
beta_p_1 = estimted_params[alpha_size+gamma_size+anzahl_states*4+1:alpha_size+gamma_size+anzahl_states*5+1]
beta_p_2 = estimted_params[alpha_size+gamma_size+anzahl_states*5+1:alpha_size+gamma_size+anzahl_states*6+1]
theta = estimted_params[alpha_size+gamma_size+anzahl_states*6+1:]

In [15]:
alpha_o

[0.487220599, 0.701288616, 1.24299189]

In [16]:
beta_o

[-1.36993482]

### Erstellen der erwarteten Öffungen in den jeweiligen Monaten

In [17]:
erwartete_oeffungen = np.zeros((anzahl_kunden, anzahl_states, anzahl_monate))

In [18]:
for kunde in range(0, anzahl_kunden):    
    for state in range(0, anzahl_states):
        for monat in range(1, anzahl_monate +1):
            erwartete_oeffungen[kunde, state-1, monat-1] =\
                    np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o[state] +\
                           beta_o[0] * np.log(df_LO.iloc[kunde][str(monat)]))

In [19]:
np.sum(erwartete_oeffungen[:,0,:])/erwartete_oeffungen[:,0,:].size

0.6736191562238436

In [20]:
np.sum(erwartete_oeffungen[:,1,:])/erwartete_oeffungen[:,1,:].size

1.157905646490088

In [21]:
np.sum(erwartete_oeffungen[:,2,:])/erwartete_oeffungen[:,1,:].size

0.5438083484272844

### Nelder-Mead mit poisite Wahresceinlichkeiten nach state 1

### 1. Herauslesen der Parameter aus dem array der gesamten geschätzten Parameter

In [52]:
estimated_params_2 = [-7.44841087e-01, -8.85139802e+02, -2.40481934e+02, -2.05213086e+00,
       -5.60706087e+02, -5.35441302e+02, -9.63275246e-03, -4.27150076e+02,
       -1.92413334e+01,  1.87679188e+00,  1.58999244e-01, -9.03253128e-02,
       -3.28630690e-01, -2.43215232e-01, -1.37190579e-01,  3.42037204e-01,
        2.51357509e-01,  2.06682854e-01, -4.73934832e-01, -2.71680222e-01,
        1.60689390e-01, -1.72473497e-01, -2.60376329e-01,  2.51404015e-01,
       -1.00917729e-02, -7.52990475e-01,  2.61244903e-02,  7.95823527e-01,
       -5.31101017e-02, -2.62763614e-01,  8.73203616e-01, -7.32946413e-01,
       -3.99863957e-01,  5.41473133e-01,  1.89988509e+00, -7.65683186e-01,
        5.79608497e-01,  1.28236010e-01,  9.41770971e-01, -8.84228191e-01,
       -7.05129175e-01, -2.51729254e+00, -1.31076750e+00,  1.76696263e-01,
        3.34715279e-01,  1.11964744e+00,  3.44350612e-01,  3.11862570e+00,
       -1.35299604e+00,  9.91086034e+01,  5.99497355e+00, -4.56900519e+01,
       -4.73360515e-01, -7.11949781e-01,  6.54292220e-02, -1.26160660e+00,
        7.29158556e+01, -3.38290584e+01, -2.22817443e-01,  8.27814530e-02,
       -4.07211009e-01,  5.30692948e-02, -8.46733418e-01, -5.21951998e-01,
        4.65391969e-01]

In [53]:
alpha_size = anzahl_states**2
gamma_size = anzahl_states**2 * 4

alpha_2 = estimated_params_2[:alpha_size]
gamma_2 = estimated_params_2[alpha_size:alpha_size+gamma_size]
alpha_o_2 = estimated_params_2[alpha_size+gamma_size:alpha_size+gamma_size+anzahl_states]
beta_o_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states:alpha_size+gamma_size+anzahl_states+1]
delta_0_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states+1:alpha_size+gamma_size+anzahl_states*2+1]
delta_1_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*2+1:alpha_size+gamma_size+anzahl_states*3+1]
alpha_p_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*3+1:alpha_size+gamma_size+anzahl_states*4+1]
beta_p_1_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*4+1:alpha_size+gamma_size+anzahl_states*5+1]
beta_p_2_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*5+1:alpha_size+gamma_size+anzahl_states*6+1]
theta_2 = estimated_params_2[alpha_size+gamma_size+anzahl_states*6+1:]

In [54]:
alpha_o_2

[1.11964744, 0.344350612, 3.1186257]

In [55]:
beta_o_2

[-1.35299604]

### Berechnung erwartetete Öffungen

In [56]:
erwartete_oeffungen_2 = np.zeros((anzahl_kunden, anzahl_states, anzahl_monate))

In [70]:
for kunde in range(0, anzahl_kunden):    
    for state in range(0, anzahl_states):
        for monat in range(1, anzahl_monate +1):
            erwartete_oeffungen_2[kunde, state, monat-1] =\
                    np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o_2[state] +\
                           beta_o_2[0] * np.log(df_LO.iloc[kunde][str(monat)]))

In [71]:
np.sum(erwartete_oeffungen_2[:,0,:])/erwartete_oeffungen_2[:,0,:].size

0.9933553870601063

In [72]:
np.sum(erwartete_oeffungen_2[:,1,:])/erwartete_oeffungen_2[:,1,:].size

0.45750676172047744

In [73]:
np.sum(erwartete_oeffungen_2[:,2,:])/erwartete_oeffungen_2[:,2,:].size

7.332462981759254

In [74]:
df_HY.max()

iid    160436094
1              2
2              3
3              2
4              3
5              2
6              3
7              2
8              2
9              2
10             1
11             1
12             3
13             2
14             3
15             1
dtype: int32

### Nelder-Mead mit poisite Wahresceinlichkeiten nach state 1, 30k Berechungen

### 1. Herauslesen der Parameter aus dem array der gesamten geschätzten Parameter

In [76]:
estimated_params_3 = [ 4.17928553e-01, -4.22285316e+02, -1.98242290e+00, -8.79879518e-01,
       -1.94045445e+03,  6.99046727e+02,  9.13766559e-01, -5.31026400e+02,
        4.31211962e+01, -2.14401838e+00, -1.67642789e+00,  6.08254147e-01,
       -5.04744507e-03,  1.87801747e-02,  6.87473148e-01, -3.36485684e-01,
        1.29745132e+00,  8.72972789e-01, -3.52212973e-01, -4.02096620e-01,
        4.45619697e-01, -6.69209684e-01,  1.44688445e+00,  2.53356872e-01,
        1.80355515e-02, -1.11375261e+00, -4.15990883e-01, -4.54538186e-01,
       -8.26245723e-01, -4.51785453e-01,  5.19280797e-01, -1.28267812e+00,
        1.47131184e+00, -5.86812259e-01,  6.62504829e-01,  1.97691597e+00,
        4.27950006e-01, -3.43138559e+00,  1.03359822e+00,  7.07554502e-02,
        7.10101998e-01,  5.84550086e-01,  1.33027166e+00,  2.22558553e-01,
        1.00029849e+00,  1.36650013e-01, -1.19263433e+00,  1.21048688e+00,
       -1.13572566e+00,  9.56897194e+01,  1.37166797e+01,  2.70925515e+01,
        1.93039105e+00, -1.72773597e+00, -5.83161966e-01, -1.23828420e+00,
        7.92906253e+01, -2.04200635e+00, -3.80553587e-01,  8.20922389e-04,
        5.60347879e-01,  3.41695867e-01,  9.78048662e-01, -1.52109959e-01,
        6.57095859e-01]

In [77]:
alpha_3 = estimated_params_3[:alpha_size]
gamma_3 = estimated_params_3[alpha_size:alpha_size+gamma_size]

alpha_o_3 = estimated_params_3[alpha_size+gamma_size:alpha_size+gamma_size+anzahl_states]
beta_o_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states:alpha_size+gamma_size+anzahl_states+1]
delta_0_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states+1:alpha_size+gamma_size+anzahl_states*2+1]
delta_1_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*2+1:alpha_size+gamma_size+anzahl_states*3+1]
alpha_p_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*3+1:alpha_size+gamma_size+anzahl_states*4+1]
beta_p_1_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*4+1:alpha_size+gamma_size+anzahl_states*5+1]
beta_p_2_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*5+1:alpha_size+gamma_size+anzahl_states*6+1]
theta_3 = estimated_params_3[alpha_size+gamma_size+anzahl_states*6+1:]

In [78]:
alpha_o_3

[0.136650013, -1.19263433, 1.21048688]

In [79]:
beta_o_3

[-1.13572566]

### Erstellen der erwarteten Öffungen in den jeweiligen Monaten

In [80]:
erwartete_oeffungen_3 = np.zeros((anzahl_kunden, anzahl_states, anzahl_monate))

In [81]:
for kunde in range(0, anzahl_kunden):    
    for state in range(0, anzahl_states):
        for monat in range(1, anzahl_monate +1):
            erwartete_oeffungen_3[kunde, state, monat-1] =\
                    np.exp(np.log(0.63)*df_HY.iloc[kunde][str(monat)] + alpha_o_3[state] +\
                           beta_o_3[0] * np.log(df_LO.iloc[kunde][str(monat)]))

In [82]:
np.sum(erwartete_oeffungen_3[:,0,:])/erwartete_oeffungen_3[:,0,:].size

0.38624154286745543

In [83]:
np.sum(erwartete_oeffungen_3[:,1,:])/erwartete_oeffungen_3[:,1,:].size

0.10222523749274383

In [84]:
np.sum(erwartete_oeffungen_3[:,2,:])/erwartete_oeffungen_3[:,2,:].size

1.1303694435680933