# Logistic Regression

In [1]:
import math
import numpy as np
import pandas as pd
import sklearn.linear_model
import statsmodels.api as sm
import matplotlib.pyplot as plt

## Simulierte Daten

In [2]:
def generate_prices(observations_count, prices_range):
    return np.random.choice(prices_range, observations_count)

def generate_competitor_prices(observations_count, competitors_count, prices_range):
    return np.matrix([generate_prices(observations_count, prices_range) for i in range(0, competitors_count)])
    
def calculate_ranks(observations_count, competitors_count, competitor_prices, prices):
    return [1 + len([1 for j in range(competitors_count) if competitor_prices[j,i] < prices[i]])
            for i in range(observations_count)]

def calculate_sold_probs_A(observations_count, ranks, competitors_count, prices):
    max_prob = lambda i: 1 - ((0.3 * ranks[0][i])/(competitors_count[0] + 1)) - 0.05 * prices[0][i] + (-0.0125 * (prices[0][i] - prices[1][i]) + 0.25)
    return [np.maximum(0, np.round(np.random.uniform(0, max_prob(i)))) for i in range(observations_count)]

def calculate_sold_probs_B(observations_count, ranks, competitors_count, prices):
    max_prob = lambda i: 1 - ((0.3 * ranks[1][i])/(competitors_count[1] + 1)) - 0.05 * prices[1][i] + (0.0125 * (prices[0][i] - prices[1][i]) + 0.25)
    return [np.maximum(0, np.round(np.random.uniform(0, max_prob(i)))) for i in range(observations_count)]

observations_count = 1000
prices_ranges = [np.arange(1, 20.1, 0.1) for i in range(2)] #TODO adopt prob function if price changes
prices = [generate_prices(observations_count, prices_ranges[i]) for i in range(2)] # TODO p1 > p2?
competitors_count = [5, 5]
competitor_prices = [generate_competitor_prices(observations_count, competitors_count[i], prices_ranges[i]) for i in range(2)]
ranks = [calculate_ranks(observations_count, competitors_count[i], competitor_prices[i], prices[i]) for i in range(2)]
sold = [calculate_sold_probs_A(observations_count, ranks, competitors_count, prices),
        calculate_sold_probs_B(observations_count, ranks, competitors_count, prices)]

## Regression

In [12]:
def get_explanatory_vars_A(observations_count, competitors_count, ranks, competitor_prices):
    explanatory_1 = [1 for j in range(observations_count)]
    explanatory_2 = [ranks[0][j] for j in range(observations_count)]
    
    def get_all_competitor_prices(j):
        for i in range(2):
            for k in range(competitors_count[i]):
                yield competitor_prices[i][k,j]
    explanatory_3 = [prices[0][j] - np.min(list(get_all_competitor_prices(j))) for j in range(observations_count)]
    explanatory_4 = [prices[0][j] - np.min([prices[i][j] for i in range(2)]) for j in range(observations_count)]
    explanatory_5 = [math.pow(explanatory_4[j], 2) for j in range(observations_count)]
    return np.matrix([explanatory_1, explanatory_2, explanatory_3, explanatory_4, explanatory_5])

def get_explanatory_vars_B(observations_count, competitors_count, ranks, competitor_prices):
    explanatory_1 = [1 for j in range(observations_count)]
    explanatory_2 = [ranks[1][j] for j in range(observations_count)]
    
    def get_all_competitor_prices(j):
        for i in range(2):
            for k in range(competitors_count[i]):
                yield competitor_prices[i][k,j]
    explanatory_3 = [prices[1][j] - np.min(list(get_all_competitor_prices(j))) for j in range(observations_count)]
    explanatory_4 = [prices[1][j] - np.min([prices[i][j] for i in range(2)]) for j in range(observations_count)]
    explanatory_5 = [math.pow(explanatory_4[j], 2) for j in range(observations_count)]
    return np.matrix([explanatory_1, explanatory_2, explanatory_3, explanatory_4, explanatory_5])

explanatory_vars = [get_explanatory_vars_A(observations_count, competitors_count, ranks, competitor_prices),
                    get_explanatory_vars_B(observations_count, competitors_count, ranks, competitor_prices)]
logits = [sm.Logit(sold[i], explanatory_vars[i].transpose()) for i in range(2)]
results = [logits[i].fit() for i in range(2)]
beta = [results[i].params for i in range(2)]
beta

Optimization terminated successfully.
         Current function value: 0.392666
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.362382
         Iterations 7


[array([ 0.76827562, -0.57955443, -0.11461301]),
 array([ 0.94689306, -0.60563729, -0.14848387])]

In [13]:
[results[i].aic for i in range(2)]

[791.33197388956091, 730.76368256322735]

## Optimierung

In [14]:
def get_price_index(price):
    return int(price / 0.1 - 10) # TODO dependent on price range?

def calculate_sale_probs(beta, explanatory_vars, prices_range):
    L = lambda price: np.sum([beta[m] * explanatory_vars[m,get_price_index(price)] for m in range(explanatory_vars.shape[0])])
    return [np.exp(L(price)) / (1 + np.exp(L(price))) for price in prices_range]

def bellman(sale_probs, prices_range, delta, values, step):
    prob_A = lambda price, i: sale_probs[0, i, get_price_index(price)]
    prob_B = lambda price, j: sale_probs[1, j, get_price_index(price)]
    todays_profit = lambda prices, j, i: i * prices[j]
    disc_exp_fut_profits = delta * values[step + 1]

    bellman_for_combinations = []
    for price_A in prices_range[0]:
        for price_B in prices_range[1]:
            bellman_for_combinations.append(np.sum([prob_A(price_A, i) * np.sum([prob_B(price_B, j) * ((todays_profit([price_A, price_B], 0, i)+todays_profit([price_A, price_B], 1, j)) + disc_exp_fut_profits) for j in range(2)]) for i in range(2)]))
    return np.max(bellman_for_combinations)

def bellman_check(sale_probs, prices, delta, values, step):
    prob_A = lambda price, i: sale_probs[0, i, get_price_index(price)]
    prob_B = lambda price, j: sale_probs[1, j, get_price_index(price)]
    todays_profit = lambda prices, j, i: i * prices[j]
    disc_exp_fut_profits = delta * values[step + 1]

    return np.sum([prob_A(price_A, i) * np.sum([prob_B(price_B, j) * ((todays_profit([price_A, price_B], 0, i)+todays_profit([price_A, price_B], 1, j)) + disc_exp_fut_profits) for j in range(2)]) for i in range(2)])

In [15]:
competitor_prices = [generate_competitor_prices(observations_count, competitors_count[i], prices_ranges[i]) for i in range(2)]
explanatory_vars = [get_explanatory_vars_A(observations_count, competitors_count, ranks, competitor_prices),
                    get_explanatory_vars_B(observations_count, competitors_count, ranks, competitor_prices)]

p = [calculate_sale_probs(beta[i], explanatory_vars[i], prices_ranges[i]) for i in range(2)]
sale_probs = np.array([[[1 - j for j in p[i]], p[i]] for i in range(2)])

delta = 0.99
steps = 100
values = np.empty(shape=(steps + 1))
for step in range(steps, -1, -1):
    if step == steps:
        values[step] = 0
    else:
        values[step] = bellman(sale_probs, prices_ranges, delta, values, step)

opt_prices = np.zeros(shape=(2))
for price_A in prices_ranges[0]:
    for price_B in prices_ranges[1]:
        if bellman_check(sale_probs, [price_A, price_B], delta, values, step) == values[0]:
            opt_prices[0] = price_A
            opt_prices[1] = price_B      
opt_prices

array([ 19.8,  19.5])

In [16]:
values

array([ 1754.17666058,  1743.94631063,  1733.6126238 ,  1723.1745563 ,
        1712.63105377,  1701.98105122,  1691.22347288,  1680.35723214,
        1669.38123139,  1658.29436194,  1647.09550392,  1635.78352612,
        1624.35728591,  1612.81562914,  1601.15738997,  1589.38139082,
        1577.48644217,  1565.47134254,  1553.33487825,  1541.07582342,
        1528.69293976,  1516.18497646,  1503.55067009,  1490.78874447,
        1477.89791051,  1464.87686611,  1451.72429601,  1438.43887166,
        1425.01925111,  1411.46407883,  1397.77198562,  1383.94158844,
        1369.97149028,  1355.86028002,  1341.60653227,  1327.20880728,
        1312.66565073,  1297.9755936 ,  1283.13715205,  1268.14882726,
        1253.00910525,  1237.71645675,  1222.26933706,  1206.66618585,
        1190.90542706,  1174.98546868,  1158.90470264,  1142.66150463,
        1126.2542339 ,  1109.68123317,  1092.94082839,  1076.03132861,
        1058.9510258 ,  1041.69819468,  1024.27109255,  1006.66795907,
      