# Exercise Sheet 9

### Exercise 31
A florist buys highly perishable flowers from a wholesaler for 3 Euro apiece and sells them for 7 Euro apiece. Any flower not sold on the day it was bought is worthless and is thrown away. The daily demand X (number of flowers requested) is distributed as follows:

| k       | 0       | 1       | 2       | 3       | 4       | 5       | 6       |
|---------|---------|---------|---------|---------|---------|---------|---------|
| P[X=k]  | 0.01    | 0.02    | 0.03    | 0.04    | 0.05    | 0.1     | 0.2     |

| k       | 7       | 8       | 9       | 10      | 11      | 12      | >12     |
|---------|---------|---------|---------|---------|---------|---------|---------|
| P[X=k]  | 0.2     | 0.1     | 0.09    | 0.08    | 0.07    | 0.01    | 0.0     |

Implement a simulation that meets the given requirements and run it for 1 year. Answer the following questions using a graphical visualisation:
* How many flowers must the florist buy daily for maximizing the profit?
* If the florist buys too many flowers a day, this will result in an expected loss. What is the corresponding threshold (number of flowers bought)?

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from scipy.stats import ttest_ind, shapiro, mannwhitneyu, ttest_1samp, wilcoxon

In [None]:
def demand():
    """Draw one day's flower demand from the distribution given in the exercise.

    Returns:
        int: number of flowers requested, between 0 and 12 inclusive.
    """
    # P[X = k] for k = 0..12; the position in the list is the demand value.
    p = [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.2, 0.1, 0.09, 0.08, 0.07, 0.01]
    # Sampling the index directly avoids building a one-hot multinomial draw on
    # every call and converting a length-1 array with int(), which newer NumPy
    # versions deprecate.
    return int(np.random.choice(len(p), p=p))


def simu(flowers):
    """Simulate one day of sales and return that day's profit.

    Args:
        flowers: number of flowers bought for the day.

    Returns:
        Revenue from the flowers sold minus the purchase cost of all flowers.
    """
    unit_cost = 3
    unit_price = 7
    requested = demand()  # number of flowers customers asked for today
    # Sales are capped by the stock: never more than was bought.
    sold = requested if requested <= flowers else flowers
    return unit_price * sold - unit_cost * flowers


def control(buy, n):
    """Run the single-day simulation ``n`` times with ``buy`` flowers per day.

    Args:
        buy: number of flowers bought each day.
        n: number of days to simulate.

    Returns:
        list with the ``n`` daily profits returned by ``simu``.
    """
    profits = []
    for _ in range(n):
        profits.append(simu(buy))
    return profits

In [None]:
# Estimate the mean daily profit for every purchase quantity from 0 to 99
# (10000 simulated days each); the index of the largest mean is the best quantity.
wins = np.array([np.mean(control(quantity, 10000)) for quantity in range(100)])
max_win = wins.argmax(axis=0)
print(f'Maximum profit with {max_win} flowers bought daily')

In [None]:
# plot the mean profit for purchase quantities 0..29
plt.plot(wins[:30])
# red dashed vertical line at 7 flowers
plt.axvline(x=7, color='r', linestyle='--')
# black dashed horizontal line at zero profit
plt.axhline(y=0, color='black', linestyle='--')


# determine upper threshold for flowers to buy
def zero_crossing(wins):
    """Return the last index with positive profit before the profit turns negative.

    Args:
        wins: sequence of mean profits, indexed by number of flowers bought.

    Returns:
        ``i - 1`` for the first adjacent pair with ``wins[i - 1] > 0`` and
        ``wins[i] < 0``, or -1 if no such pair exists.
    """
    # Scan up to and including the final element; stopping one short would
    # miss a sign change that happens in the last pair.
    for i in range(1, len(wins)):
        if wins[i] < 0 and wins[i - 1] > 0:
            return i - 1
    return -1


# last purchase quantity with a positive mean profit before it turns negative
# (zero_crossing returns -1 if no sign change was found)
upper_threshold = zero_crossing(wins)
print(f'Upper limit of flowers to buy: {upper_threshold}')

# green dashed vertical line at that threshold, then render the figure
plt.axvline(x=upper_threshold, color='g', linestyle='--')
plt.show()

### Exercise 32
Use your implementation from the previous exercise and graphically visualize the confidence interval as a function of the number of simulated days (plot the mean profit and the upper and lower CI bounds). How many days must be simulated to reach an estimator accuracy of ±1% at a 99% confidence level?

In [None]:
# Determine the right n for the control function
def determine_n(accuracy=0.01, confidence=0.99):
    """Return the number of simulated days for a given accuracy and confidence.

    Computes ``ceil((z / (2 * accuracy)) ** 2)``, where ``z`` is the two-sided
    standard normal quantile for ``confidence``. This is the same as
    ``(z * 0.5 / accuracy) ** 2``, i.e. the standard deviation is fixed at 0.5;
    the variance of the simulated profit is not used.
    NOTE(review): for a +-1% accuracy relative to the mean profit, the sample
    standard deviation and mean would have to enter the formula -- confirm
    which definition of accuracy the exercise intends.

    Args:
        accuracy: half-width of the tolerated error, must be > 0.
        confidence: two-sided confidence level, must lie strictly in (0, 1).

    Returns:
        int: required sample size, rounded up.

    Raises:
        ValueError: if ``accuracy`` or ``confidence`` is out of range.
    """
    if not accuracy > 0:
        raise ValueError('accuracy must be positive')
    if not 0 < confidence < 1:
        raise ValueError('confidence must lie strictly between 0 and 1')
    z = stats.norm.ppf(1 - ((1 - confidence) / 2))
    n_target = (z / (2 * accuracy)) ** 2
    return int(np.ceil(n_target))


# sample size computed by determine_n (0.01 accuracy, 0.99 confidence); reused below as one of the tested sample sizes
n = determine_n()
print(f'Number of simulated days needed for 99% confidence and 1% accuracy: {n}')

In [None]:
# Confidence interval of the mean profit at the best purchase quantity,
# computed for several numbers of simulated days.
# INDIVIDUALLY ADD ADDITIONAL TESTS WITH DIFFERENT N!!
l_arr = []   # CI half-widths, one entry per sample size
mu_arr = []  # sample means, one entry per sample size
trails = [500, 5000, 10000, n]
alpha = 0.01
confidence = 0.99
for days_simulated in trails:
    sample = control(max_win, days_simulated)

    # two-sided Student-t quantile with days_simulated - 1 degrees of freedom
    t_quantile = stats.t.ppf(1 - (alpha / 2), days_simulated - 1)

    mu_hat = np.mean(sample)
    sample_std = np.std(sample, ddof=1)
    half_width = t_quantile * sample_std / np.sqrt(days_simulated)

    mu_arr.append(mu_hat)
    l_arr.append(half_width)

    ci_lower, ci_upper = mu_hat - half_width, mu_hat + half_width
    print(f'N: {days_simulated}')
    print(f'Lower ci boundry: {ci_lower:.2f}')
    print(f'Upper ci boundry: {ci_upper:.2f}')

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Bar plot of the sample means in mu_arr, one bar per sample size in trails;
# the error bars are the confidence-interval half-widths collected in l_arr
plt.bar(trails, mu_arr, yerr=l_arr, capsize=10, width=3500)

# Adding labels and title
# NOTE(review): the axis labels are still placeholders; x holds the number of
# simulated days and y the mean profit
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('Bar Plot with Confidence Intervals')

# Displaying the plot
plt.show()

### Exercise 33
The flower shop from exercise 31 builds a cold storage. Flowers left over can therefore be sold on the following day (and only on that day) for 5 Euro apiece, but only once all flowers purchased for that day have been sold and there is still demand.
Analyze whether the introduction of the cold storage causes a significant change in the expected profit at a daily purchase of 5 or 10 flowers (1% level of significance). At approximately what sample size (number of simulated days) is the statistical test able to detect the additional returns? Justify the choice of statistical test.

In [None]:
# Unsold flowers carried over from the previous simulated day (module-level state).
LEFT_OVER_PREV_DAY = 0


def simu_old_flowers(flowers, demand_fn=None):
    """Simulate one day of sales with cold storage and return the day's profit.

    Fresh flowers sell for 7 Euro. Flowers left over from the previous day sell
    for 5 Euro, but only after all fresh flowers are gone and demand remains.
    The function reads and updates the global ``LEFT_OVER_PREV_DAY``.

    Args:
        flowers: number of fresh flowers bought for the day.
        demand_fn: optional zero-argument callable returning the day's demand;
            defaults to the module's ``demand`` function.

    Returns:
        Revenue from fresh and day-old flowers minus the purchase cost.
    """
    global LEFT_OVER_PREV_DAY
    bought_price = 3
    sell_price = 7
    sell_price_old = 5
    a_demand = demand() if demand_fn is None else demand_fn()  # flowers requested today

    fresh_sold = min(a_demand, flowers)
    # Day-old stock is only offered for the demand the fresh flowers could not cover.
    old_sold = min(a_demand - fresh_sold, LEFT_OVER_PREV_DAY)
    win = sell_price * fresh_sold + sell_price_old * old_sold - bought_price * flowers

    # Stored flowers are either sold today or expire, so only today's unsold
    # fresh flowers carry over. Without this reset on sell-out days the same
    # old flowers could be sold again on later days.
    LEFT_OVER_PREV_DAY = flowers - fresh_sold
    return win


def control_old_flowers(buy, n):
    """Run the cold-storage simulation for ``n`` consecutive days.

    Args:
        buy: number of fresh flowers bought each day.
        n: number of days to simulate.

    Returns:
        list with the ``n`` daily profits returned by ``simu_old_flowers``.
    """
    global LEFT_OVER_PREV_DAY
    # Start every run with an empty cold storage so leftovers from an earlier
    # run (possibly with a different purchase quantity) do not leak into it.
    LEFT_OVER_PREV_DAY = 0
    return [simu_old_flowers(buy) for _ in range(n)]

In [None]:
# Estimate the mean daily profit with cold storage for every purchase quantity
# (10000 simulated days each) and pick the quantity with the highest mean.
# The scan starts at 0, like the one for `wins`, so the entries for 0-2 flowers
# hold simulated values instead of the zeros from initialisation.
wins_old = np.zeros(100)
for i in range(100):
    wins_old[i] = np.mean(control_old_flowers(i, 10000))
max_win_old_flowers = wins_old.argmax(axis=0)
print(f'Maximum profit with {max_win_old_flowers} flowers bought daily')

In [None]:
# Compare the mean profit without (blue) and with (red) cold storage.
plt.plot(wins[:30], c='blue')
plt.plot(wins_old[:30], c='red')
# red dashed vertical line at the best purchase quantity with cold storage
plt.axvline(x=max_win_old_flowers, color='r', linestyle='--')
# blue dashed vertical line at the best purchase quantity without cold storage;
# max_win is a flower count, so it belongs on the x-axis, not the profit axis
plt.axvline(x=max_win, color='b', linestyle='--')
# black dashed horizontal line at zero profit
plt.axhline(y=0, color='black', linestyle='--')
plt.show()

#### Statistical Test (real assignment)

In [None]:
def h0_check_profits_different(flowers, n, outputs_on=True):
    """Test whether the cold storage changes the daily profit.

    Simulates ``n`` days without cold storage (``control``) and ``n`` days with
    it (``control_old_flowers``) and compares the two unpaired samples.
    If both samples pass a Shapiro-Wilk check at the 5% level, an independent
    two-sample t-test is used, otherwise the Mann-Whitney U test.

    Args:
        flowers: number of flowers bought each day in both simulations.
        n: number of simulated days per sample.
        outputs_on: print intermediate results and the decision when True.

    Returns:
        The p-value of the chosen test; H0 is "the cold storage does not change
        the profits" and is judged at a 1% significance level in the printout.
    """
    def report(*parts):
        if outputs_on:
            print(*parts)

    # The baseline sample must come from the simulation WITHOUT cold storage;
    # drawing both samples from the cold-storage model would compare it to itself.
    result_no_old_flowers = control(flowers, n)
    results_old_flowers = control_old_flowers(flowers, n)

    # check for normal distribution
    _, p_b = shapiro(results_old_flowers)
    _, p_a = shapiro(result_no_old_flowers)
    # interpret
    alpha = 0.05
    if p_b > alpha and p_a > alpha:
        # Perform the independent (unpaired) two-sample t-test
        report('Data looks normal distributed')
        _, p_value = ttest_ind(results_old_flowers, result_no_old_flowers)
    else:
        report('Data not normal distributed')
        p_value = mannwhitneyu(results_old_flowers, result_no_old_flowers).pvalue

    # Set the significance level
    significance_level = 0.01

    report('p = ', p_value, " | a = ", significance_level)

    # Compare the p-value with the significance level
    # Ho = The cold storage does not change the profits
    if p_value < significance_level:
        report("Reject the null hypothesis. The cold storage makes the profits significantly different.")
    else:
        report("Fail to reject the null hypothesis. The cold storage makes the profits not significantly different")
    return p_value

In [None]:
# The two profit samples are compared to decide whether they are equal or significantly different --> quantitative data
# 2 independent groups --> 2 unpaired groups

# 5 FLOWERS: run the test once with 1000 simulated days per sample
print('Check the null hypothesis with 5 flowers and 1000 samples')
h0_check_profits_different(5, 1000)

In [None]:
# 10 FLOWERS: run the test once with 1000 simulated days per sample
print('Check the null hypothesis with 10 flowers and 1000 samples')
h0_check_profits_different(10, 1000)

In [None]:
# NOTE(review): the author was not sure whether this second-level check is needed.
def h0_check_of_prev_test(p_values, outputs_on=True):
    """Decide whether repeated runs of the profit test support rejecting its H0.

    Performs a one-sided, one-sample test of whether the p-values lie above the
    0.01 threshold. A one-sample t-test is used if the p-values pass a
    Shapiro-Wilk check at the 5% level, otherwise the Wilcoxon signed-rank test
    on the differences to 0.01. The test is one-sided so that p-values far
    BELOW 0.01 (strong evidence) are not mistaken for a deviation that argues
    against rejection.

    Args:
        p_values: p-values collected from repeated runs of the profit test.
        outputs_on: print intermediate results and the decision when True.

    Returns:
        False if the p-values are significantly greater than 0.01 at the 10%
        level (the earlier H0 should not be rejected), True otherwise.
    """
    def report(*parts):
        if outputs_on:
            print(*parts)

    # we have 1 group

    # check for normal distribution
    _, p = shapiro(p_values)

    # Reference value for the one-sample test: the 1% significance threshold
    null_hypothesis_mean = 0.01

    # interpret
    alpha = 0.05
    if p > alpha:
        # Perform the one-sided one-sample t-test
        report('Data looks normal distributed')
        _, p_value = ttest_1samp(p_values, null_hypothesis_mean, alternative='greater')
    else:
        report('Data not normal distributed')
        p_value = wilcoxon(np.array(p_values) - null_hypothesis_mean, alternative='greater').pvalue

    # Set the significance level
    significance_level = 0.1
    report('p = ', p_value)
    # Ho = Out of the previous statistical tests the null hypothesis should be rejected
    if p_value < significance_level:
        report("Reject the null hypothesis")
        report(f"With a significance of 10% the previous null hypothesis should not be rejected")
        return False
    report("Fail to reject the null hypothesis")
    report(f"With a significance of 10% the previous null hypothesis should be rejected.")
    return True

In [None]:
# HOW CAN THE CHECK OF N BE DONE CORRECTLY????
# 5 Flowers: scan the number of simulated days upward and stop at the first
# value for which h0_check_of_prev_test returns True for 1000 repeated tests.
# 0 is the "not found" marker, as for result_10; initialising it here avoids a
# NameError in the print below when no value in the range qualifies.
result_5 = 0
for days in range(3, 10000):
    p_values = [h0_check_profits_different(5, days, False) for _ in range(1000)]
    is_rejected = h0_check_of_prev_test(p_values, False)
    if is_rejected:
        result_5 = days
        break
print(f'Result n for rejecting the H0 with 5 flowers: {result_5}')

In [None]:
# HOW CAN THE CHECK OF N BE DONE CORRECTLY????
# 10 Flowers: scan the number of simulated days upward (from 100) and keep the
# first value for which h0_check_of_prev_test returns True for 50 repeated tests.
for days in range(100, 10000):
    p_values = [h0_check_profits_different(10, days, False) for _ in range(50)]
    if h0_check_of_prev_test(p_values, False):
        result_10 = days
        break
else:
    # no value in the scanned range qualified
    result_10 = 0
print(f'Result n for rejecting the H0 with 10 flowers: {result_10}')