# Exercise Sheet 9

### Exercise 31
A florist buys highly perishable flowers from a wholesaler for 3 Euro apiece and sells them for 7 Euro apiece. Any flower not sold on the day of purchase is worthless and is thrown away. The daily demand X (number of flowers requested by customers) follows the distribution below:

| k      | 0    | 1    | 2    | 3    | 4    | 5   | 6   | 7   | 8   | 9    | 10   | 11   | 12   | >12 |
|--------|------|------|------|------|------|-----|-----|-----|-----|------|------|------|------|-----|
| P[X=k] | 0.01 | 0.02 | 0.03 | 0.04 | 0.05 | 0.1 | 0.2 | 0.2 | 0.1 | 0.09 | 0.08 | 0.07 | 0.01 | 0.0 |

Implement a simulation with the given requirements and run it for 1 year. Answer subsequent questions using a graphical visualisation:
* How many flowers must the florist buy daily for maximizing the profit?
* If the florist buys too many flowers a day, this will result in an expected loss. What is the corresponding threshold (number of flowers bought)?

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from scipy.stats import shapiro, ttest_rel, mannwhitneyu

# Ignoring warnings
warnings.filterwarnings('ignore')

In [None]:
# P[X=k] for k = 0..12, taken from the exercise table (P[X>12] = 0).
_DEMAND_PROBABILITIES = [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.2, 0.1, 0.09, 0.08, 0.07, 0.01]
# Frozen single-trial multinomial, built once instead of on every draw.
_DEMAND_DIST = stats.multinomial(n=1, p=_DEMAND_PROBABILITIES)


def demand():
    """Draw one day's flower demand from the empirical distribution.

    Returns:
        int: the number of flowers requested on the simulated day (0..12).
    """
    # A single multinomial trial yields a one-hot row; the position of the 1
    # is the sampled demand.  argmax on the 1-D row returns a scalar, which
    # avoids calling int() on a length-1 array (deprecated since NumPy 1.25).
    one_hot = _DEMAND_DIST.rvs(1)[0]
    return int(np.argmax(one_hot))


def simu(flowers):
    """Simulate one sales day without cold storage and return its profit.

    Args:
        flowers: number of flowers bought for the day.

    Returns:
        Revenue from the flowers sold (7 per piece) minus the purchase
        cost of all flowers bought (3 per piece).
    """
    unit_cost = 3
    unit_revenue = 7
    requested = demand()  # number of flowers customers asked for today
    # Sales are capped by the stock: never more than were bought.
    sold = requested if requested <= flowers else flowers
    return -unit_cost * flowers + unit_revenue * sold


def control(buy, n):
    """Run `simu(buy)` for `n` simulated days.

    Args:
        buy: number of flowers bought on every day.
        n: number of days to simulate.

    Returns:
        list with the profit of each simulated day, in simulation order.
    """
    daily_profits = []
    for _ in range(n):
        daily_profits.append(simu(buy))
    return daily_profits

In [None]:
# determine the maximum profit
# Mean simulated profit (10000 days each) for every purchase quantity 0..99;
# the array index is the number of flowers bought per day.
wins = np.array([np.mean(control(quantity, 10000)) for quantity in range(100)])
# Position of the largest mean profit == best daily purchase quantity.
max_win = wins.argmax(axis=0)
print(f'Maximum profit with {max_win} flowers bought daily')

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Seaborn settings for a more attractive plot
sns.set_style('whitegrid')

# Plot the mean profit for the first 30 purchase quantities
plt.figure(figsize=(10, 6))
sns.lineplot(x=range(len(wins[:30])), y=wins[:30])

# Red line at the simulated optimum.  Uses the computed `max_win` rather than
# a hard-coded 7 so the marker always matches the simulation result.
plt.axvline(x=max_win, color='r', linestyle='--', label='Max Win')

# Black line at 0: separates profit (above) from loss (below)
plt.axhline(y=0, color='black', linestyle='--', label='Profit Line')

# Determine upper threshold for flowers to buy
def zero_crossing(wins):
    """Return the last index before the profit curve drops to or below zero.

    Scans consecutive pairs ``(wins[i - 1], wins[i])`` and reports the first
    pair where the curve goes from non-negative to non-positive.

    Args:
        wins: sequence of mean profits, indexed by number of flowers bought.

    Returns:
        int: index ``i - 1`` of the last non-negative value before the
        crossing, or -1 if the curve never crosses zero (this includes
        sequences with fewer than two values).
    """
    # Every adjacent pair is checked, including the last one; the previous
    # bound `len(wins) - 1` skipped the final pair.
    for prev_index, (prev, cur) in enumerate(zip(wins, wins[1:])):
        if cur <= 0 <= prev:
            return prev_index
    return -1

# Largest purchase quantity before the mean profit turns non-positive;
# zero_crossing returns -1 when no such crossing is found, in which case the
# green line below is drawn at x = -1.
upper_threshold = zero_crossing(wins)
print(f'Upper limit of flowers to buy: {upper_threshold}')

# Green line at upper threshold
plt.axvline(x=upper_threshold, color='g', linestyle='--', label='Upper Profit Threshold')

# Enhancements: labels and legend
plt.title('Win Over Flower Purchase')
plt.xlabel('Flower')
plt.ylabel('Win')
plt.legend()
# One x-tick per purchase quantity (0..30)
plt.xticks(range(0, len(wins[:30])+1))

plt.show()

### Exercise 32
Use your implementation from the previous exercise and graphically visualize the confidence interval as a function of the number of simulated days (plot the mean profit together with the upper and lower CI bounds). How many days must be simulated for the estimator to reach an accuracy of ±1% at a 99% confidence level?

In [None]:
# Determine the right n for the control function
def determine_n(accuracy=0.01, confidence=0.99):
    """Return the number of simulated days for a target accuracy/confidence.

    Computes ``ceil((z / (2 * accuracy)) ** 2)`` where ``z`` is the two-sided
    standard-normal quantile for the requested confidence level.

    NOTE(review): this formula does not use the variance of the simulated
    profits; the factor 1/(2*accuracy) looks like the worst-case bound
    (sigma <= 0.5) -- confirm this is the formula intended by the course.

    Args:
        accuracy: half-width of the target interval (default 0.01, i.e. 1 %).
        confidence: two-sided confidence level in (0, 1) (default 0.99).

    Returns:
        int: required sample size, rounded up.

    Raises:
        ValueError: if `accuracy` is not positive or `confidence` is not
            strictly between 0 and 1.
    """
    if accuracy <= 0:
        raise ValueError('accuracy must be positive')
    if not 0 < confidence < 1:
        raise ValueError('confidence must be strictly between 0 and 1')
    z = stats.norm.ppf(1 - ((1 - confidence) / 2))
    n_target = (z / (accuracy * 2)) ** 2
    return int(np.ceil(n_target))


# Sample size computed by determine_n(); reused below as one of the simulated
# run lengths in `trails`.
n = determine_n()
print(f'Number of simulated days needed for 99% confidence and 1% accuracy: {n}')

In [None]:
# INDIVIDUALLY ADD ADDITIONAL TESTS WITH DIFFERENT N!!
l_arr = []   # confidence-interval half-widths, one per entry of `trails`
mu_arr = []  # sample means, one per entry of `trails`
trails = [500, 5000, 10000, n, 30000]
# 1 - confidence level: alpha = 0.01 gives two-sided 99 % intervals.
# Defined once because it does not change between runs.
alpha = 0.01
for number in trails:
    # Two-sided Student-t quantile with number - 1 degrees of freedom
    k = stats.t.ppf(1 - (alpha / 2), number - 1)

    # Simulate `number` days at the purchase quantity found to be optimal
    result = control(max_win, number)

    # Sample mean and sample standard deviation (ddof=1)
    mu_hat = np.mean(result)
    sigma = np.std(result, ddof=1)
    # Half-width of the interval: mu_hat +/- k * sigma / sqrt(number)
    half_width = k * sigma / np.sqrt(number)
    mu_arr.append(mu_hat)
    l_arr.append(half_width)
    ci_lower = mu_hat - half_width
    ci_upper = mu_hat + half_width
    print(f'N: {number}')
    print('Lower ci boundry: {:.2f}'.format(ci_lower))
    print(f'Mean: {mu_hat}')
    print('Upper ci boundry: {:.2f}'.format(ci_upper))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Seaborn settings for a more attractive plot
sns.set_style("whitegrid")

# Create a new figure
plt.figure(figsize=(10, 6))

# Bars show the sample mean per run length; the error bars span the
# confidence interval mu_hat +/- half-width taken from l_arr.
# Matplotlib's bar() is called directly because `yerr` is a Matplotlib
# argument rather than a documented seaborn.barplot one, and seaborn's
# `errwidth` is deprecated since seaborn 0.13.  The run lengths are turned
# into strings so they are drawn as evenly spaced categories.
plt.bar([str(number) for number in trails], mu_arr, yerr=l_arr, capsize=5, color='skyblue')

# Adding labels and title
plt.xlabel('N in Simulation')
plt.ylabel('mu_hat')
plt.title('Bar Plot with Confidence Intervals')

# Displaying the plot
plt.show()

### Exercise 33
The flower shop from Exercise 31 builds a cold storage. As a result, flowers left over from the previous day can still be sold on the following day (and only on that day) at a reduced price of 5 Euro, but only once all flowers purchased for the current day have been sold and there is still demand.
Analyze whether introducing the cold storage causes a significant change in the expected profit for a daily purchase of 5 flowers and of 10 flowers (1% level of significance). At approximately what sample size (number of simulated days) is the statistical test able to detect the additional returns? Justify your choice of statistical test.

In [None]:
# Fresh flowers left unsold on the previous simulated day; they may be sold
# today at the reduced price and are worthless afterwards.
LEFT_OVER_PREV_DAY = 0


def simu_old_flowers(flowers, a_demand=None):
    """Simulate one sales day with cold storage and return its profit.

    Fresh flowers are sold first at 7 per piece.  Only when all fresh flowers
    are sold and demand remains are yesterday's leftovers sold at 5 per piece.
    The module-level LEFT_OVER_PREV_DAY is read as yesterday's leftover stock
    and overwritten with today's unsold fresh flowers.

    Args:
        flowers: number of fresh flowers bought for the day (3 per piece).
        a_demand: number of flowers requested today.  When None (default) it
            is drawn with demand(); passing a value allows a pre-drawn or
            deterministic demand to be injected.

    Returns:
        Profit of the day: revenue from fresh and old flowers minus the
        purchase cost of the fresh flowers.
    """
    global LEFT_OVER_PREV_DAY
    bought_price = 3
    sell_price = 7
    sell_price_old = 5
    if a_demand is None:
        a_demand = demand()  # how many flowers were requested

    old_stock = LEFT_OVER_PREV_DAY
    fresh_sold = min(a_demand, flowers)
    # Old flowers only cover demand that the fresh flowers could not meet;
    # this is 0 whenever the fresh flowers did not sell out.
    old_sold = min(a_demand - fresh_sold, old_stock)

    # Yesterday's flowers cannot be kept for another day, so tomorrow's old
    # stock is exactly today's unsold fresh flowers.  On a sell-out day that
    # is 0; previously the stale value was kept there, which let the same
    # old flowers be sold again on later days.
    LEFT_OVER_PREV_DAY = flowers - fresh_sold

    return -bought_price * flowers + sell_price * fresh_sold + sell_price_old * old_sold


def control_old_flowers(buy, n):
    """Run `simu_old_flowers(buy)` for `n` consecutive simulated days.

    Args:
        buy: number of fresh flowers bought on every day.
        n: number of days to simulate.

    Returns:
        list with the profit of each simulated day, in simulation order.
    """
    global LEFT_OVER_PREV_DAY
    # Start every run with an empty cold storage so that leftovers from an
    # earlier run (possibly with a different purchase quantity) do not leak
    # into the first days of this one.
    LEFT_OVER_PREV_DAY = 0
    return [simu_old_flowers(buy) for _ in range(n)]

In [None]:
# determine the maximum profit
# Mean simulated profit with cold storage for every purchase quantity 0..99.
# The loop covers the same range as the run without cold storage; starting
# at 3 left wins_old[0..2] at their initial 0, and those placeholder zeros
# were then plotted as if they were simulation results.
wins_old = np.zeros(100)
for i in range(100):
    wins_old[i] = np.mean(control_old_flowers(i, 10000))
max_win_old_flowers = wins_old.argmax(axis=0)
print(f'Maximum profit with {max_win_old_flowers} flowers bought daily')

In [None]:
# Seaborn settings for a more attractive plot
sns.set_style("whitegrid")

# Create a new figure
plt.figure(figsize=(10, 6))

# Plot the 'wins' and 'wins_old' arrays (first 30 purchase quantities each)
sns.lineplot(x=range(len(wins[:30])), y=wins[:30], color='blue', label='Wins no cold storage')
sns.lineplot(x=range(len(wins_old[:30])), y=wins_old[:30], color='red', label='Wins selling old flowers')

# Green dashed line at max_win_old_flowers (best quantity with cold storage)
plt.axvline(x=max_win_old_flowers, color='g', linestyle='--', label='Max Old Wins')

# Black dashed line at zero profit
plt.axhline(y=0, color='black', linestyle='--', label='Profit Line')

# Adding labels and title
plt.title('Win Over Flower Purchase')
plt.xlabel('Flower')
plt.ylabel('Win')
plt.legend()
# One x-tick per purchase quantity (0..30)
plt.xticks(range(0, len(wins[:30])+1))

# Displaying the plot
plt.show()

#### Statistical Test (real assignment)

##### Declare the 2 datasets for testing (with high number of n)

In [None]:
# Daily profits for 100_000 simulated days, without and with cold storage,
# at a purchase quantity of 5 flowers.  The two samples come from separate
# control(...) / control_old_flowers(...) runs, i.e. each run draws its own
# demand values.
result_no_old_flowers_5 = control(5, 100_000)
results_old_flowers_5 = control_old_flowers(5, 100_000)

# Same pair of samples for a purchase quantity of 10 flowers.
result_no_old_flowers_10 = control(10, 100_000)
results_old_flowers_10 = control_old_flowers(10, 100_000)

In [None]:
def h0_check_profits_different(result_no_old_flowers, results_old_flowers, outputs_on=True):
    """Test H0: "the cold storage does not change the profits".

    Both samples are first checked for normality with the Shapiro-Wilk test
    (level 0.05).  If neither check rejects, a paired t-test (ttest_rel) is
    used, otherwise the Mann-Whitney U test.  H0 is rejected when the
    resulting p-value is below 0.01.

    NOTE(review): the paired t-test treats entry i of both samples as one
    matched observation, while Mann-Whitney U treats the samples as
    independent -- confirm which assumption matches how the samples passed
    in are generated.

    Args:
        result_no_old_flowers: daily profits without cold storage.
        results_old_flowers: daily profits with cold storage.
        outputs_on: print the intermediate results when True.

    Returns:
        True if H0 is rejected at the 1 % level, otherwise False.
    """
    normality_level = 0.05
    significance_level = 0.01

    def report(*parts):
        # Single switch for all optional console output.
        if outputs_on:
            print(*parts)

    # check for normal distribution
    _, p_with_storage = shapiro(results_old_flowers)
    _, p_without_storage = shapiro(result_no_old_flowers)
    looks_normal = p_with_storage > normality_level and p_without_storage > normality_level

    if looks_normal:
        report('Data looks normal distributed')
        p_value = ttest_rel(results_old_flowers, result_no_old_flowers).pvalue
    else:
        report('Data not normal distributed')
        p_value = mannwhitneyu(results_old_flowers, result_no_old_flowers).pvalue

    report('p = ', p_value, " | a = ", significance_level)

    # Ho = The cold storage does not change the profits
    if not p_value < significance_level:
        report("Fail to reject the null hypothesis. The cold storage makes the profits not significantly different")
        return False
    report("Reject the null hypothesis. The cold storage makes the profits significantly different.")
    return True

In [None]:
# Candidate sample sizes (number of simulated days) tried one after another.
# NOTE(review): presumably starts at 3 because scipy's shapiro needs at least
# three data points -- confirm.
test_days = range(3, 100_000)

In [None]:
#5 FLOWERS
print('Check the null hypothesis with 5 flowers and determine n where it gets rejected')
result_5 = 0
# Grow the sample prefix one simulated day at a time and stop at the first
# sample size for which H0 is rejected.
# NOTE(review): re-testing after every added day is a repeated-testing scheme;
# the first rejection can occur by chance, so treat the reported n as approximate.
for days in test_days:
    is_rejected = h0_check_profits_different(
        result_no_old_flowers_5[:days], results_old_flowers_5[:days], False
    )
    if not is_rejected:
        continue
    result_5 = days
    print(f'Result n for rejecting the H0 with 5 flowers: {result_5}')
    break
else:
    # The loop ran through every candidate size without a rejection.
    print('Range was not enough, or no difference determinable!')

In [None]:
#10 FLOWERS
print('Check the null hypothesis with 10 flowers and determine n where it gets rejected')
result_10 = 0
# Grow the sample prefix one simulated day at a time and stop at the first
# sample size for which H0 is rejected.
# NOTE(review): re-testing after every added day is a repeated-testing scheme;
# the first rejection can occur by chance, so treat the reported n as approximate.
for days in test_days:
    is_rejected = h0_check_profits_different(
        result_no_old_flowers_10[:days], results_old_flowers_10[:days], False
    )
    if not is_rejected:
        continue
    result_10 = days
    print(f'Result n for rejecting the H0 with 10 flowers: {result_10}')
    break
else:
    # The loop ran through every candidate size without a rejection.
    print('Range was not enough, or no difference determinable!')