## Synthetic Data Generator

This script generates synthetic customer (student) data based on our survey results. We generate a student pool large enough that we can bootstrap from it (to simulate stochastic demand) for our optimization problem.

In [None]:
import numpy as np
import pandas as pd

We import the survey results in the following cells.

In [None]:
# Survey-derived purchase distributions for the "reg" student segment
# (presumably "regular" -- confirm against the survey write-up).
# Each *_prob_* list is aligned element-wise with its *_values_* list and is
# later passed as `p=` to np.random.choice, so it must sum to 1.
drink_values_reg = list(range(7))  # possible drink counts: 0..6
food_values_reg = [0, 1, 2, 3]  # possible food-item counts

beer_prob_reg = [0.1, 0.1, 0.3, 0.3, 0.15, 0.03, 0.02]
nonalcohol_prob_reg = [0.1, 0.1, 0.2, 0.35, 0.2, 0.03, 0.02]

burger_prob_reg = [0.4, 0.3, 0.2, 0.1]
hotdog_prob_reg = [0.3, 0.4, 0.2, 0.1]

In [None]:
# Survey-derived purchase distributions for the "crz" student segment.
# Quantities come in steps of two; each probability list lines up
# element-wise with the matching value list.
drink_values_crz = list(range(0, 14, 2))  # 0, 2, ..., 12
food_values_crz = list(range(0, 8, 2))  # 0, 2, 4, 6

beer_prob_crz = [0.05, 0.05, 0.1, 0.15, 0.15, 0.3, 0.2]
nonalcohol_prob_crz = [0.2, 0.1, 0.4, 0.2, 0.05, 0.03, 0.02]

burger_prob_crz = [0.4, 0.3, 0.2, 0.1]
hotdog_prob_crz = [0.3, 0.4, 0.2, 0.1]

In [None]:
# Binary time-window flag and its per-window distribution for the "reg"
# segment. In each probability pair the first entry belongs to value 1 and
# the second to value 0 (same order as time_window_values).
# NOTE(review): 1 presumably means "student attends in that window" -- confirm.
time_window_values = [1, 0]
time_window1_prob_reg = [0.3, 0.7]
time_window2_prob_reg = [0.45, 0.55]
time_window3_prob_reg = [0.75, 0.25]

In [None]:
# Time-window flag distribution for the "crz" segment: each pair is
# [P(flag == 1), P(flag == 0)] for windows 1, 2 and 3 respectively.
time_window1_prob_crz = [0.6, 0.4]
time_window2_prob_crz = [0.75, 0.25]
time_window3_prob_crz = [0.9, 0.1]

In [None]:
def discrete_prob_generator(values, probabilities, size):
    """Draw random samples from a discrete distribution.

    Thin wrapper around ``np.random.choice`` that uses NumPy's global
    random state.

    Parameters
    ----------
    values
        Candidate outcomes to sample from.
    probabilities
        Probability of each entry in ``values``, in the same order.
    size
        Number of samples to draw (output shape).

    Returns
    -------
    numpy.ndarray
        The sampled outcomes.
    """
    return np.random.choice(a=values, size=size, p=probabilities)

In [None]:
# Number of "reg" students in the pool.
n = 1950

# One row per student; every column is sampled independently from its own
# discrete distribution. The dict entries are evaluated top to bottom, so the
# random draws happen in the same order as the column order.
df_students_reg = pd.DataFrame(
    {
        "BEER": discrete_prob_generator(drink_values_reg, beer_prob_reg, n),
        "NONALCOHOL": discrete_prob_generator(drink_values_reg, nonalcohol_prob_reg, n),
        "BURGER": discrete_prob_generator(food_values_reg, burger_prob_reg, n),
        "HOTDOG": discrete_prob_generator(food_values_reg, hotdog_prob_reg, n),
        "TW1": discrete_prob_generator(time_window_values, time_window1_prob_reg, n),
        "TW2": discrete_prob_generator(time_window_values, time_window2_prob_reg, n),
        "TW3": discrete_prob_generator(time_window_values, time_window3_prob_reg, n),
    },
    index=np.arange(n),
)

In [None]:
# Show the first five generated "reg" students as a sanity check.
df_students_reg.head(n=5)

Unnamed: 0,BEER,NONALCOHOL,BURGER,HOTDOG,TW1,TW2,TW3
0,3,0,1,2,0,0,1
1,4,4,2,2,0,1,1
2,4,3,3,1,1,0,1
3,3,3,2,0,0,0,1
4,4,4,1,1,0,0,1


In [None]:
# The "crz" segment fills the rest of the 3000-student pool.
m = 3000 - n

# One row per student; every column is sampled independently from its own
# discrete distribution. The dict entries are evaluated top to bottom, so the
# random draws happen in the same order as the column order.
df_students_crz = pd.DataFrame(
    {
        "BEER": discrete_prob_generator(drink_values_crz, beer_prob_crz, m),
        "NONALCOHOL": discrete_prob_generator(drink_values_crz, nonalcohol_prob_crz, m),
        "BURGER": discrete_prob_generator(food_values_crz, burger_prob_crz, m),
        "HOTDOG": discrete_prob_generator(food_values_crz, hotdog_prob_crz, m),
        "TW1": discrete_prob_generator(time_window_values, time_window1_prob_crz, m),
        "TW2": discrete_prob_generator(time_window_values, time_window2_prob_crz, m),
        "TW3": discrete_prob_generator(time_window_values, time_window3_prob_crz, m),
    },
    index=np.arange(m),
)

In [None]:
# Show the first five generated "crz" students as a sanity check.
df_students_crz.head(n=5)

Unnamed: 0,BEER,NONALCOHOL,BURGER,HOTDOG,TW1,TW2,TW3
0,4,4,0,0,0,0,0
1,10,10,2,2,0,0,1
2,6,4,0,0,0,0,1
3,8,4,0,2,0,1,1
4,2,4,0,2,0,1,1


In [None]:

# Stack both segments into a single pool with a fresh 0..N-1 index.
df_students = pd.concat([df_students_reg, df_students_crz], ignore_index=True)

# Drop students whose three time-window flags are all 0; the surviving rows
# keep their index labels from the concatenated frame.
tw_flag_total = df_students["TW1"] + df_students["TW2"] + df_students["TW3"]
df_students = df_students.loc[tw_flag_total > 0]


In [None]:
# Persist the student pool; the DataFrame index is written as the first column.
df_students.to_csv("students.csv", index=True)