<a href="https://colab.research.google.com/github/RoetGer/decisions-under-uncertainty/blob/main/data_science_and_stochastic_programming.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install cvxpy
!pip install cvxstoc

Collecting cvxstoc
  Downloading https://files.pythonhosted.org/packages/ad/0d/6e47ddb7c55a35c765dc6ddad5b4cc9ade7a0b90fbfa692bf1120819b1d4/cvxstoc-0.2.2-py3-none-any.whl
Collecting pymc>=2.3.4
[?25l  Downloading https://files.pythonhosted.org/packages/37/81/9a222c38c65019de9ad5a1ee2448cc4a9b5f7a64eeaf246c77f81c0e6f94/pymc-2.3.8.tar.gz (385kB)
[K     |████████████████████████████████| 389kB 5.1MB/s 
Building wheels for collected packages: pymc
  Building wheel for pymc (setup.py) ... [?25l[?25hdone
  Created wheel for pymc: filename=pymc-2.3.8-cp37-cp37m-linux_x86_64.whl size=1352876 sha256=20e1a278ff2fab7b48c1f7fda574c7f60290e96a5dbabd7b2bdd777133099866
  Stored in directory: /root/.cache/pip/wheels/0b/a8/e7/8f3ba91a39294d538a92db052fd1fcba1fca74a58c8b022026
Successfully built pymc
Installing collected packages: pymc, cvxstoc
Successfully installed cvxstoc-0.2.2 pymc-2.3.8


# Data Science and Stochastic Programming

In this notebook we explore, how stochastic programming can be used to incorporate uncertainty stemming from data science models into our decision making process.

In [2]:
import cvxstoc
from cvxstoc import NormalRandomVariable, expectation, prob
from cvxpy import Maximize, Problem
from cvxpy.expressions.variable import Variable
import numpy as np
import pymc

# Samples to be taken
num_samples = 100

# Create problem data.
n = 10
mu = np.zeros(n)
Sigma = 0.1*np.eye(n)
p = NormalRandomVariable(mu, Sigma)
alpha = -1
beta = 0.05

# Create and solve stochastic optimization problem.
x = Variable(n)
p = Problem(
    Maximize(expectation(x.T*p, num_samples=num_samples)),
    [
      x >= 0, 
      x.T*np.ones(n) == 1,
      prob(x.T*p <= alpha, num_samples=num_samples) <= beta
    ]
)

p.solve()

  import pandas.util.testing as tm


0.07499269523236736

In [6]:
p.variables()

[Variable((10,)), Variable((), nonneg=True)]

In [6]:
tau = np.array([[1., 0.5], [0.5, 2]])
mu = np.zeros(2)
vals = pymc.MvNormal('vals', mu=mu, tau=tau)

In [7]:
samples = np.random.normal(size=(100,2))
samples[:10]

array([[-1.22204774, -0.80502631],
       [-1.16277366, -1.75859717],
       [-0.53529025,  0.41305981],
       [-0.08021858,  0.35732583],
       [ 1.45776074,  0.70228566],
       [ 0.18337108, -0.13817061],
       [ 2.36300253,  0.74465998],
       [-0.20922131,  0.87274293],
       [ 0.30309349, -1.09639681],
       [ 0.6381617 ,  1.94197565]])

In [15]:
np.random.choice

array([[-1.5156057 , -0.94352391],
       [ 1.26497755, -0.49662183],
       [-0.3138236 ,  0.7154132 ]])

In [8]:
from cvxstoc import RandomVariable, RandomVariableFactory

In [None]:
??cvxstoc

In [None]:
??RandomVariable

In [4]:
??NormalRandomVariable

In [6]:
??RandomVariableFactory

In [5]:
??RandomVariableFactory.create_normal_rv

In [None]:
pymc_rv = pymc.stochastic_from_data(
    name="Empirical", 
    data=np.random.normal(size=(100,)), 
    lower=-np.inf, 
    upper=np.inf)

In [None]:
pymc_rv.random()

0.46706672849725195

In [None]:
??pymc.Normal

In [None]:
pymc.Normal(name="blub", mu=2., tau=1., size=(3,))

<pymc.distributions.new_dist_class.<locals>.new_class 'blub' at 0x7f7c9249aad0>

In [None]:
??RandomVariable

In [12]:
def EmpiricalRandomVariable(samples, mu, interpolate=False):
    return create_empirical_rv(samples, mu, interpolate=interpolate)

def create_empirical_rv(samples, 
                        mu, 
                        interpolate=False, 
                        lower=-np.inf, 
                        upper=np.inf):
    rv_name = "empiricial_placeholder"

    if interpolate:
      rv_pymc = pymc.stochastic_from_data(
          name=rv_name, 
          data=samples, 
          lower=lower, 
          upper=upper)
    else:
      pass
    
    
    metadata = {
      "mu": mu
    }

    return RandomVariable(rv=rv_pymc, metadata=metadata)

samples = np.random.normal(size=(100,))
mu = np.mean(samples)

erv = EmpiricalRandomVariable(samples, mu, interpolate=True)


In [45]:
??pymc.stochastic_from_data

In [13]:
from functools import partial

rv_name = "placeholder"

n = samples.shape[0]
mv = len(samples.shape) > 1

@pymc.randomwrap
def random_sample(lower, upper, size=None):
    ridx = np.random.randint(low=lower, high=upper, size=size)
    return ridx #samples[ridx]

rv_pymc = pymc.stochastic_from_dist(
    name=rv_name,
    logp=pymc.discrete_uniform_like,
    random=random_sample,
    mv=mv)

In [88]:
import numpy as np
import pymc


def EmpiricalRandomVariable(name, 
                            samples,
                            interpolate=False, 
                            lower=float('-inf'), 
                            upper=float('inf')):
    '''
    Create a pymc node whose distribution comes either from a kernel smoothing density estimate or from
    the empirical samples.'''
    
    if interpolate:
      rv_pymc = pymc.stochastic_from_data(
          name=rv_name, 
          data=samples, 
          lower=lower, 
          upper=upper)
    else:
        nobs = samples.shape[0]

        def logp(value):
            return -np.log(nobs)

        def random():
            ridx = np.random.randint(low=0, high=nobs, size=1)
            return samples[ridx]

        value = random() 
        dtype = type(value)
    
        rv_pymc = pymc.Stochastic(
            logp = logp,
            doc = "A node which bootstrap samples from the provided dataset",
            name = name,
            parents = {},
            random = random,
            trace = True,
            dtype = dtype)
    
    return rv_pymc

In [46]:
??pymc.Stochastic

In [92]:
ksv = EmpiricalRandomVariable("EmpiricalRV", 
                              np.random.normal(size=(100,)),
                              interpolate=False)
alpha = -1
beta = 0.05
n =  1

# Create and solve stochastic optimization problem.
x = Variable(n, name="x")
p = Problem(
    Maximize(expectation(x*ksv, num_samples=num_samples)),
    [
      x >= 0, 
     # x*np.ones(n) == 1,
      prob(x*ksv <= alpha, num_samples=num_samples) <= beta
    ]
)

p.solve()

inf

In [55]:
pymc.discrete_uniform_like(5, 0, n-1)

-1.7976931348623157e+308

In [14]:
n = 10
mut = np.zeros(n)
Sigma = 0.1*np.eye(n)
nrv = NormalRandomVariable(mut, Sigma)

In [44]:
import scipy
??scipy.stats.kde.gaussian_kde

In [41]:
??RandomVariableFactory().create_normal_rv

In [19]:
dir(nrv)

['H',
 'PARAM_COUNT',
 'T',
 '_Expression__is_zero',
 '__abstractmethods__',
 '__add__',
 '__array_priority__',
 '__class__',
 '__deepcopy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__pow__',
 '__radd__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rlshift__',
 '__rmatmul__',
 '__rmul__',
 '__rrshift__',
 '__rshift__',
 '__rsub__',
 '__rtruediv__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '_abc_impl',
 '_get_attr_str',
 '_metadata',
 '_model',
 '_name',
 '_rv',
 '_shape',
 '_val_map',
 '_validate_value',
 '_value',
 '_value_impl',
 'args',
 'atoms',
 'attributes',
 'boolean_idx',
 'canonical_form',
 'canonicalize',
 'cast_to_const

In [18]:
??cvxstoc.random_variable.RandomVariable

In [93]:
rv_pymc("asdf", lower=0, upper=n-1)

<pymc.distributions.new_dist_class.<locals>.new_class 'asdf' at 0x7f70908a9890>

In [68]:
Exponential = pymc.stochastic_from_dist('exponential',
                                    logp=pymc.exponential_like,
                                    random=pymc.rexponential,
                                    dtype=np.float,
                                    mv=False)
A = Exponential("sdfasd", 5., beta=2.)

TypeError: ignored

In [74]:
Exponential("sdasa", beta=2., value=5.)

<pymc.distributions.new_dist_class.<locals>.new_class 'sdasa' at 0x7f7090911b90>

In [70]:
@pymc.stochastic(dtype=int)
def switchpoint(value=1900, t_l=1851, t_h=1962):
    """The switchpoint for the rate of disaster occurrence."""

    def logp(value, t_l, t_h):
        if value > t_h or value < t_l:
            return -np.inf
        else:
            return -np.log(t_h - t_l + 1)

    def random(t_l, t_h):
        from numpy.random import random
        return np.round( (t_l - t_h) * random() ) + t_l

switchpoint(value=1900, t_l=1851, t_h=1962).random()

TypeError: ignored

In [45]:
pymc.Normal

In [48]:
??pymc.stochastic_from_dist

In [72]:
pymc.exponential_like(4., 2.)

-7.306852819440055

In [40]:
rv_pymc(name="dsfs", lower=0, upper=n-1).random()

TypeError: ignored

In [41]:
pymc.DiscreteUniform(name="bla", lower=0, upper=2).random()

array(0)

In [21]:
samples[np.random.randint(low=0, high=2, size=1)]

array([[-1.65861409,  0.96750151]])

In [38]:
??pymc.stochastic_from_dist

In [43]:
??pymc.discrete_uniform_like

In [23]:
p = NormalRandomVariable(mu, Sigma)
x.T*p
expectation(x.T*p, num_samples=num_samples)



Expression(AFFINE, UNKNOWN, (10,))

In [24]:
p = EmpiricalRandomVariable(samples, np.mean(samples))
x.T*p
expectation(x*p, num_samples=num_samples)



Expression(AFFINE, UNKNOWN, (1,))

In [34]:
p = EmpiricalRandomVariable(samples, np.mean(samples))
alpha = -1
beta = 0.05
n =  1

# Create and solve stochastic optimization problem.
x = Variable(n, name="x")
p = Problem(
    Maximize(expectation(x*p, num_samples=num_samples)),
    [
      x >= 0, 
     # x*np.ones(n) == 1,
      prob(x*p <= alpha, num_samples=num_samples) <= beta
    ]
)

p.solve()



-3.991545276504316e-12

In [35]:
np.mean(samples)

-0.2177907409688715

In [37]:
import scipy as scp

scp.stats.norm.cdf(-1)

0.15865525393145707

In [33]:
for variable in p.variables():
    print("Variable %s: value %s" % (variable.name(), variable.value))

Variable x: value [2.42765387e-11]
Variable var48963: value 1.012806553248664
