<a href="https://colab.research.google.com/github/RoetGer/decisions-under-uncertainty/blob/main/data_science_and_stochastic_programming.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install cvxpy
!pip install cvxstoc

Collecting cvxstoc
  Downloading https://files.pythonhosted.org/packages/ad/0d/6e47ddb7c55a35c765dc6ddad5b4cc9ade7a0b90fbfa692bf1120819b1d4/cvxstoc-0.2.2-py3-none-any.whl
Collecting pymc>=2.3.4
[?25l  Downloading https://files.pythonhosted.org/packages/37/81/9a222c38c65019de9ad5a1ee2448cc4a9b5f7a64eeaf246c77f81c0e6f94/pymc-2.3.8.tar.gz (385kB)
[K     |████████████████████████████████| 389kB 5.1MB/s 
Building wheels for collected packages: pymc
  Building wheel for pymc (setup.py) ... [?25l[?25hdone
  Created wheel for pymc: filename=pymc-2.3.8-cp37-cp37m-linux_x86_64.whl size=1352892 sha256=240348d2caf4029f1373e32d6b46fa133c08697c2447ad951cebb9205c93cb37
  Stored in directory: /root/.cache/pip/wheels/0b/a8/e7/8f3ba91a39294d538a92db052fd1fcba1fca74a58c8b022026
Successfully built pymc
Installing collected packages: pymc, cvxstoc
Successfully installed cvxstoc-0.2.2 pymc-2.3.8


# Data Science and Stochastic Programming

In this notebook we explore, how stochastic programming can be used to incorporate uncertainty stemming from data science models into our decision making process.

In [2]:
import cvxstoc
from cvxstoc import NormalRandomVariable, expectation, prob
from cvxpy import Maximize, Problem
from cvxpy.expressions.variable import Variable
import numpy as np
import pymc

# Samples to be taken
num_samples = 100

# Create problem data.
n = 10
mu = np.zeros(n)
Sigma = 0.1*np.eye(n)
p = NormalRandomVariable(mu, Sigma)
alpha = -1
beta = 0.05

# Create and solve stochastic optimization problem.
x = Variable(n)
p = Problem(
    Maximize(expectation(x.T*p, num_samples=num_samples)),
    [
      x >= 0, 
      x.T*np.ones(n) == 1,
      prob(x.T*p <= alpha, num_samples=num_samples) <= beta
    ]
)

p.solve()

  import pandas.util.testing as tm


0.04516601894581204

In [6]:
tau = np.array([[1., 0.5], [0.5, 2]])
mu = np.zeros(2)
vals = pymc.MvNormal('vals', mu=mu, tau=tau)

In [12]:
samples = np.random.normal(size=(100,2))
samples[:10]

array([[-1.65861409,  0.96750151],
       [-1.5156057 , -0.94352391],
       [ 0.31158114,  1.29548995],
       [ 1.26497755, -0.49662183],
       [ 0.4719533 , -0.67025769],
       [-0.3138236 ,  0.7154132 ],
       [-1.07965439,  1.58505181],
       [ 0.89197148,  0.16122934],
       [ 1.21772272,  0.6524755 ],
       [ 0.36731543,  0.31862433]])

In [15]:
np.random.choice

array([[-1.5156057 , -0.94352391],
       [ 1.26497755, -0.49662183],
       [-0.3138236 ,  0.7154132 ]])

In [3]:
from cvxstoc import RandomVariable, RandomVariableFactory

In [None]:
??cvxstoc

In [None]:
??RandomVariable

In [4]:
??NormalRandomVariable

In [6]:
??RandomVariableFactory

In [5]:
??RandomVariableFactory.create_normal_rv

In [None]:
pymc_rv = pymc.stochastic_from_data(
    name="Empirical", 
    data=np.random.normal(size=(100,)), 
    lower=-np.inf, 
    upper=np.inf)

In [None]:
pymc_rv.random()

0.46706672849725195

In [None]:
??pymc.Normal

In [None]:
pymc.Normal(name="blub", mu=2., tau=1., size=(3,))

<pymc.distributions.new_dist_class.<locals>.new_class 'blub' at 0x7f7c9249aad0>

In [None]:
??RandomVariable

In [11]:
def EmpiricalRandomVariable(samples, mu):
    return create_empirical_rv(samples, mu)

def create_empirical_rv(samples, 
                        mu, 
                        interpolate=False, 
                        lower=-np.inf, 
                        upper=np.inf):
    rv_name = "empiricial_placeholder"

    if interpolate:
      rv_pymc = pymc.stochastic_from_data(
          name=rv_name, 
          data=samples, 
          lower=lower, 
          upper=upper)
    else:
    
    
    metadata = {
      "mu": mu
    }

    return RandomVariable(rv=rv_pymc, metadata=metadata)

samples = np.random.normal(size=(100,))
mu = np.mean(samples)

erv = EmpiricalRandomVariable(samples, mu)


In [25]:
from functools import partial

rv_name = "placeholder"

n = samples.shape[0]
mv = len(samples.shape) > 1

def random_sample(self):
    ridx = np.random.randint(low=0, high=n, size=1)
    return samples[ridx]

rv_pymc = pymc.stochastic_from_dist(
    name=rv_name,
    logp=lambda x: -np.log(n),
    random=random_sample,
    mv=mv)

In [26]:
rv_pymc.random()

TypeError: ignored

In [21]:
samples[np.random.randint(low=0, high=2, size=1)]

array([[-1.65861409,  0.96750151]])

In [19]:
samples

array([[-1.65861409,  0.96750151],
       [-1.5156057 , -0.94352391],
       [ 0.31158114,  1.29548995],
       [ 1.26497755, -0.49662183],
       [ 0.4719533 , -0.67025769],
       [-0.3138236 ,  0.7154132 ],
       [-1.07965439,  1.58505181],
       [ 0.89197148,  0.16122934],
       [ 1.21772272,  0.6524755 ],
       [ 0.36731543,  0.31862433],
       [ 0.40330888, -0.63194292],
       [-1.25105646, -2.47115125],
       [ 0.91846478,  0.58322568],
       [ 0.65337692,  0.8399175 ],
       [ 0.77900331, -1.64983168],
       [-2.01430252,  1.72345488],
       [ 0.32516688, -1.15398951],
       [-0.95501668, -1.05520575],
       [-0.69734027,  0.0753211 ],
       [ 0.76962714, -0.68317627],
       [-0.33835759,  0.43405157],
       [-0.42368968, -0.97591641],
       [-0.02800192,  1.02176581],
       [ 0.56868814,  0.15060733],
       [ 0.7386143 , -1.13418321],
       [ 0.27463411,  0.04686005],
       [-0.69590721, -0.69714955],
       [-1.18666709,  0.31495104],
       [-0.320207  ,

In [23]:
p = NormalRandomVariable(mu, Sigma)
x.T*p
expectation(x.T*p, num_samples=num_samples)



Expression(AFFINE, UNKNOWN, (10,))

In [24]:
p = EmpiricalRandomVariable(samples, np.mean(samples))
x.T*p
expectation(x*p, num_samples=num_samples)



Expression(AFFINE, UNKNOWN, (1,))

In [34]:
p = EmpiricalRandomVariable(samples, np.mean(samples))
alpha = -1
beta = 0.05
n =  1

# Create and solve stochastic optimization problem.
x = Variable(n, name="x")
p = Problem(
    Maximize(expectation(x*p, num_samples=num_samples)),
    [
      x >= 0, 
     # x*np.ones(n) == 1,
      prob(x*p <= alpha, num_samples=num_samples) <= beta
    ]
)

p.solve()



-3.991545276504316e-12

In [35]:
np.mean(samples)

-0.2177907409688715

In [37]:
import scipy as scp

scp.stats.norm.cdf(-1)

0.15865525393145707

In [33]:
for variable in p.variables():
    print("Variable %s: value %s" % (variable.name(), variable.value))

Variable x: value [2.42765387e-11]
Variable var48963: value 1.012806553248664
