# Machine Problem: Conditional Probability (Data Generation)

This [Python](https://www.python.org) challenge uses [numpy](https://numpy.org/) and its `random` module to generate pseudorandom numbers.

In [None]:
import numpy as np
from numpy.random import randint
from numpy.random import binomial
import pandas as pd
import matplotlib.pyplot as plt

The block below comes from the challenge itself.

In [None]:
def jar_seq(jar_number, seq_length=1):
    """Draw a sequence of 0/1 outcomes from the requested jar.

    Every draw is an independent binomial sample with a single trial.
    The probability of drawing a 1 is 192/384 for jar 0 and 256/384 for
    jar 1. Any other jar number uses probability 0, so all draws are 0.

    Parameters
    ----------
    jar_number : int
        Index of the jar to draw from.
    seq_length : int, optional
        Number of draws, passed to ``binomial`` as its ``size`` (default 1).

    Returns
    -------
    numpy.ndarray
        The drawn outcomes, each either 0 or 1.
    """
    if jar_number == 0:
        return binomial(1, 192 / 384, seq_length)
    if jar_number == 1:
        return binomial(1, 256 / 384, seq_length)
    # Unrecognised jar: probability 0 means every draw comes out as 0.
    return binomial(1, 0, seq_length)

def jar_rnd_seq(seq_length=1):
    """Draw outcomes where each draw comes from a randomly chosen jar.

    For every position a fair coin picks jar 0 or jar 1, and the outcome
    at that position is taken from the corresponding jar's draw.

    Parameters
    ----------
    seq_length : int, optional
        Number of draws to produce (default 1).

    Returns
    -------
    outcomes : numpy.ndarray
        Integer array of 0/1 outcomes, one per draw.
    jar_rnd_labels : numpy.ndarray
        Array of 0/1 jar labels; ``jar_rnd_labels[i]`` is the number of the
        jar that produced ``outcomes[i]``.
    """
    jar_rnd_labels = binomial(1, 0.5, seq_length)
    jar0_out = jar_seq(0, seq_length)
    jar1_out = jar_seq(1, seq_length)
    # Label k must select the draw from jar k so that the returned labels
    # name the jar actually used (the same convention as calling
    # jar_seq(label, ...) directly). The previous arithmetic paired label 1
    # with jar 0 and label 0 with jar 1.
    outcomes = np.where(jar_rnd_labels == 1, jar1_out, jar0_out)
    return outcomes.astype(int), jar_rnd_labels

The block below generates the data files for this machine problem.

In [None]:
# Number of samples (rows) in every generated data set.
seq_length = 2000

# ---- Data set with a single observation per sample ----
jar_outcomes1, jar_labels1 = jar_rnd_seq(seq_length)

data_observation1 = {'O1': jar_outcomes1}
df_observation1 = pd.DataFrame(data=data_observation1)
df_observation1.to_csv("input1.csv")

# All-zero decisions, one per sample, written as the output file.
decision1 = np.zeros(jar_labels1.shape).astype(int)
data_decision1 = {'decision1': decision1}
df_decision1 = pd.DataFrame(data=data_decision1)
df_decision1.to_csv("output1.csv")

data_labels1 = {'solution1': jar_labels1}
df_labels1 = pd.DataFrame(data=data_labels1)
df_labels1.to_csv("labels1.csv")


# ---- Data set with 16 observations per sample ----
num_observations16 = 16
columns16 = [f'O{k}' for k in range(1, num_observations16 + 1)]

# One jar per sample; all 16 observations of a sample come from that jar.
jar_labels16 = binomial(1, 0.5, seq_length)
observations16 = np.array(
    [jar_seq(label, num_observations16) for label in jar_labels16]
)
# Build the frame in one step instead of enlarging it row by row; the
# reshape keeps the (rows, 16) layout even when there are no rows.
df_observation16 = pd.DataFrame(
    observations16.reshape(-1, num_observations16), columns=columns16
)
df_observation16.to_csv("input16.csv")

decision16 = np.zeros(jar_labels16.shape).astype(int)
data_decision16 = {'decision16': decision16}
df_decision16 = pd.DataFrame(data=data_decision16)
# Write to the 16-observation output file; this previously targeted
# "output1.csv" and overwrote the single-observation decisions.
df_decision16.to_csv("output16.csv")

data_labels16 = {'solution16': jar_labels16}
df_labels16 = pd.DataFrame(data=data_labels16)
df_labels16.to_csv("labels16.csv")