In [1]:
%matplotlib notebook

In [2]:
# Dependencies
from matplotlib import pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd

In [3]:
# Read the observed per-bag candy counts and preview the first five rows
observed = pd.read_csv("skittles.csv")
observed.head()

Unnamed: 0,bag_number,purple,green,red,yellow,orange,total_perf,d_purple,d_green,d_red,d_yellow,d_orange,total_def,bag_total
0,1,3,25,6,14,6,54,0,0,2,1,1,4,58
1,2,12,11,10,14,9,56,0,1,2,1,4,8,64
2,3,8,13,6,17,10,54,0,5,0,1,1,7,61
3,4,13,9,12,9,10,53,1,0,2,1,1,5,58
4,5,13,7,8,13,11,52,0,0,1,1,3,5,57


In [4]:
# Per-bag totals for the observed data, rebuilt from the individual flavor columns
perf_cols = ['purple', 'green', 'red', 'yellow', 'orange']
def_cols = ['d_' + flavor for flavor in perf_cols]

# skipna=False keeps the NaN-propagating behavior of a chained `+`
observed['perf'] = observed[perf_cols].sum(axis=1, skipna=False)
observed['def'] = observed[def_cols].sum(axis=1, skipna=False)
observed['total'] = observed[['perf', 'def']].sum(axis=1, skipna=False)
observed.head()

Unnamed: 0,bag_number,purple,green,red,yellow,orange,total_perf,d_purple,d_green,d_red,d_yellow,d_orange,total_def,bag_total,perf,def,total
0,1,3,25,6,14,6,54,0,0,2,1,1,4,58,54,4,58
1,2,12,11,10,14,9,56,0,1,2,1,4,8,64,56,8,64
2,3,8,13,6,17,10,54,0,5,0,1,1,7,61,54,7,61
3,4,13,9,12,9,10,53,1,0,2,1,1,5,58,53,5,58
4,5,13,7,8,13,11,52,0,0,1,1,3,5,57,52,5,57


In [5]:
# Read the simulated per-bag candy counts and preview the first five rows
simulated = pd.read_csv("skittles_ml.csv")
simulated.head()

Unnamed: 0,bag_number,purple,green,red,yellow,orange,total_perf,d_purple,d_green,d_red,d_yellow,d_orange,total_def,bag_total
0,1,25,6,14,6,3,54,0,2,1,1,0,4,58
1,2,11,10,14,9,12,56,1,2,1,4,0,8,64
2,3,13,6,17,10,8,54,5,0,1,1,0,7,61
3,4,9,12,9,10,13,53,0,2,1,1,1,5,58
4,5,7,8,13,11,13,52,0,1,1,3,0,5,57


In [6]:
# Per-bag totals for the simulated data, rebuilt from the individual flavor columns
perf_cols = ['purple', 'green', 'red', 'yellow', 'orange']
def_cols = ['d_' + flavor for flavor in perf_cols]

# skipna=False keeps the NaN-propagating behavior of a chained `+`
simulated['perf'] = simulated[perf_cols].sum(axis=1, skipna=False)
simulated['def'] = simulated[def_cols].sum(axis=1, skipna=False)
simulated['total'] = simulated[['perf', 'def']].sum(axis=1, skipna=False)
simulated.head()

Unnamed: 0,bag_number,purple,green,red,yellow,orange,total_perf,d_purple,d_green,d_red,d_yellow,d_orange,total_def,bag_total,perf,def,total
0,1,25,6,14,6,3,54,0,2,1,1,0,4,58,54,4,58
1,2,11,10,14,9,12,56,1,2,1,4,0,8,64,56,8,64
2,3,13,6,17,10,8,54,5,0,1,1,0,7,61,54,7,61
3,4,9,12,9,10,13,53,0,2,1,1,1,5,58,53,5,58
4,5,7,8,13,11,13,52,0,1,1,3,0,5,57,52,5,57


In [7]:
# Candies per observed bag (normal + defective); x-values for every observed fit and scatter
total = observed["total"]

In [8]:
# Candies per simulated bag (normal + defective); x-values for every simulated fit and scatter
stotal = simulated["total"]

In [9]:
# Observed non-defective count per bag, with a least-squares line over bag size
perfect = observed["perf"]
p_slope, p_int, p_r, p_p, p_std_err = stats.linregress(x=total, y=perfect)
p_fit = p_int + p_slope * total

In [10]:
# Simulated non-defective count per bag, with a least-squares line over bag size
sperfect = simulated["perf"]
sp_slope, sp_int, sp_r, sp_p, sp_std_err = stats.linregress(x=stotal, y=sperfect)
sp_fit = sp_int + sp_slope * stotal

In [11]:
# Observed defective count per bag, with a least-squares line over bag size
defect = observed["def"]
d_slope, d_int, d_r, d_p, d_std_err = stats.linregress(x=total, y=defect)
d_fit = d_int + d_slope * total

In [12]:
# Simulated defective count per bag, with a least-squares line over bag size
sdefect = simulated["def"]
sd_slope, sd_int, sd_r, sd_p, sd_std_err = stats.linregress(x=stotal, y=sdefect)
sd_fit = sd_int + sd_slope * stotal

In [13]:
# Figure: non-defective (top) and defective (bottom) counts against bag size.
# Observed bags are squares, simulated bags are plus signs, each with its fitted line.
fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True)
fig.suptitle(
    # int() truncates each mean, so the title shows whole candies only
    "Normal Average - Observed " + str(int(np.mean(perfect)))
    + " vs Simulated " + str(int(np.mean(sperfect))),
    fontsize=12,
    fontweight="bold",
)

# Top panel: non-defective candies
ax1.scatter(total, perfect, c="Red", marker="s")
ax1.scatter(stotal, sperfect, c="Pink", marker="+")
ax1.plot(total, p_fit, color="Red", linewidth=1)
ax1.plot(stotal, sp_fit, color="Pink", linewidth=1)
ax1.set(ylabel="Normal")

# Bottom panel: defective candies; the x-axis is shared, so it is labelled only here
ax2.scatter(total, defect, c="Blue", marker="s")
ax2.scatter(stotal, sdefect, c="Aqua", marker="+")
ax2.plot(total, d_fit, color="Blue", linewidth=1)
ax2.plot(stotal, sd_fit, color="Aqua", linewidth=1)
ax2.set(xlabel="Skittles per Bag", ylabel="Defective")

plt.savefig("normal_vs_defective.png")

<IPython.core.display.Javascript object>

In [14]:
# Observed non-defective red count per bag, with a least-squares line over bag size
red = observed["red"]
red_slope, red_int, red_r, red_p, red_std_err = stats.linregress(x=total, y=red)
red_fit = red_int + red_slope * total

In [15]:
# Simulated non-defective red count per bag, with a least-squares line over bag size
sred = simulated["red"]
sred_slope, sred_int, sred_r, sred_p, sred_std_err = stats.linregress(x=stotal, y=sred)
sred_fit = sred_int + sred_slope * stotal

In [16]:
# Observed defective red count per bag, with a least-squares line over bag size
dred = observed["d_red"]
dred_slope, dred_int, dred_r, dred_p, dred_std_err = stats.linregress(x=total, y=dred)
dred_fit = dred_int + dred_slope * total

In [17]:
# Simulated defective red count per bag, with a least-squares line over bag size
sdred = simulated["d_red"]
sdred_slope, sdred_int, sdred_r, sdred_p, sdred_std_err = stats.linregress(x=stotal, y=sdred)
sdred_fit = sdred_int + sdred_slope * stotal

In [18]:
# Figure: red (strawberry) counts against bag size, non-defective on top, defective below.
# Observed bags are dark-red squares, simulated bags are red plus signs.
fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True)
fig.suptitle(
    # int() truncates each mean, so the title shows whole candies only
    "Strawberry Average - Observed " + str(int(np.mean(red)))
    + " vs Simulated " + str(int(np.mean(sred))),
    fontsize=12,
    fontweight="bold",
)

# Top panel: non-defective red candies
ax1.scatter(total, red, c="DarkRed", marker="s")
ax1.scatter(stotal, sred, c="Red", marker="+")
ax1.plot(total, red_fit, color="DarkRed", linewidth=1)
ax1.plot(stotal, sred_fit, color="Red", linewidth=1)
ax1.set(ylabel="Normal")

# Bottom panel: defective red candies; the x-axis is shared, so it is labelled only here
ax2.scatter(total, dred, c="DarkRed", marker="s")
ax2.scatter(stotal, sdred, c="Red", marker="+")
ax2.plot(total, dred_fit, color="DarkRed", linewidth=1)
ax2.plot(stotal, sdred_fit, color="Red", linewidth=1)
ax2.set(xlabel="Skittles per Bag", ylabel="Defective")

plt.savefig("strawberry_normal_vs_defective.png")

<IPython.core.display.Javascript object>

In [19]:
# Observed non-defective yellow count per bag, with a least-squares line over bag size
yel = observed["yellow"]
yel_slope, yel_int, yel_r, yel_p, yel_std_err = stats.linregress(x=total, y=yel)
yel_fit = yel_int + yel_slope * total

In [20]:
# Simulated non-defective yellow count per bag, with a least-squares line over bag size
syel = simulated["yellow"]
syel_slope, syel_int, syel_r, syel_p, syel_std_err = stats.linregress(x=stotal, y=syel)
syel_fit = syel_int + syel_slope * stotal

In [21]:
# Observed defective yellow count per bag, with a least-squares line over bag size
dyel = observed["d_yellow"]
dyel_slope, dyel_int, dyel_r, dyel_p, dyel_std_err = stats.linregress(x=total, y=dyel)
dyel_fit = dyel_int + dyel_slope * total

In [22]:
# Simulated defective yellow count per bag (the `d_yellow` column), with a least-squares line over bag size
sdyel = simulated["d_yellow"]
sdyel_slope, sdyel_int, sdyel_r, sdyel_p, sdyel_std_err = stats.linregress(x=stotal, y=sdyel)
sdyel_fit = sdyel_int + sdyel_slope * stotal

In [23]:
# Figure: yellow (lemon) counts against bag size, non-defective on top, defective below.
# Observed bags are goldenrod squares, simulated bags are gold plus signs.
fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True)
fig.suptitle(
    # NOTE(review): the other figures word this "<Flavor> Average - Observed ..."; confirm
    # whether the different wording here is intentional before changing the title.
    "Lemon - Average Observed " + str(int(np.mean(yel)))
    + " vs Simulated " + str(int(np.mean(syel))),
    fontsize=12,
    fontweight="bold",
)

# Top panel: non-defective yellow candies
ax1.scatter(total, yel, c="Goldenrod", marker="s")
ax1.scatter(stotal, syel, c="Gold", marker="+")
ax1.plot(total, yel_fit, color="Goldenrod", linewidth=1)
ax1.plot(stotal, syel_fit, color="Gold", linewidth=1)
ax1.set(ylabel="Normal")

# Bottom panel: defective yellow candies; the x-axis is shared, so it is labelled only here
ax2.scatter(total, dyel, c="Goldenrod", marker="s")
ax2.scatter(stotal, sdyel, c="Gold", marker="+")
ax2.plot(total, dyel_fit, color="Goldenrod", linewidth=1)
ax2.plot(stotal, sdyel_fit, color="Gold", linewidth=1)
ax2.set(xlabel="Skittles per Bag", ylabel="Defective")

plt.savefig("lemon_normal_vs_defective.png")

<IPython.core.display.Javascript object>

In [24]:
# Observed non-defective green count per bag, with a least-squares line over bag size
gre = observed["green"]
gre_slope, gre_int, gre_r, gre_p, gre_std_err = stats.linregress(x=total, y=gre)
gre_fit = gre_int + gre_slope * total

In [25]:
# Simulated non-defective green count per bag, with a least-squares line over bag size
sgre = simulated["green"]
sgre_slope, sgre_int, sgre_r, sgre_p, sgre_std_err = stats.linregress(x=stotal, y=sgre)
sgre_fit = sgre_int + sgre_slope * stotal

In [26]:
# Observed defective green count per bag, with a least-squares line over bag size
dgre = observed["d_green"]
dgre_slope, dgre_int, dgre_r, dgre_p, dgre_std_err = stats.linregress(x=total, y=dgre)
dgre_fit = dgre_int + dgre_slope * total

In [27]:
# Simulated Defective Green
# Least-squares line of the simulated defective green count over simulated bag size.
sdgre = simulated['d_green']
# Fit against the simulated totals (stotal), not the observed ones: the fitted
# line below is evaluated on stotal and plotted against stotal, so the
# regression must use the same x-values as the data it describes.
sdgre_slope, sdgre_int, sdgre_r, sdgre_p, sdgre_std_err = stats.linregress(
    stotal, sdgre)
sdgre_fit = sdgre_slope * stotal + sdgre_int

In [28]:
# Figure: green (green apple) counts against bag size, non-defective on top, defective below.
# Observed bags are green squares, simulated bags are light-green plus signs.
fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True)
fig.suptitle(
    # int() truncates each mean, so the title shows whole candies only
    "Green Apple Average - Observed " + str(int(np.mean(gre)))
    + " vs Simulated " + str(int(np.mean(sgre))),
    fontsize=12,
    fontweight="bold",
)

# Top panel: non-defective green candies
ax1.scatter(total, gre, c="Green", marker="s")
ax1.scatter(stotal, sgre, c="LightGreen", marker="+")
ax1.plot(total, gre_fit, color="Green", linewidth=1)
ax1.plot(stotal, sgre_fit, color="LightGreen", linewidth=1)
ax1.set(ylabel="Normal")

# Bottom panel: defective green candies; the x-axis is shared, so it is labelled only here
ax2.scatter(total, dgre, c="Green", marker="s")
ax2.scatter(stotal, sdgre, c="LightGreen", marker="+")
ax2.plot(total, dgre_fit, color="Green", linewidth=1)
ax2.plot(stotal, sdgre_fit, color="LightGreen", linewidth=1)
ax2.set(xlabel="Skittles per Bag", ylabel="Defective")

plt.savefig("green_apple_normal_vs_defective.png")

<IPython.core.display.Javascript object>

In [29]:
# Observed non-defective orange count per bag, with a least-squares line over bag size
ora = observed["orange"]
ora_slope, ora_int, ora_r, ora_p, ora_std_err = stats.linregress(x=total, y=ora)
ora_fit = ora_int + ora_slope * total

In [30]:
# Simulated Normal Orange
# Least-squares line of the simulated non-defective orange count over simulated bag size.
sora = simulated['orange']
sora_slope, sora_int, sora_r, sora_p, sora_std_err = stats.linregress(
    stotal, sora)
# Evaluate the line on stotal, the x-values it was fitted on and is plotted
# against; using the observed totals here would pair the simulated fit with
# the wrong bags.
sora_fit = sora_slope * stotal + sora_int

In [31]:
# Observed defective orange count per bag, with a least-squares line over bag size
dora = observed["d_orange"]
dora_slope, dora_int, dora_r, dora_p, dora_std_err = stats.linregress(x=total, y=dora)
dora_fit = dora_int + dora_slope * total

In [32]:
# Simulated Defective Orange
# Least-squares line of the simulated defective orange count over simulated bag size.
sdora = simulated['d_orange']
sdora_slope, sdora_int, sdora_r, sdora_p, sdora_std_err = stats.linregress(
    stotal, sdora)
# Evaluate the line on stotal, the x-values it was fitted on and is plotted
# against; using the observed totals here would pair the simulated fit with
# the wrong bags.
sdora_fit = sdora_slope * stotal + sdora_int

In [33]:
# Figure: orange counts against bag size, non-defective on top, defective below.
# Observed bags are dark-orange squares, simulated bags are orange plus signs.
fig, (ax1, ax2) = plt.subplots(nrows=2, sharex=True)
fig.suptitle(
    # int() truncates each mean, so the title shows whole candies only
    "Orange Average - Observed " + str(int(np.mean(ora)))
    + " vs Simulated " + str(int(np.mean(sora))),
    fontsize=12,
    fontweight="bold",
)

# Top panel: non-defective orange candies
ax1.scatter(total, ora, c="DarkOrange", marker="s")
ax1.scatter(stotal, sora, c="Orange", marker="+")
ax1.plot(total, ora_fit, color="DarkOrange", linewidth=1)
ax1.plot(stotal, sora_fit, color="Orange", linewidth=1)
ax1.set(ylabel="Normal")

# Bottom panel: defective orange candies; the x-axis is shared, so it is labelled only here
ax2.scatter(total, dora, c="DarkOrange", marker="s")
ax2.scatter(stotal, sdora, c="Orange", marker="+")
ax2.plot(total, dora_fit, color="DarkOrange", linewidth=1)
ax2.plot(stotal, sdora_fit, color="Orange", linewidth=1)
ax2.set(xlabel="Skittles per Bag", ylabel="Defective")

plt.savefig("orange_normal_vs_defective.png")

<IPython.core.display.Javascript object>

In [34]:
# Observed non-defective purple count per bag, with a least-squares line over bag size
pur = observed["purple"]
pur_slope, pur_int, pur_r, pur_p, pur_std_err = stats.linregress(x=total, y=pur)
pur_fit = pur_int + pur_slope * total

In [35]:
# Simulated Normal Purple
# Least-squares line of the simulated non-defective purple count over simulated bag size.
spur = simulated['purple']
spur_slope, spur_int, spur_r, spur_p, spur_std_err = stats.linregress(
    stotal, spur)
# Evaluate the line on stotal, the x-values it was fitted on and is plotted
# against; using the observed totals here would pair the simulated fit with
# the wrong bags.
spur_fit = spur_slope * stotal + spur_int

In [36]:
# Observed defective purple count per bag, with a least-squares line over bag size
dpur = observed["d_purple"]
dpur_slope, dpur_int, dpur_r, dpur_p, dpur_std_err = stats.linregress(x=total, y=dpur)
dpur_fit = dpur_int + dpur_slope * total

In [37]:
# Simulated Defective Purple
# Least-squares line of the simulated defective purple count over simulated bag size.
sdpur = simulated['d_purple']
sdpur_slope, sdpur_int, sdpur_r, sdpur_p, sdpur_std_err = stats.linregress(
    stotal, sdpur)
# Evaluate the line on stotal, the x-values it was fitted on; using the
# observed totals here would pair the simulated fit with the wrong bags.
sdpur_fit = sdpur_slope * stotal + sdpur_int

In [38]:
# Plot
# Figure: purple (grape) counts against bag size, non-defective on top, defective below.
# Observed bags are purple squares, simulated bags are medium-purple plus signs.
fig, (ax1, ax2) = plt.subplots(2, sharex=True)
# int() truncates each mean, so the title shows whole candies only
fig.suptitle("Grape Average - Observed " + str(int(np.mean(pur))) + " vs Simulated " + str(int(np.mean(spur))), fontsize=12, fontweight="bold")

# Top panel: non-defective purple candies with their fitted lines
ax1.scatter(total, pur, c="Purple", marker="s")
ax1.scatter(stotal, spur, c="MediumPurple", marker="+")
ax1.plot(total, pur_fit, "Purple", linewidth=1)
ax1.plot(stotal, spur_fit, "MediumPurple", linewidth=1)
ax1.set_ylabel("Normal")

# Bottom panel: defective purple candies. This panel must use the purple
# series (dpur / sdpur and their fits); plotting the orange defective series
# here would make the grape figure show orange data.
ax2.scatter(total, dpur, c="Purple", marker="s")
ax2.scatter(stotal, sdpur, c="MediumPurple", marker="+")
ax2.plot(total, dpur_fit, "Purple", linewidth=1)
ax2.plot(stotal, sdpur_fit, "MediumPurple", linewidth=1)
ax2.set_ylabel("Defective")

# The x-axis is shared between the panels, so it is labelled only on the bottom one
ax2.set_xlabel("Skittles per Bag")

plt.savefig("grape_normal_vs_defective.png")

<IPython.core.display.Javascript object>