## Continuous Probabilistic Exercises

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
def get_lower_and_upper_bounds(series, multiplier=1.5):
    """Return the IQR-based (lower, upper) outlier bounds for ``series``.

    Parameters
    ----------
    series : object exposing ``.quantile(q)`` (the notebook passes pandas Series)
        Values whose 25th and 75th percentiles define the interquartile range.
    multiplier : float, default 1.5
        How many IQRs below Q1 / above Q3 the bounds are placed.

    Returns
    -------
    tuple
        ``(q1 - multiplier * iqr, q3 + multiplier * iqr)``.
    """
    first_quartile, third_quartile = (series.quantile(q) for q in (0.25, 0.75))
    # Distance from each quartile to its bound.
    fence_offset = multiplier * (third_quartile - first_quartile)
    return first_quartile - fence_offset, third_quartile + fence_offset


In [6]:
# Load the lemonade data and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='lemonade.csv')
df.head(n=5)

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
0,1/1/17,Sunday,27.0,2.0,15,0.5,10
1,1/2/17,Monday,28.9,1.33,15,0.5,13
2,1/3/17,Tuesday,34.5,1.33,27,0.5,15
3,1/4/17,Wednesday,44.1,1.05,28,0.5,17
4,1/5/17,Thursday,42.4,1.0,33,0.5,18


In [15]:
# How many rows fall on each day of the week.
df['Day'].value_counts()

Sunday       53
Monday       52
Tuesday      52
Wednesday    52
Thursday     52
Friday       52
Saturday     52
Name: Day, dtype: int64

In [16]:
# Frequency of each distinct Price value.
df['Price'].value_counts()

0.5    365
Name: Price, dtype: int64

---

### Multiplier of 1.5

Temperature Column -

In [4]:
# Temperature bounds at 1.5 x IQR.
temp_lower, temp_upper = get_lower_and_upper_bounds(df['Temperature'], multiplier=1.5)
(temp_lower, temp_upper)

(16.700000000000003, 104.7)

In [5]:
# Rows whose Temperature is below the lower bound.
df.loc[df['Temperature'].lt(temp_lower)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
364,12/31/17,Sunday,15.1,2.5,9,0.5,7


In [7]:
# Rows whose Temperature is above the upper bound.
df.loc[df['Temperature'].gt(temp_upper)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
41,2/11/17,Saturday,212.0,0.91,35,0.5,21


Rainfall Column -

In [8]:
# Rainfall bounds at 1.5 x IQR.
rain_lower, rain_upper = get_lower_and_upper_bounds(df['Rainfall'], multiplier=1.5)
(rain_lower, rain_upper)

(0.26, 1.3)

In [9]:
# Rows whose Rainfall is below the lower bound.
df.loc[df['Rainfall'].lt(rain_lower)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales


In [10]:
# Rows whose Rainfall is above the upper bound.
df.loc[df['Rainfall'].gt(rain_upper)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
0,1/1/17,Sunday,27.0,2.0,15,0.5,10
1,1/2/17,Monday,28.9,1.33,15,0.5,13
2,1/3/17,Tuesday,34.5,1.33,27,0.5,15
5,1/6/17,Friday,25.3,1.54,23,0.5,11
6,1/7/17,Saturday,32.9,1.54,19,0.5,13
10,1/11/17,Wednesday,32.6,1.54,23,0.5,12
11,1/12/17,Thursday,38.2,1.33,16,0.5,14
12,1/13/17,Friday,37.5,1.33,19,0.5,15
15,1/16/17,Monday,30.6,1.67,24,0.5,12
16,1/17/17,Tuesday,32.2,1.43,26,0.5,14


Flyers Column - 

In [12]:
# Flyers bounds at 1.5 x IQR.
flyer_lower, flyer_upper = get_lower_and_upper_bounds(df['Flyers'], multiplier=1.5)
(flyer_lower, flyer_upper)

(4.0, 76.0)

In [13]:
# Rows whose Flyers count is below the lower bound.
df.loc[df['Flyers'].lt(flyer_lower)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
324,11/21/17,Tuesday,47.0,0.95,-38,0.5,20


In [14]:
# Rows whose Flyers count is above the upper bound.
df.loc[df['Flyers'].gt(flyer_upper)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
166,6/16/17,Friday,99.3,0.47,77,0.5,41
194,7/14/17,Friday,92.0,0.5,80,0.5,40


Sales Column - 

In [17]:
# Sales bounds at 1.5 x IQR.
sales_lower, sales_upper = get_lower_and_upper_bounds(df['Sales'], multiplier=1.5)
(sales_lower, sales_upper)

(5.0, 45.0)

In [18]:
# Rows whose Sales are below the lower bound.
df.loc[df['Sales'].lt(sales_lower)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales


In [19]:
# Rows whose Sales are above the upper bound.
df.loc[df['Sales'].gt(sales_upper)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
181,7/1/17,Saturday,102.9,0.47,59,0.5,143
182,7/2/17,Sunday,93.4,0.51,68,0.5,158
183,7/3/17,Monday,81.5,0.54,68,0.5,235
184,7/4/17,Tuesday,84.2,0.59,49,0.5,534


---

### Multiplier of 3

Temperature Column -

In [20]:
# Temperature bounds at 3 x IQR.
temp_lower, temp_upper = get_lower_and_upper_bounds(df['Temperature'], multiplier=3)
(temp_lower, temp_upper)

(-16.299999999999997, 137.7)

In [21]:
# Rows whose Temperature is below the lower bound.
df.loc[df['Temperature'].lt(temp_lower)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales


In [22]:
# Rows whose Temperature is above the upper bound.
df.loc[df['Temperature'].gt(temp_upper)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
41,2/11/17,Saturday,212.0,0.91,35,0.5,21


Rainfall Column - 

In [23]:
# Rainfall bounds at 3 x IQR.
rain_lower, rain_upper = get_lower_and_upper_bounds(df['Rainfall'], multiplier=3)
(rain_lower, rain_upper)

(-0.13, 1.69)

In [24]:
# Rows whose Rainfall is below the lower bound.
df.loc[df['Rainfall'].lt(rain_lower)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales


In [25]:
# Rows whose Rainfall is above the upper bound.
df.loc[df['Rainfall'].gt(rain_upper)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
0,1/1/17,Sunday,27.0,2.0,15,0.5,10
338,12/5/17,Tuesday,22.0,1.82,11,0.5,10
343,12/10/17,Sunday,31.3,1.82,15,0.5,11
364,12/31/17,Sunday,15.1,2.5,9,0.5,7


Flyers Column - 

In [26]:
# Flyers bounds at 3 x IQR.
flyer_lower, flyer_upper = get_lower_and_upper_bounds(df['Flyers'], multiplier=3)
(flyer_lower, flyer_upper)

(-23.0, 103.0)

In [27]:
# Rows whose Flyers count is below the lower bound.
df.loc[df['Flyers'].lt(flyer_lower)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
324,11/21/17,Tuesday,47.0,0.95,-38,0.5,20


In [28]:
# Rows whose Flyers count is above the upper bound.
df.loc[df['Flyers'].gt(flyer_upper)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales


Sales Column - 

In [29]:
# Sales bounds at 3 x IQR.
sales_lower, sales_upper = get_lower_and_upper_bounds(df['Sales'], multiplier=3)
(sales_lower, sales_upper)

(-10.0, 60.0)

In [30]:
# Rows whose Sales are below the lower bound.
df.loc[df['Sales'].lt(sales_lower)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales


In [31]:
# Rows whose Sales are above the upper bound.
df.loc[df['Sales'].gt(sales_upper)]

Unnamed: 0,Date,Day,Temperature,Rainfall,Flyers,Price,Sales
181,7/1/17,Saturday,102.9,0.47,59,0.5,143
182,7/2/17,Sunday,93.4,0.51,68,0.5,158
183,7/3/17,Monday,81.5,0.54,68,0.5,235
184,7/4/17,Tuesday,84.2,0.59,49,0.5,534


---