In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def outlier_ejector(dataframe, column, k=1.5):
    """
    Return the rows of ``dataframe`` whose ``column`` value is NOT an IQR outlier.

    A value is treated as an outlier when it lies strictly below
    ``Q1 - k * IQR`` or strictly above ``Q3 + k * IQR``, where Q1 and Q3 are
    the 25th and 75th percentiles of ``column`` and ``IQR = Q3 - Q1``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to filter. It is not modified.
    column : label
        Name of the numeric column the bounds are computed from.
    k : float, default 1.5
        Multiplier applied to the IQR when building the bounds.

    Returns
    -------
    pandas.DataFrame
        The rows of ``dataframe`` that fall inside the bounds (inclusive),
        with their original index labels. Rows whose value is missing are
        kept, because comparisons against NaN evaluate to False.
    """
    q1, q3 = dataframe[column].quantile(q=[0.25, 0.75])
    iqr = q3 - q1

    lower_bound = q1 - (k * iqr)
    upper_bound = q3 + (k * iqr)

    high_items = dataframe[column] > upper_bound
    low_items = dataframe[column] < lower_bound

    # A value can never be both too low and too high, so `~low | ~high` is
    # always True and would keep every row. Negate the union of the two
    # outlier masks instead.
    return dataframe[~(low_items | high_items)]

In [3]:
def outlier_detector(dataframe, col, k=1.5):
    """
    Return the rows of ``dataframe`` whose ``col`` value is an IQR outlier.

    The fences are ``Q1 - k * IQR`` and ``Q3 + k * IQR``, with Q1/Q3 the 25th
    and 75th percentiles of ``col``. Rows strictly outside either fence are
    returned; ``dataframe`` itself is left untouched.
    """
    values = dataframe[col]
    first_quartile, third_quartile = values.quantile(q=[0.25, 0.75])
    fence_width = k * (third_quartile - first_quartile)

    # Flag anything strictly below the lower fence or strictly above the upper one.
    is_outlier = (values < first_quartile - fence_width) | (values > third_quartile + fence_width)
    return dataframe[is_outlier]

In [4]:
# `infer_datetime_format` was dropped: it is deprecated in pandas 2.x and only
# applies when `parse_dates` is requested, which it is not here. The 'Date'
# index is therefore still read as plain strings (e.g. '1/1/17').
df = pd.read_csv('lemonade.csv', index_col='Date')

In [5]:
# Preview the first rows (head() defaults to five) to check the loaded columns.
df.head()

Unnamed: 0_level_0,Day,Temperature,Rainfall,Flyers,Price,Sales
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1/1/17,Sunday,27.0,2.0,15,0.5,10
1/2/17,Monday,28.9,1.33,15,0.5,13
1/3/17,Tuesday,34.5,1.33,27,0.5,15
1/4/17,Wednesday,44.1,1.05,28,0.5,17
1/5/17,Thursday,42.4,1.0,33,0.5,18


Using the lemonade.csv dataset and focusing on the continuous variables:

- Use the IQR rule, with upper and lower bounds computed using a multiplier of 1.5, to identify the lower outliers of each column of lemonade.csv. Do these lower outliers make sense? Which outliers should be kept?

In [12]:
# Keep only the non-object columns; these are the ones scanned for outliers.
num_cols = df.select_dtypes(exclude=['object'])
# Iterating a DataFrame yields its column labels.
for col in num_cols:
    print(f'{col} outliers')
    outlier_df = outlier_detector(df, col)
    # A single print with a newline separator emits the same text as two calls.
    print(outlier_df, '----------------------------------------------------------------', sep='\n')

Temperature outliers
               Day  Temperature  Rainfall  Flyers  Price  Sales
Date                                                           
2/11/17   Saturday        212.0      0.91      35    0.5     21
12/31/17    Sunday         15.1      2.50       9    0.5      7
----------------------------------------------------------------
Rainfall outliers
                Day  Temperature  Rainfall  Flyers  Price  Sales
Date                                                            
1/1/17       Sunday         27.0      2.00      15    0.5     10
1/2/17       Monday         28.9      1.33      15    0.5     13
1/3/17      Tuesday         34.5      1.33      27    0.5     15
1/6/17       Friday         25.3      1.54      23    0.5     11
1/7/17     Saturday         32.9      1.54      19    0.5     13
1/11/17   Wednesday         32.6      1.54      23    0.5     12
1/12/17    Thursday         38.2      1.33      16    0.5     14
1/13/17      Friday         37.5      1.33      19    0

These temperatures app