In [None]:
def import_clean_data(file_path):
    """Load an item-sales CSV export and return a cleaned DataFrame.

    Cleaning steps, in order:

    * ``Date`` and ``Time`` are merged into one datetime column named
      ``Date_Time``, placed first (the same name and position the
      ``parse_dates=[["Date", "Time"]]`` form of ``read_csv`` produced).
    * Columns the analysis does not use are dropped.
    * Rows whose ``Category`` is the text ``"None"`` are removed; the
      notebook treats those rows as refunds.  Rows with a missing
      ``Category`` are removed as well, because recent pandas versions
      parse the text ``None`` as a missing value by default, which makes
      the two cases indistinguishable after loading.
    * ``Gross Sales``, ``Discounts``, ``Net Sales`` and ``Tax`` are converted
      from currency text (``"$1,234.50"``, ``"-$0.50"``) to numbers.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Anything ``pandas.read_csv`` accepts.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame (not a view of the raw data).

    Raises
    ------
    KeyError
        If one of the expected columns is absent from the file.
    ValueError
        If the combined date/time text cannot be parsed, or a currency
        column holds text that is not numeric once ``$`` and ``,`` are removed.
    """
    unused_columns = ["Time Zone", "Transaction ID", "Payment ID", "Device Name", "Details", "Location",
                      "Dining Option", "Customer ID", "Customer Name", "Customer Reference ID"]
    dollar_columns = ["Gross Sales", "Discounts", "Net Sales", "Tax"]

    # Read Date/Time as plain text and combine them explicitly: the nested-list
    # form of ``parse_dates`` is deprecated in recent pandas releases.
    raw = pd.read_csv(file_path, dtype={"Date": str, "Time": str})
    date_time = pd.to_datetime(raw["Date"] + " " + raw["Time"])

    # Remove columns that are not useful to us, then put the timestamp first
    df = raw.drop(columns=["Date", "Time"] + unused_columns)
    df.insert(0, "Date_Time", date_time)

    # Remove the refunds.  The isna() test covers pandas versions that load
    # the text "None" as NaN; .copy() makes the later assignment safe.
    is_refund = df["Category"].isna() | (df["Category"] == "None")
    df = df.loc[~is_refund].copy()

    # Select the currency columns by name (not by position), strip the dollar
    # sign and thousands separators, then convert to numeric dtype.
    df[dollar_columns] = (
        df[dollar_columns]
        .replace('[$,]', '', regex=True)
        .apply(pd.to_numeric)
    )

    return df

In [None]:
import pandas as pd

# Raise pandas' display limits so printed frames show (up to) 999 rows and
# columns and may use lines up to 900 characters wide
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_rows", 999)
pd.set_option("display.width", 900)

In [None]:
# Load and clean the 2017-2018 item sales export; later cells read `data`
data = import_clean_data("Tea Room/2017-2018/item_sales.csv")
# Summary statistics first, then the first ten rows, each starting on its own line
print(data.describe(), data.head(10), sep="\n")

In [None]:
def demand(df):
    """Count rows per (weekday, hour, minute) slot and average them per weekday.

    Every row of ``df`` is counted as one order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a datetime column named ``Date_Time``.

    Returns
    -------
    pandas.DataFrame
        One row per slot that has at least one order, with columns
        ``Day_of_Week`` (0 = Monday ... 6 = Sunday), ``Hour``, ``Minute``,
        ``Number of Orders`` (total rows in the slot) and
        ``Average Number of Orders`` (the total divided by the number of
        distinct calendar dates in ``df`` that fall on that weekday).
    """
    #TODO Only include coffee sales as those are the only orders the express line can handle

    stamps = df["Date_Time"]
    # weekday: 0 = monday, 1 = tuesday, ... 6 = sunday
    weekday = stamps.dt.weekday.rename('Day_of_Week')

    # Group all of the rows by weekday, hour, and minute
    grouped = df.groupby([weekday, stamps.dt.hour.rename('Hour'), stamps.dt.minute.rename('Minute')])

    # How many orders were placed on that day of the week, at that hour and minute
    num_occurrences = grouped.size().to_frame('Number of Orders').reset_index()

    # A weekday slot can only occur on dates that fall on that weekday, so the
    # divisor is the number of distinct dates per weekday -- not the number of
    # distinct dates overall, which would understate every average.
    days_per_weekday = stamps.dt.normalize().groupby(weekday).nunique()

    # Average number of orders per occurrence of that weekday slot
    num_occurrences['Average Number of Orders'] = (
        num_occurrences['Number of Orders'] / num_occurrences['Day_of_Week'].map(days_per_weekday)
    )

    return num_occurrences

In [None]:
def overwhelmed(orders_per_hour, sales=None):
    """Print the time slots whose average order count exceeds a threshold.

    Parameters
    ----------
    orders_per_hour : float
        Threshold compared against the ``Average Number of Orders`` column
        returned by ``demand``.
        NOTE(review): ``demand`` groups by weekday, hour and minute, so the
        averages are per one-minute slot despite this parameter's name --
        confirm the intended unit.
    sales : pandas.DataFrame, optional
        Frame passed to ``demand``.  When omitted, the notebook-level ``data``
        variable is used, which is what the function always did before this
        parameter existed.

    Returns
    -------
    None
        The matching rows are printed, not returned.
    """
    if sales is None:
        # Fall back to the notebook-global frame so existing calls keep working
        sales = data

    slots = demand(sales)

    # Find times where average number of orders is greater than what we can handle
    busy_times = slots['Average Number of Orders'] > orders_per_hour

    print(slots[busy_times])


overwhelmed(.5)

In [None]:
## TODO Find a way to plot data
def graph_df(data):
    """Print the per-slot demand table for ``data``; plotting is not implemented yet.

    Parameters
    ----------
    data : pandas.DataFrame
        Passed unchanged to ``demand``.  Inside this function the parameter
        shadows the notebook-level ``data`` variable.

    Returns
    -------
    None
        The table returned by ``demand`` is printed.
    """
    # IPython line magic (not plain Python); it runs on every call of this function.
    %matplotlib inline
    # Imported for the planned plot; nothing below uses ``plt`` yet.
    import matplotlib.pyplot as plt
    
    occurrences = demand(data)
    print(occurrences)

    # NOTE(review): draft plot call, left disabled. 'num_occurences' is not a column
    # that demand() creates -- it names the count column 'Number of Orders'.
    #occurrences.plot(y='num_occurences', x=['Hour','Minute'], figsize=(20,10))


#graph_df(data)