In [2]:
import pandas as pd

# Raise pandas' display limits so that printed frames show (up to) 999
# columns / 999 rows and may use up to 900 characters of line width.
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_rows", 999)
pd.set_option("display.width", 900)

In [3]:
def import_clean_data(file_path):
    """Load an item-sales CSV export and return a cleaned DataFrame.

    Steps performed:

    1. Read the CSV and merge the ``Date`` and ``Time`` columns into a single
       ``Date_Time`` datetime column, placed first (the two source columns
       are removed).
    2. Drop the columns this analysis does not use (IDs, device, location,
       customer fields, ...).
    3. Drop rows whose ``Category`` is ``"None"``. The original notebook
       describes these rows as refunds. Newer pandas versions parse the
       literal text ``None`` as a missing value, so rows with a missing
       ``Category`` are dropped as well.
    4. Convert ``Gross Sales``, ``Discounts``, ``Net Sales`` and ``Tax`` from
       currency text (e.g. ``"-$0.15"``, ``"$1,200.00"``) to floats.

    Parameters
    ----------
    file_path : str, path object or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame. Row labels are those of the original file, i.e.
        the index is *not* reset after filtering.

    Raises
    ------
    KeyError
        If one of the expected columns is missing from the file.
    ValueError
        If a money column contains text that is still not numeric after
        removing ``$`` and ``,``.
    """
    unused_columns = [
        "Time Zone", "Transaction ID", "Payment ID", "Device Name", "Details",
        "Location", "Dining Option", "Customer ID", "Customer Name",
        "Customer Reference ID",
    ]
    money_columns = ["Gross Sales", "Discounts", "Net Sales", "Tax"]

    raw = pd.read_csv(file_path)

    # Combine date and time explicitly; the nested-list form of `parse_dates`
    # that used to do this inside read_csv is deprecated.
    date_time = pd.to_datetime(raw["Date"].astype(str) + " " + raw["Time"].astype(str))

    df = raw.drop(columns=["Date", "Time"] + unused_columns)
    df.insert(0, "Date_Time", date_time)

    # "None" may arrive either as the literal string or, on newer pandas, as
    # NaN (it is one of read_csv's default NA markers) -- handle both.
    has_category = df["Category"].notna() & df["Category"].ne("None")
    # .copy() so the column assignment below works on an independent frame
    # rather than on a view of the unfiltered data.
    df = df.loc[has_category].copy()

    # Select the money columns by name (not position) and strip the currency
    # symbol and thousands separators before converting to numbers.
    df[money_columns] = (
        df[money_columns]
        .replace(r"[$,]", "", regex=True)
        .apply(pd.to_numeric)
    )

    return df

In [4]:
# Location of the item-sales export, relative to the notebook's working directory.
DATA_PATH = "Tea Room/2017-2018/item_sales.csv"
# Number of rows to preview. Printing thousands of rows floods the cell output
# and exceeds the display.max_rows limit configured above anyway.
PREVIEW_ROWS = 20

data = import_clean_data(DATA_PATH)
# Summary statistics for the numeric columns, then a short look at the rows.
print(data.describe())
print(data.head(PREVIEW_ROWS))

                 Qty    Gross Sales      Discounts      Net Sales            Tax
count  103460.000000  103460.000000  103460.000000  103460.000000  103460.000000
mean        1.023671       3.395213      -0.086345       3.308868       0.327317
std         0.190461       2.715676       0.342503       2.754802       0.267416
min        -2.000000      -6.750000     -11.400000      -6.750000      -0.880000
25%         1.000000       2.150000       0.000000       2.000000       0.110000
50%         1.000000       2.750000       0.000000       2.650000       0.250000
75%         1.000000       3.880000       0.000000       3.850000       0.470000
max         8.000000     120.000000       2.150000     120.000000      10.480000
               Date_Time                 Category                         Item  Qty   Price Point Name      SKU          Modifiers Applied  Gross Sales  Discounts  Net Sales   Tax                            Notes Event Type
2    2018-04-13 17:36:17          Merchandise-M