In [None]:
# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
sb.set() # set the default Seaborn style for graphics

In [None]:
# Lift pandas' display limits so frames and long cell values are printed in full
for displayOption in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(displayOption, None)

In [None]:
# Load the cleaned data file and show five randomly chosen rows
data = pd.read_csv('data/chillipadi_cleandata.csv')
data.sample(n=5)

In [None]:
# Reduce the number of categories in Status by merging equivalent labels

completedAliases = ['Done', 'DoubleChecked']
data.loc[data['Status'].isin(completedAliases), 'Status'] = 'Completed'
data.loc[data['Status'].eq('Cancel'), 'Status'] = 'Cancelled'

In [None]:
# Distinct Status labels remaining after the consolidation
pd.unique(data['Status'])

In [None]:
# Convert selected columns to the pandas categorical dtype
categoricalVars = ['Status', 'MenuSection']
for columnName in categoricalVars:
    asCategory = data[columnName].astype('category')
    data[columnName] = asCategory

# Feature Engineering

### GrossSales

In [None]:
# Row-wise product of MenuRate and MenuPax
data['GrossSales'] = data['MenuRate'].mul(data['MenuPax'])

### Age

In [None]:
from datetime import datetime, date

def age(born, today=None):
    """Return the age in completed years for a date-of-birth string.

    Parameters
    ----------
    born : str
        Date of birth formatted as ``%Y-%m-%d``. A missing value (``None`` or
        NaN) yields NaN instead of raising.
    today : datetime.date, optional
        Reference date the age is measured against. Defaults to the current
        date; pass a fixed date to make the result reproducible.

    Returns
    -------
    int or float
        Number of completed years as an int, or ``float('nan')`` when ``born``
        is missing.

    Raises
    ------
    ValueError
        If ``born`` is a string that does not match ``%Y-%m-%d``.
    """
    # strptime only accepts str; a NaN read from the CSV would raise TypeError
    if pd.isna(born):
        return float("nan")
    born = datetime.strptime(born, "%Y-%m-%d").date()
    if today is None:
        today = date.today()
    # The tuple comparison is True (== 1) when this year's birthday is still ahead
    return today.year - born.year - ((today.month, today.day) < (born.month, born.day))

  
# Derive Age from the date-of-birth strings, then discard the raw column
data['Age'] = data['DateOfBirth(Date)'].map(age)
data = data.drop('DateOfBirth(Date)', axis='columns')

### OrderDate & OrderTime

In [None]:
def parseDateTime(col, colPrimaryName, data):
    """Split a datetime-like column into DateTime, Date and Time text columns.

    ``data[col]`` is parsed with the format ``%Y-%m-%d %H:%M:%S``; values that
    do not parse are coerced to missing. Three columns named
    ``<colPrimaryName>DateTime``, ``<colPrimaryName>Date`` and
    ``<colPrimaryName>Time`` are then written to ``data`` in place as formatted
    strings (an existing column with one of those names is overwritten).
    Nothing is returned.
    """
    parsed = pd.to_datetime(data[col], format='%Y-%m-%d %H:%M:%S', errors='coerce')
    outputFormats = (
        ('DateTime', '%Y-%m-%d %H:%M:%S'),
        ('Date', '%Y-%m-%d'),
        ('Time', '%H:%M:%S'),
    )
    for suffix, pattern in outputFormats:
        data[f'{colPrimaryName}{suffix}'] = parsed.dt.strftime(pattern)

In [None]:
# Adds OrderDateTime / OrderTime and rewrites OrderDate as a date-only string
parseDateTime(col='OrderDate', colPrimaryName='Order', data=data)

### FunctionDate & FunctionTime

In [None]:
# Adds FunctionDateTime / FunctionTime and rewrites FunctionDate as a date-only string
parseDateTime(col='FunctionDate', colPrimaryName='Function', data=data)

### ModificationDate & ModificationTime

In [None]:
# Adds ModificationDateTime / ModificationTime and rewrites ModificationDate as a date-only string
parseDateTime(col='ModificationDate', colPrimaryName='Modification', data=data)

### PackedDate & PackedTime

In [None]:
# Source column is 'PackedTime': adds PackedDateTime / PackedDate and rewrites PackedTime as a time-only string
parseDateTime(col='PackedTime', colPrimaryName='Packed', data=data)

In [None]:
# Preview the first five rows, including the new date/time columns
data.head(n=5)

### DaysInAdvance
Number of days between placing the order and the function date, i.e. how far in advance the customer ordered.

In [None]:
# Re-parse the text timestamps so they can be subtracted
for stampColumn in ('FunctionDateTime', 'OrderDateTime'):
    data[stampColumn] = pd.to_datetime(data[stampColumn])

# Whole days from order placement to the function
leadTime = data['FunctionDateTime'].sub(data['OrderDateTime'])
data['DaysInAdvance'] = leadTime.dt.days
data.head(n=10)

### Menu Categories

In [None]:
# Load the 'MenuCategories' sheet of the category workbook and preview it
menuCatData = pd.read_excel(io='MenuCategories2.xlsx', sheet_name='MenuCategories')
menuCatData.head(n=5)

In [None]:
# Empty two-column frame used as the initial value of flatMenuCatData
flatMenuCatData = pd.DataFrame(data=None, columns=['MenuName', 'MenuCategory'])

In [None]:
def flattenMenuCat(df):
    """Flatten a wide category table into (MenuName, MenuCategory) rows.

    Each column of ``df`` is treated as one category: the column label becomes
    ``MenuCategory`` and every non-null cell in that column becomes a
    ``MenuName``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with one column per category.

    Returns
    -------
    pandas.DataFrame
        Columns ``['MenuName', 'MenuCategory']`` with a fresh 0..n-1 index and
        one row per distinct ``MenuName``. When a name appears under several
        columns, the first column (in ``df.columns`` order) wins.
    """
    outputColumns = ['MenuName', 'MenuCategory']

    # Collect one small frame per category and concatenate once, rather than
    # growing the result with concat on every iteration.
    pieces = []
    for col in df.columns:
        names = df[col].dropna()
        if names.empty:
            continue
        pieces.append(pd.DataFrame({'MenuName': names, 'MenuCategory': col}))

    if not pieces:
        return pd.DataFrame(columns=outputColumns)

    result = pd.concat(pieces, ignore_index=True)
    # drop_duplicates returns a new frame; the result must be kept, otherwise
    # repeated names multiply rows when this table is merged on MenuName.
    result = result.drop_duplicates(subset=['MenuName']).reset_index(drop=True)
    return result[outputColumns]

In [None]:
# Flatten the wide category sheet and show five random rows of the result
flatMenuCatData = flattenMenuCat(df=menuCatData)
flatMenuCatData.sample(n=5)

In [None]:
# Left-join the category table onto the orders by MenuName (orders without a match keep a null category)
mergedData = data.merge(flatMenuCatData, how='left', on='MenuName')
mergedData.info()

In [None]:
# First rows that did receive a category from the merge
hasCategory = mergedData['MenuCategory'].notna()
mergedData.loc[hasCategory].head()

In [None]:
from openpyxl import load_workbook

# Distinct menu names that did not receive a category from the merge
unassigned = pd.DataFrame(mergedData[mergedData['MenuCategory'].isna()]['MenuName'].unique(), columns=['MenuName'])

# NOTE(review): the categories are read from 'MenuCategories2.xlsx' earlier in
# this notebook, but the unassigned names are written to 'MenuCategories.xlsx'
# — confirm that writing to a different workbook is intended.
filename = 'MenuCategories.xlsx'

# Append to the existing workbook (the file must already exist) and replace any
# previous "Unassigned" sheet, so stale names from an earlier run do not linger.
# mode='a' replaces the old load_workbook / writer.book / writer.sheets /
# writer.save() sequence, which no longer works on pandas 2.x; the context
# manager saves and closes the file even if writing fails.
with pd.ExcelWriter(filename, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
    unassigned.to_excel(writer, sheet_name='Unassigned', index=False)

In [None]:
# Label every order that still lacks a category as 'Others'
categorySeries = mergedData['MenuCategory']
mergedData['MenuCategory'] = categorySeries.where(categorySeries.notna(), 'Others')
mergedData.sample(n=5)

In [None]:
# Persist the engineered data set without the index column
mergedData.to_csv(path_or_buf='data/chillipadi_engineeredData.csv', index=False)

In [None]:
# Orders with a MenuPax of at least 1000
test = pd.DataFrame(mergedData)
isLargeOrder = test['MenuPax'].ge(1000)
moreThan1000 = test.loc[isLargeOrder]

In [None]:
import re

# Keep large orders whose name mentions "buffet" (any case) or whose category is "Buffet / Wedding"
nameMentionsBuffet = moreThan1000['MenuName'].str.contains('buffet', flags=re.IGNORECASE)
isBuffetCategory = moreThan1000['MenuCategory'] == "Buffet / Wedding"
buffetOnly = moreThan1000[nameMentionsBuffet | isBuffetCategory]
buffetOnly.head()

In [None]:
# Export the large buffet orders without the index column
buffetOnly.to_csv(path_or_buf='data/morethan1000pax.csv', index=False)