In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
import warnings
warnings.filterwarnings("ignore")

In [None]:
def convert_data(data):
    """Add calendar, clock and meal-period columns to the order table.

    The frame passed in is modified in place: ``order_date`` is converted to
    datetime and the columns ``Month``, ``week_info``, ``Hour``, ``Minute``,
    ``Second`` and ``Meal_time`` are added. The result is also written to
    ``Cleaned.csv`` in the current working directory.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``order_date`` (convertible to ``datetime64[ns]``) and
        ``order_time`` (strings of the form ``HH:MM:SS``).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    # Parse the order date once so the .dt accessors can be used below
    data['order_date'] = data['order_date'].astype('datetime64[ns]')
    order_dates = data['order_date']
    # Numeric month and weekday name of every order
    data['Month'] = order_dates.dt.month
    data['week_info'] = order_dates.dt.day_name()
    # Break "HH:MM:SS" into three columns, then turn each into integers
    clock_columns = ['Hour', 'Minute', 'Second']
    data[clock_columns] = data['order_time'].str.split(":", expand=True)
    for column in clock_columns:
        data[column] = data[column].astype(int)
    # Meal period by hour: <=13, 14-16, 17-20, >20
    hour = data['Hour']
    meal_rules = [
        (hour <= 13, 'Lunch'),
        (hour.between(14, 16), 'Afternoon_Tea'),
        (hour.between(17, 20), 'Dinner'),
        (hour > 20, 'Late_Night_Meal'),
    ]
    for mask, label in meal_rules:
        data.loc[mask, 'Meal_time'] = label
    # Persist the enriched table next to the notebook
    data.to_csv('Cleaned.csv')
    return data

In [None]:
def meal_hist(data):
    """Save a histogram of order counts per meal period to ``meal_hist.jpg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``Meal_time`` column holding ``'Lunch'``,
        ``'Afternoon_Tea'``, ``'Dinner'`` or ``'Late_Night_Meal'``.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    # Values stored in the column, in the order the bars should appear
    meal_order = ['Lunch', 'Afternoon_Tea', 'Dinner', 'Late_Night_Meal']
    # Human-readable tick labels, one per entry of meal_order
    meal_bar = ('Lunch', "Afternoon Tea", 'Dinner', "Late Night Meal")
    # Give every period an explicit bar position. Passing the raw strings to
    # plt.hist would place categories in order of first appearance in the
    # data, so the fixed tick labels could end up under the wrong bars.
    position_of = {name: pos for pos, name in enumerate(meal_order)}
    meal_codes = data['Meal_time'].map(position_of).dropna()
    # One unit-wide bin centred on each integer position
    plt.hist(meal_codes, color="purple",
             bins=np.arange(len(meal_order) + 1) - 0.5, rwidth=0.5)
    # Adjust title and label names
    plt.title("Histogram Graph of different Meal Time")
    plt.xlabel("Meal time")
    plt.ylabel("Frequency")
    plt.xticks(np.arange(len(meal_bar)), meal_bar)
    plt.xlim([-1, 4])
    plt.savefig("meal_hist.jpg")
    plt.close()

In [None]:
def week_hist(data):
    """Save a histogram of order counts per day of week to ``week_hist.jpg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``week_info`` column of English day names
        (``'Sunday'`` ... ``'Saturday'``). The frame is not modified.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    # Bar order, Sunday first
    week_sort = [
        'Sunday', 'Monday', 'Tuesday',
        'Wednesday', 'Thursday', 'Friday', 'Saturday']
    day_position = {day: pos for pos, day in enumerate(week_sort)}
    # Plot from a derived Series instead of writing the codes back into
    # data['week_info']: overwriting the column would break every later
    # lookup by day name (summary_data compares week_info with "Monday",
    # "Saturday", ...) and would turn the column into NaN on a second call.
    day_codes = data['week_info'].map(day_position).dropna()
    # Set up figure size
    plt.figure(figsize=(15, 15))
    # One unit-wide bin centred on each of the seven day positions
    plt.hist(day_codes, bins=np.arange(len(week_sort) + 1) - 0.5, rwidth=0.5)
    # Adjust title and label names
    plt.title("Histogram Graph of Weekly Information")
    plt.xlabel("Week Dates")
    plt.ylabel("Frequency")
    plt.xticks(np.arange(len(week_sort)), week_sort,
               color='orange', rotation=20)
    plt.xlim([-1, 7])
    plt.savefig("week_hist.jpg")
    plt.close()

In [None]:
def month_hist(data):
    """Save a histogram of order counts per month to ``month_hist.jpg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a numeric ``Month`` column.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    # Tick labels for the twelve months
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Unit-wide bins with edges at -0.5, 0.5, ..., 12.5
    bin_edges = np.arange(14) - 0.5
    plt.hist(x='Month', data=data, color='green',
             bins=bin_edges, width=0.5, align='mid')
    # Title and axis captions
    plt.title("Histogram Graph of Month Frequency")
    plt.xlabel("Month Dates")
    plt.ylabel("Frequency")
    # Ticks sit at 0.75, 1.75, ..., 11.75, i.e. month number minus 0.25
    tick_positions = [pos + 0.75 for pos, _ in enumerate(month_names)]
    plt.xticks(tick_positions, month_names, rotation=15, color='red')
    plt.savefig("month_hist.jpg")
    plt.close()

In [None]:
def category_hist(data):
    """Save a histogram of order counts per pizza category.

    The figure is written to ``category_hist.jpg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``pizza_category`` column holding ``'Classic'``,
        ``'Veggie'``, ``'Supreme'`` or ``'Chicken'``.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    # Bar order and tick labels
    car_bar = ['Classic', 'Veggie', 'Supreme', 'Chicken']
    # Assign each category an explicit bar position. Feeding the raw strings
    # to plt.hist would order bars by first appearance in the data, so the
    # fixed tick labels could end up under the wrong bars.
    position_of = {name: pos for pos, name in enumerate(car_bar)}
    category_codes = data['pizza_category'].map(position_of).dropna()
    # One unit-wide bin centred on each integer position
    plt.hist(category_codes,
             bins=np.arange(len(car_bar) + 1) - 0.5, rwidth=0.5)
    # Adjust title and label names
    plt.title("Histogram Graph of Different Pizza Category")
    plt.xlabel("Pizza Category")
    plt.ylabel("Frequency")
    plt.xticks(np.arange(len(car_bar)), car_bar)
    plt.xlim([-1, 4])
    plt.savefig("category_hist.jpg")
    plt.close()

In [None]:
def category_pie(data):
    """Save a pie chart of order share per pizza category.

    The figure is written to ``category_pie.jpg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``pizza_category`` column.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    # Square 10x10 canvas
    plt.figure(figsize=(10, 10))
    # Number of rows per category; the index carries the category names
    category_counts = data["pizza_category"].value_counts()
    # One wedge per category, annotated with its percentage
    plt.pie(x=category_counts, labels=category_counts.index,
            rotatelabels=False, autopct='%1.1f%%', startangle=45)
    plt.title("Pie Chart for Pizza Category")
    plt.savefig("category_pie.jpg")
    plt.close()

In [None]:
def size_hist(data):
    """Save a histogram of order counts per pizza size to ``size_hist.jpg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``pizza_size`` column holding ``'M'``, ``'L'``,
        ``'S'``, ``'XL'`` or ``'XXL'``.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    # Bar order and tick labels
    size_bar = ['M', 'L', 'S', 'XL', 'XXL']
    # Assign each size an explicit bar position. Feeding the raw strings to
    # plt.hist would order bars by first appearance in the data, so the
    # fixed tick labels could end up under the wrong bars.
    position_of = {name: pos for pos, name in enumerate(size_bar)}
    size_codes = data['pizza_size'].map(position_of).dropna()
    # One unit-wide bin centred on each integer position
    plt.hist(size_codes,
             bins=np.arange(len(size_bar) + 1) - 0.5, rwidth=0.5)
    # Adjust title and label names
    plt.title("Histogram Graph of Different Pizza Size")
    plt.xlabel("Pizza Size")
    plt.ylabel("Frequency")
    plt.xticks(np.arange(len(size_bar)), size_bar)
    plt.xlim([-1, 5])
    plt.savefig("size_hist.jpg")
    plt.close()

In [None]:
def size_pie(data):
    """Save a pie chart of order share per pizza size to ``size_pie.jpg``.

    The last wedge (the least frequent size, since ``value_counts`` sorts in
    descending order) is pulled out of the pie so its label stays readable.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``pizza_size`` column.

    Returns
    -------
    None
        The figure is written to disk and then closed.
    """
    # Set up figure size
    plt.figure(figsize=(10, 10))
    # Number of rows per size; the index carries the size names
    size_count = data["pizza_size"].value_counts()
    # Build the offsets from the actual number of wedges: a fixed 5-tuple
    # makes plt.pie raise whenever the data does not contain exactly 5 sizes.
    explode = [0] * len(size_count)
    if explode:
        explode[-1] = 0.6
    # Pie chart for different pizza size among all sales
    plt.pie(x=size_count, labels=size_count.index, rotatelabels=False,
            autopct='%1.1f%%', startangle=60, explode=explode)
    plt.title("Pie Chart for Pizza Size")
    plt.savefig("size_pie.jpg")
    plt.close()

In [None]:
def summary_data(clean_data):
    """Collect headline sales statistics into a dictionary.

    Parameters
    ----------
    clean_data : pandas.DataFrame
        Must contain ``Meal_time``, ``total_price``, ``pizza_size``,
        ``week_info`` (day names) and ``pizza_name``.

    Returns
    -------
    dict
        Keys, in insertion order: ``"Lunch"``, ``"Afternoon_Tea"``,
        ``"Dinner"``, ``"Late_Night_Meal"`` (revenue per meal period, rounded
        to 2 decimals), ``"Max"`` / ``"Min"`` (``[period, revenue]`` of the
        best / worst period), ``"Size sale"`` (``value_counts`` of
        ``pizza_size``), ``"Weekday"`` / ``"Weekend"`` (revenue Mon-Fri /
        Sat-Sun) and ``"Most_sold"`` (most frequent ``pizza_name``).
    """
    report = {}
    prices = clean_data['total_price']

    # Revenue per meal period
    for period in ('Lunch', 'Afternoon_Tea', 'Dinner', 'Late_Night_Meal'):
        in_period = clean_data['Meal_time'] == period
        report[period] = round(prices[in_period].sum(), 2)

    # Best and worst period; only the four period totals exist at this point
    best_period = max(report, key=report.get)
    worst_period = min(report, key=report.get)
    best_entry = [best_period, report[best_period]]
    worst_entry = [worst_period, report[worst_period]]
    report["Max"] = best_entry
    report["Min"] = worst_entry

    # Number of order lines per pizza size
    report["Size sale"] = clean_data['pizza_size'].value_counts()

    def _revenue_for(day_names):
        # Round each day's revenue first, then round the combined total
        per_day = [
            round(sum(clean_data.loc[clean_data["week_info"] == name,
                                     'total_price']), 2)
            for name in day_names
        ]
        return round(sum(per_day), 2)

    # Revenue Monday to Friday, then Saturday and Sunday
    report["Weekday"] = _revenue_for(
        ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"])
    report["Weekend"] = _revenue_for(["Saturday", "Sunday"])

    # Pizza name that occurs on the most order lines
    report["Most_sold"] = clean_data['pizza_name'].value_counts().idxmax()
    return report

In [None]:
def linear_reg_month(data):
    """Fit a linear model of monthly revenue on month number and quantity.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Month``, ``quantity`` and ``total_price``.

    Returns
    -------
    sklearn.linear_model.LinearRegression
        Model fitted on the training part of a ``train_test_split`` (seed 11)
        with features ``['Month', 'Monthly_amount']`` and target
        ``Monthly_sales``.
    """
    # One row per month: summed quantity and summed revenue
    monthly_totals = data.groupby(
        ["Month"], as_index=False)[["quantity", "total_price"]].sum()
    month_data = monthly_totals.rename(columns={
        'quantity': 'Monthly_amount',
        'total_price': 'Monthly_sales',
    })
    # Predictors: month number and pizzas sold; response: revenue
    features = month_data[['Month', 'Monthly_amount']]
    target = month_data['Monthly_sales']
    # Only the training share of the split is used for fitting
    X_train, _X_test, y_train, _y_test = train_test_split(
        features, target, random_state=11)
    model = LinearRegression()
    model.fit(X=X_train, y=y_train)
    return model

In [None]:
def linear_reg_eve(data):
    """Fit a linear model of Christmas Eve revenue per meal period.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``order_date``, ``Meal_time``, ``quantity`` and
        ``total_price``.

    Returns
    -------
    sklearn.linear_model.LinearRegression
        Model fitted on the training part of a ``train_test_split`` (seed 11)
        with features ``['Meal_time', 'Mealtime_amount']`` (meal period
        encoded 1-4) and target ``Eve_sales``.

    Raises
    ------
    ValueError
        If ``data`` holds no orders dated 2015-12-24.
    """
    # Select pizza information for Christmas Eve
    eve = data[data['order_date'] == '2015-12-24']
    if eve.empty:
        raise ValueError("No orders found for 2015-12-24")
    # One row per meal period: summed quantity and summed revenue
    eve_data = eve.groupby(
        ["Meal_time"], as_index=False)[["quantity", "total_price"]].sum()
    eve_data = eve_data.rename(columns={
        'quantity': 'Mealtime_amount',
        'total_price': 'Eve_sales',
    })
    # Encode the meal period as a number. The result is assigned back
    # explicitly: calling .replace(..., inplace=True) on eve_data['Meal_time']
    # acts on a temporary object under pandas Copy-on-Write and would leave
    # the strings in place, which LinearRegression cannot fit.
    meal_codes = {'Lunch': 1, 'Afternoon_Tea': 2,
                  'Dinner': 3, 'Late_Night_Meal': 4}
    eve_data['Meal_time'] = eve_data['Meal_time'].map(meal_codes)
    # Independent variables: meal period code and pizzas sold in that period
    x = eve_data[['Meal_time', 'Mealtime_amount']]
    # Dependent variable: revenue of that period on Christmas Eve
    y = eve_data['Eve_sales']
    # Only the training share of the split is used for fitting
    X_train, _X_test, y_train, _y_test = train_test_split(
        x, y, random_state=11)
    model = LinearRegression()
    model.fit(X=X_train, y=y_train)
    return model

In [None]:
def linear_reg_unit_price(clean_data, pizza_size_input, pizza_category_input):
    """Fit unit price on size and category codes and print one prediction.

    Parameters
    ----------
    clean_data : pandas.DataFrame
        Must contain ``unit_price``, ``pizza_size`` and ``pizza_category``.
    pizza_size_input : int
        Size code to predict for: 1=M, 2=L, 3=S, 4=XL, 5=XXL.
    pizza_category_input : int
        Category code to predict for: 1=Classic, 2=Veggie, 3=Supreme,
        4=Chicken.

    Returns
    -------
    None
        The predicted price is printed.
    """
    size_codes = {'M': 1, 'L': 2, 'S': 3, 'XL': 4, 'XXL': 5}
    category_codes = {'Classic': 1, 'Veggie': 2, 'Supreme': 3, 'Chicken': 4}
    # Build the encoded feature table with .map so the codes are really
    # stored: .replace(..., inplace=True) on a selected column acts on a
    # temporary object under pandas Copy-on-Write and leaves the strings.
    features = pd.DataFrame({
        'pizza_size': clean_data['pizza_size'].map(size_codes),
        'pizza_category': clean_data['pizza_category'].map(category_codes),
    })
    # Dependent variable: unit price
    target = clean_data['unit_price']
    # The model is fitted on every row; no train/test split is involved
    multiple_regression_model = linear_model.LinearRegression()
    multiple_regression_model.fit(features, target)
    # Query with the same column names the model was fitted on
    query = pd.DataFrame(
        [[pizza_size_input, pizza_category_input]], columns=features.columns)
    predicted_unit_price = multiple_regression_model.predict(query)
    # Print result
    print("Predicted pizza unit price is $", predicted_unit_price)

In [None]:
# Ask user to provide which plot they want to check and store in local file
def print_plot(data):
    """Interactive menu that saves the chosen chart to the working directory.

    Keeps prompting until the user enters 8. Options 1-7 call the matching
    plotting function with ``data`` and print the name of the image written.

    Parameters
    ----------
    data : pandas.DataFrame
        Cleaned order table passed straight to the plotting functions.

    Returns
    -------
    None
    """
    # Menu number -> (plotting function, file it writes)
    plots = {
        1: (meal_hist, "meal_hist.jpg"),
        2: (week_hist, "week_hist.jpg"),
        3: (month_hist, "month_hist.jpg"),
        4: (category_hist, "category_hist.jpg"),
        5: (size_hist, "size_hist.jpg"),
        6: (category_pie, "category_pie.jpg"),
        7: (size_pie, "size_pie.jpg"),
    }
    done = False
    while not done:
        try:
            plot_num = int(input(
                    "Enter number want to check for Graphical Plots below:\n \
                    1. Histogram graph for Different Meal Time\n \
                    2. Histogram graph for Weekly Information\n \
                    3. Histogram graph for Month Information\n \
                    4. Histogram graph for Pizza Category\n \
                    5. Histogram graph for Pizza Size\n \
                    6. Pie chart for Pizza Category\n \
                    7. Pie chart for Pizza Size\n \
                    8. Back to previous section\n "))
            if plot_num in plots:
                draw, file_name = plots[plot_num]
                draw(data)
                print("Check your local file document for graph " + file_name)
            elif plot_num == 8:
                done = True
            # If users provide incorrect number
            else:
                print("Please provide a correct number")
        # If users provide non-numeric input
        except ValueError:
            print("Please provide a correct number")
        except EOFError:
            # No more input can arrive; re-prompting would loop forever
            raise
        except Exception as err:
            # Keep the menu alive but show the real cause. A bare except
            # would also trap KeyboardInterrupt and make the cell impossible
            # to stop.
            print("Unable to complete the request:", err)

In [None]:
# Ask user to provide number to know what statistical information and print out
def print_stats(data):
    """Interactive menu that prints one statistic from ``summary_data``.

    Keeps prompting until the user enters 11. The summary is only computed
    when a valid statistic (1-10) is requested.

    Parameters
    ----------
    data : pandas.DataFrame
        Cleaned order table passed to ``summary_data``.

    Returns
    -------
    None
    """
    # Menu number -> (summary key, sentence printed before the value)
    simple_reports = {
        1: ("Lunch", "Lunch time pizza sales for whole year is"),
        2: ("Afternoon_Tea",
            "Afternoon Tea time pizza sales for whole year is"),
        3: ("Dinner", "Dinner time pizza sales for whole year is"),
        4: ("Late_Night_Meal",
            "Late Night Meal time pizza sales for whole year is"),
        8: ("Weekday", "Total weekday sale is"),
        9: ("Weekend", "Total weekend sale is"),
    }
    done = False
    while not done:
        try:
            sta_num = int(input(
                    "Enter number want to check for statistics below:\n \
                    1. Lunch time pizza sales for whole year 2015\n \
                    2. Afternoon Tea time pizza sales\n \
                    3. Dinner time pizza sales\n \
                    4. Late Night Meal time pizza sales\n \
                    5. Maximum pizza sales meal time\n \
                    6. Minimum pizza sales meal time\n \
                    7. Pizza sale by size\n \
                    8. Total Weekday Pizza Sale Information \n \
                    9. Total Weekend Pizza Sale Information \n \
                    10. Most sold Pizza \n \
                    11. Back to previous section\n "))
            if sta_num == 11:
                done = True
                continue
            # If users provide incorrect number
            if not 1 <= sta_num <= 10:
                print("Please provide a correct number")
                continue
            information = summary_data(data)
            if sta_num in simple_reports:
                key, sentence = simple_reports[sta_num]
                print(sentence, information[key])
            elif sta_num == 5:
                max_name, max_val = information["Max"]
                print("Maximum Sale occured during",
                      max_name, "with value", max_val)
            elif sta_num == 6:
                # Read the "Min" entry; the maximum belongs to option 5
                min_name, min_val = information["Min"]
                print("Minimum Sale occured during",
                      min_name, "with value", min_val)
            elif sta_num == 7:
                table = information['Size sale']
                print("Here below is the table for pizza sales on "
                      "different sizes: \n", table)
            else:
                # Only option 10 is left at this point
                fav = information['Most_sold']
                print(fav, "was the most sold pizza.")
        # If users provide non-numeric input
        except ValueError:
            print("Please provide a correct number")
        except EOFError:
            # No more input can arrive; re-prompting would loop forever
            raise
        except Exception as err:
            # Keep the menu alive but show the real cause. A bare except
            # would also trap KeyboardInterrupt and make the cell impossible
            # to stop.
            print("Unable to complete the request:", err)

In [None]:
# Ask user to provide number for which model and print prediction result
def print_predict(data):
    """Interactive menu that fits a model and prints a prediction.

    Keeps prompting until the user enters 4. Options 1-3 ask for the model
    inputs, fit the matching regression on ``data`` and print the result.

    Parameters
    ----------
    data : pandas.DataFrame
        Cleaned order table passed to the model-fitting functions.

    Returns
    -------
    None
    """
    done = False
    while not done:
        try:
            pred_num = int(input(
                    "Enter number want to check for Predictions below:\n \
                    1. Predict on Monthly Sales\n \
                    2. Predict on Sales for Meal Time on Christmas Eve\n \
                    3. Predict Pizza Price based on size and category\n \
                    4. Back to previous section\n"))
            if pred_num == 1:
                month = int(input(
                    "Please provide the month you want to predict with (1-12): "
                ))
                month_amount = int(input(
                    "Please provide the pizza amount sold in that month: "))
                month_model = linear_reg_month(data)
                pred_month_sale = month_model.predict([[month, month_amount]])
                print("The prediction sale for that Month is", pred_month_sale)
            elif pred_num == 2:
                meal_time = int(input(
                    "Please provide the number for different meal time:\n \
                    1. Lunch(Smaller or equal to 1pm)\n \
                    2. Afternoon Tea(In between 2pm to 4pm)\n \
                    3. Dinner(In between 5pm to 8pm)\n \
                    4. Late Night Meal(Later than 8pm)\n"))
                meal_amount = int(input(
                    "Please provide the pizza amount sold in "
                    "that period on Christmas Eve: \n"))
                meal_eve_model = linear_reg_eve(data)
                pred_meal_sale = meal_eve_model.predict(
                    [[meal_time, meal_amount]])
                print("The prediction sale for that meal time is",
                      pred_meal_sale)
            elif pred_num == 3:
                pizza_size_input = int(input(
                    "Please provide the number for different Pizza size input:\n \
                    1. Medium \n \
                    2. Large \n \
                    3. Small\n \
                    4. Extra Large \n \
                    5. Extra Extra Large \n"
                  ))
                pizza_category_input = int(input(
                    "Please provide the number for different Pizza category input:\n \
                    1. Classic \n \
                    2. Veggie \n \
                    3. Supreme\n \
                    4. Chicken \n"
                ))
                # Prints the predicted unit price itself
                linear_reg_unit_price(
                    data, pizza_size_input, pizza_category_input)
            elif pred_num == 4:
                done = True
            # If users provide incorrect number
            else:
                print("Please provide a correct number")
        # If users provide non-numeric input
        except ValueError:
            print("Please provide a correct number")
        except EOFError:
            # No more input can arrive; re-prompting would loop forever
            raise
        except Exception as err:
            # Keep the menu alive but show the real cause. A bare except
            # would also trap KeyboardInterrupt and make the cell impossible
            # to stop.
            print("Unable to complete the request:", err)

In [None]:
def main():
    """Load the pizza sales file, clean it and run the top-level menu.

    Reads ``Data Model - Pizza Sales.csv`` from the working directory, passes
    it through ``convert_data`` and then keeps prompting until the user
    enters 4 or input is exhausted.

    Returns
    -------
    None
    """
    # Import data
    pizza = pd.read_csv("Data Model - Pizza Sales.csv")
    # Data cleaning
    clean_data = convert_data(pizza)
    # Menu number -> sub-menu to open
    sections = {1: print_plot, 2: print_stats, 3: print_predict}
    done = False
    while not done:
        try:
            option = int(input(
                "Type number for checking pizza sales information:\n \
                1. Graphical Information \n \
                2. Statistical Summary Information\n \
                3. Prediction Information\n \
                4. Quit the checking process\n "))
            # Based on provided number, open the matching sub-menu
            if option in sections:
                sections[option](clean_data)
            elif option == 4:
                done = True
                print("Come back Next time if you want to check")
            else:
                print("Please provide correct number")
        # Non-numeric input
        except ValueError:
            print("Please provide correct number")
        except EOFError:
            # No more input can arrive; leave instead of re-prompting forever
            done = True
        except Exception as err:
            # Keep the menu alive but show the real cause. A bare except
            # would also trap KeyboardInterrupt and make the cell impossible
            # to stop.
            print("Unable to complete the request:", err)
main()

Total weekday sale is 595474.15
Total weekend sale is 222385.9
