In [251]:
# IMPORT LIBRARIES #
import pandas as pd
import pygsheets
import numpy as np

In [252]:
# FLAGS #

#Set to 1 to print diagnostic info
debugFlag = 0
#Set to 1 to pull info from RawNames sheet, join GPS, update, and export
updateGPSFlag = 0

In [253]:
# IMPORT DATA #

gc = pygsheets.authorize(service_file='file.json')
sh = gc.open('DashData')

if debugFlag:
    for x in sh:
        print(x)

for x in sh:
    title = x.title
    if(title=="DeliveriesOld"):
        import_sheets_OldDelDF = x.get_as_df()
    elif(title=="Deliveries"):
        import_sheets_DelDF = x.get_as_df()
    elif(title=="Days"):
        import_sheets_DaysDF = x.get_as_df()
    elif(title=="Weeks"):
        import_sheets_WeeksDF = x.get_as_df()
    elif(title=="RawNames"):
        import_sheets_RawNamesDF = x.get_as_df()
    elif(title=="GPS"):
        #for import
        import_sheets_GPSDF = x.get_as_df()
        #for export
        if updateGPSFlag:
            import_wks_gps = x
    else:
        print("Worksheet not stored: ", x)

Worksheet not stored:  <Worksheet 'Import' index:6>
Worksheet not stored:  <Worksheet 'Lifetime Deliveries' index:7>
Worksheet not stored:  <Worksheet 'Days Old' index:8>
Worksheet not stored:  <Worksheet 'Weeks Old' index:9>


In [254]:
# Allows for disjoint cells, one for import, one for reset to import value (for ease of testing)

#DF from sheets
sheets_OldDelDF   = import_sheets_OldDelDF.copy()
sheets_DelDF      = import_sheets_DelDF.copy()
sheets_DaysDF     = import_sheets_DaysDF.copy()
sheets_WeeksDF    = import_sheets_WeeksDF.copy()
sheets_RawNamesDF = import_sheets_RawNamesDF.copy()
sheets_GPSDF      = import_sheets_GPSDF.copy()

In [255]:
# FUNCTIONS #

def processStacks(df):
    df["Stack Encoding"] = 0 
    for index, row in df.iterrows():
        if type(row["Stacked"]) == int:
            df.at[index, "Stack Encoding"] = 2
            df.at[index, "Distance"] /= 2
            df.at[index, "Duration"] /= 2
        else:
            if (row["Stacked"] == ""):
                df.at[index, "Stack Encoding"] = 1
            elif (row["Stacked"] == "PENTA"):
                df.at[index, "Stack Encoding"] = 5
                df.at[index, "Distance"] /= 5
                df.at[index, "Duration"] /= 5
            else:
                df.at[index, "Stack Encoding"] = 3
                df.at[index, "Distance"] /= 3
                df.at[index, "Duration"] /= 3

def dollars_to_floats(df, columns):
    for x in columns:
        df[x] = df[x].str.replace('$', '')
        df[x] = df[x].str.replace(',', '')
        df[x] = df[x].astype(float)
        
def hours_to_floats(df, columns):
    for x in columns:
        for index, row in df.iterrows():
            active = row[x]
            activeH = float(active[:active.index(":")])
            activeM = round(float(active[active.index(":")+1:active.index(":")+3])/60, 2)
            active = activeH + activeM
            df.at[index, x] = active
        df[x] = df[x].astype(float)

def getStartHour(df):
    df["Start Hour"] = -1
    for index, row in df.iterrows():
        startHour = row["Start Time"]
        clockSet = startHour[-2]
        startHour = int(startHour[:startHour.index(":")])
        if (clockSet == "P"):
            startHour += 12
        df.at[index, "Start Hour"] = startHour

def getStartHour2(df):
    df["Start Hour2"] = -1
    for index, row in df.iterrows():
        startHour = row["Start Time"]
        clockSet = startHour[-2]
        startHour = int(startHour[:startHour.index(":")])
        if (clockSet == "P"):
            startHour += 12
        df.at[index, "Start Hour2"] = startHour        
        

def addStartAndEndDate(df):
    lastRow = df.iloc[[-1]]
    df = df[:-1]
    df["Start Date"] = -1
    df["End Date"]   = -1
    for index, row in df.iterrows():
        tempVar = row["Dates"]
        tempVar1 = tempVar[:tempVar.index(" ")]
        tempVar2 = tempVar[tempVar.index("- ")+2:]
        df.at[index, "Start Date"] = tempVar1
        df.at[index, "End Date"] = tempVar2
    df = pd.concat([df, lastRow], ignore_index=True)
    df.reset_index(drop=True, inplace=True)
    return df

#Currently only valid for 2023
def date_to_datetime(df, column):
    for index, row in df.iterrows():
        df.at[index, column] = pd.Timestamp('2023/' + row[column])
    df[column] = pd.to_datetime(df[column])
        
        
#Currently only valid for 2023
# Currently unused function
# Would we actually want this to have different dates? it's really the date the shift started
# So maybe, keep date same, but certain analysis might require that difference to be handled
def time_to_timestamp(df, DateCol, TimeCols):
    for index, row in df.iterrows():
        df.at[index, TimeCols[0]] = pd.Timestamp('2023/' + row[DateCol] + " " + row[TimeCols[0]])
        df.at[index, TimeCols[1]] = pd.Timestamp('2023/' + row[DateCol] + " " + row[TimeCols[1]])

In [256]:
# UPDATE GPS WORKSHEET #

if updateGPSFlag:
    testingDF = sheets_RawNamesDF.copy()
    testingDF = testingDF.drop_duplicates(subset=["Restaurant Name", "RAW DATA"])
    testingDF = testingDF[["Restaurant Name", "RAW DATA", "Note"]]
    newNamesDF = testingDF.merge(sheets_GPSDF, how='left')
    for index, row in newNamesDF.iterrows():
        if isinstance(row['Address'], float):
            if not row['Note'] == "":
                newNamesDF.at[index, "Address"] = row["Note"]
            else:
                print("Note not found for:", index)
        if (row['Address'] == ""):
            newNamesDF.at[index, "Resolved"] = "0"
        else:
            newNamesDF.at[index, "Resolved"] = "1"
    newNamesDF = newNamesDF.drop(columns=["Note"])
    import_wks_gps.set_dataframe(newNamesDF,(1,1))

In [257]:
# START PROCESSING #

In [258]:
# FUNCTIONS AND OTHER SHORT PROCESSING STEPS #


#V1/V2
dollars_to_floats(sheets_OldDelDF, ["Total", "Base", "Tip", "Peak Bonus"])
date_to_datetime(sheets_OldDelDF, "Date")
sheets_OldDelDF["Day of Week"] = sheets_OldDelDF["Date"].dt.day_name()
#V3 only
dollars_to_floats(sheets_DelDF, ["Total", "Base", "Tip", "Peak Bonus"])
getStartHour(sheets_DelDF)
date_to_datetime(sheets_DelDF, "Date")
sheets_DelDF["Day of Week"] = sheets_DelDF["Date"].dt.day_name()
processStacks(sheets_DelDF)
#Days
dollars_to_floats(sheets_DaysDF, ["Total", "Base", "Tip"])
hours_to_floats(sheets_DaysDF, ["Dash", "Active"])
date_to_datetime(sheets_DaysDF, "Date")
sheets_DaysDF["Day of Week"] = sheets_DaysDF["Date"].dt.day_name()
sheets_DaysDF["HourlyRate"] = sheets_DaysDF["Total"]/sheets_DaysDF["Dash"]
#Weeks
dollars_to_floats(sheets_WeeksDF, ["Total", "Base", "Tip", "Adj pay", "Other", "Pre Total"])
hours_to_floats(sheets_WeeksDF, ["Dash", "Active"])
sheets_WeeksDF = addStartAndEndDate(sheets_WeeksDF)
# pretotalactiveratio refers to weekly pre adjustment pay / time spent on deliveries
sheets_WeeksDF["pretotalactiveratio"] = round(sheets_WeeksDF["Pre Total"]/sheets_WeeksDF["Active"], 2)
sheets_WeeksDF["HourlyRate"] = sheets_WeeksDF["Total"]/sheets_WeeksDF["Dash"]

  df[x] = df[x].str.replace('$', '')
  df[x] = df[x].str.replace('$', '')
  df[x] = df[x].str.replace('$', '')
  df[x] = df[x].str.replace('$', '')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Start Date"] = -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["End Date"]   = -1


In [259]:
# V3 SELECTION #
sheets_DaysDF1 = sheets_DaysDF[sheets_DaysDF["Version"] == 3].copy()
sheets_WeeksDF1 = sheets_WeeksDF[sheets_WeeksDF["Version"] == 3].copy() #totals doesn't have v3 so its dropped

In [260]:
# ACTIVE MILEAGE 
#tempDF = sheets_DelDF
tempDF = sheets_DelDF.groupby(["Date"]).agg({"Distance": "sum"})
tempDF = tempDF.rename(columns={"Distance": "ActiveMileage"})
sheets_DaysDF1 = sheets_DaysDF1.merge(tempDF, on=['Date', sheets_DaysDF1.index])
sheets_DaysDF1 = sheets_DaysDF1.drop(columns=['key_1'])
tempDF = sheets_DaysDF1.groupby(["WeekID"]).agg({"ActiveMileage": "sum"})
sheets_WeeksDF1 = sheets_WeeksDF1.merge(tempDF, on=['WeekID', sheets_WeeksDF1.index])
sheets_WeeksDF1 = sheets_WeeksDF1.drop(columns=['key_1'])

In [261]:
# BEGIN ADJ PAY SECTION (v3 only) #

# This establishes the estimated pay adjustment estimates by delivery, date, and week
# This is calculated using time spent delivering and distance approximations
# The real adjusted pay is based on weekly values
# I suspect the primary cause for difference between estimate and real values stems from 
#how I estimate mileage vs how the app tracks mileage

In [262]:
#Per delivery estimations


sheets_DelDF["Duration"] = pd.to_numeric(sheets_DelDF["Duration"])
sheets_DelDF["Est Adj"] = (sheets_DelDF["Duration"]/60*18
                           + sheets_DelDF["Distance"]*(0.34) ) - sheets_DelDF["Base"]
#Duration is over 60 because its value is in minutes, values from day and week are in hours

In [263]:
#Per day estimations
sheets_DaysDF1["Est Adj"] = (sheets_DaysDF1["Active"]*(18) 
                            + sheets_DaysDF1["ActiveMileage"]*(0.34) ) - sheets_DaysDF1["Base"]

In [264]:
#Per week estimations
sheets_WeeksDF1["Est Adj"] = (sheets_WeeksDF1["Active"]*(18) 
                            + sheets_WeeksDF1["ActiveMileage"]*(0.34) ) - sheets_WeeksDF1["Base"]

In [265]:
# END ADJ PAY SECTION #

In [266]:
# REBUILD V1,2 + V3 SETS

# Deliveries #
sheets_result = pd.concat([sheets_OldDelDF, sheets_DelDF], sort=False)
sheets_result.reset_index(drop=True, inplace=True)

# V1, V2 #
sheets_DaysDF0 = sheets_DaysDF[sheets_DaysDF["Version"] != 3]
sheets_WeeksDF0 = sheets_WeeksDF[sheets_WeeksDF["Version"] != 3]
total_S = sheets_WeeksDF0.iloc[[-1]]
sheets_WeeksDF0 = sheets_WeeksDF0[:-1]

# Days #
sheets_DaysDF = pd.concat([sheets_DaysDF0, sheets_DaysDF1], sort=False)
sheets_DaysDF.reset_index(drop=True, inplace=True)

# Weeks #
sheets_WeeksDF = pd.concat([sheets_WeeksDF0, sheets_WeeksDF1], sort=False)
sheets_WeeksDF = pd.concat([sheets_WeeksDF, total_S], ignore_index=True)
sheets_WeeksDF.reset_index(drop=True, inplace=True)

In [267]:
if debugFlag:
    print(list(sheets_result.columns.values))
    print(sheets_result.dtypes)
    print()
    print(list(sheets_DaysDF.columns.values))
    print(sheets_DaysDF.dtypes)
    print()
    print(list(sheets_WeeksDF.columns.values))
    print(sheets_WeeksDF.dtypes)

In [268]:
# END PROCESSING #
# START INITIAL EXPLORATION #

In [269]:
#Percentage of deliveries taken that tip
numUntipped = (sheets_result["Tip"] == 0.0).sum()
numTipped = (sheets_result["Tip"] != 0.0).sum()
percTipped = round((numTipped/(numUntipped+numTipped)), 4)
print(percTipped*100, "% of accepted deliveries included tip")

#Average tip, including and excluding untipped deliveries
TipAvg = round(sheets_result["Tip"].mean(), 2)
print("$", TipAvg, "average tip value including orders with no tip")
TipAvg2 = round(sheets_result[sheets_result["Tip"] !=0]["Tip"].mean(), 2)
print("$", TipAvg2, "average tip value excluding orders with no tip")

# Overall hourly rate
hourlyRate = round(sheets_WeeksDF.iloc[-1]["Total"]/sheets_WeeksDF.iloc[-1]["Dash"], 2)
print("Overall hourly pay rate is $", hourlyRate)

# Max and Average values by Delivery, Day, and Week
# Some of these values are wrong, because I have not yet implemented a stack value handler
print()
print("Daily Max Values")
print(sheets_DaysDF[["Total", "Base", "Tip", "Active", "Dash", "Deliveries"]].max())
print(sheets_DaysDF[sheets_DaysDF["Version"] == 3][["Total Mileage", "Active Mileage"]].max(), "\n" )
print("Daily Average Values")
print(sheets_DaysDF[["Total", "Base", "Tip", "Active", "Dash", "Deliveries"]].mean())
print(sheets_DaysDF[sheets_DaysDF["Version"] == 3][["Total Mileage", "Active Mileage"]].mean(), "\n" )
print()
# Some of these values are wrong, because I have not yet implemented a stack value handler
print("Weekly Max Values")
#
print(sheets_WeeksDF[["Base", "Tip", "Adj pay", "Pre Total", "Total", "Active", "Dash", "Deliveries"]].max())
#
print(sheets_WeeksDF[sheets_WeeksDF["Version"] == 3][["Total Mileage", "Active Mileage"]].max(), "\n")
print("Weekly Average Values")
#
print(sheets_WeeksDF[["Base", "Tip", "Adj pay", "Pre Total", "Total", "Active", "Dash", "Deliveries"]].mean())
#
print(sheets_WeeksDF[sheets_WeeksDF["Version"] == 3][["Total Mileage", "Active Mileage"]].mean(), "\n")
print()
# Some of these values are wrong, because I have not yet implemented a stack value handler
print("Delivery Max Values")
#
print(sheets_DelDF[["Distance", "Duration"]].max())
print(sheets_result[["Total"]].max(), "\n")
print("Delivery Average Values")
#
print(sheets_DelDF[["Distance", "Duration"]].mean())
print(sheets_result[["Total"]].mean(), "\n")

# END INITIAL EXPLORATION #

90.72 % of accepted deliveries included tip
$ 4.11 average tip value including orders with no tip
$ 4.53 average tip value excluding orders with no tip
Overall hourly pay rate is $ 25.53

Daily Max Values
Total         333.57
Base          191.50
Tip           149.53
Active          9.67
Dash           13.60
Deliveries     33.00
dtype: float64
Total Mileage       151
Active Mileage    229.2
dtype: object 

Daily Average Values
Total         159.728615
Base           89.503846
Tip            70.224769
Active          5.070462
Dash            7.278769
Deliveries     17.061538
dtype: float64
Total Mileage     91.625000
Active Mileage    87.621875
dtype: float64 


Weekly Max Values
Base           5817.75
Tip            4564.61
Adj pay        1681.17
Pre Total     10382.36
Total         12096.28
Active          330.30
Dash            473.75
Deliveries     1109.00
dtype: float64
Total Mileage       633
Active Mileage    619.4
dtype: object 

Weekly Average Values
Base           581.7750
Tip

In [270]:
# Restaurants DF #

tempDF = sheets_result.copy()
#The line below drops restaurants that only appear once, we may no longer want this to happen
#Commented out for now, may make sense to use when considering outliers for averages
#tempDF = tempDF[sheets_result["Restaurant Name"].duplicated(keep=False) == True]
tempDF = tempDF.groupby(["Restaurant Name"]).agg({"Base": [np.mean, np.sum], 
                                                  "Tip": [np.mean, np.sum],
                                                  "Total": [np.mean, np.sum],
                                                 'Restaurant Name': 'size'})
tempDF.columns = ['Avg Base', 'Sum Base', 'Avg Tip', 
                  'Sum Tip', 'Avg Total', 'Sum Total', "Deliveries"]

tempDF = tempDF.round({'Avg Base': 2, 
                       'Sum Base': 2,
                       'Avg Tip': 2,
                       'Sum Tip': 2,
                       'Avg Total': 2,
                       'Sum Total': 2})
tempDF1 = tempDF.index.copy() # All restaurant names visited
restaurantsDF = tempDF.copy()
# Change sort value here
restaurantsDF.sort_values("Sum Total", ascending=False)

Unnamed: 0_level_0,Avg Base,Sum Base,Avg Tip,Sum Tip,Avg Total,Sum Total,Deliveries
Restaurant Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Chick-fil-A,5.76,725.50,3.60,453.39,9.36,1178.89,126
Taco Bell,4.79,315.87,3.44,226.75,8.70,574.12,66
McDonald's,4.61,332.25,3.26,234.92,7.89,568.17,72
Dave's Hot Chicken,5.25,278.50,4.25,225.50,9.51,504.00,53
Chipotle,4.74,255.75,3.52,189.92,8.29,447.67,54
...,...,...,...,...,...,...,...
Everbowl,4.00,4.00,2.00,2.00,6.00,6.00,1
Kung Fu Noodle,5.50,5.50,0.00,0.00,5.50,5.50,1
North Park Produce Bakery and Grill,3.00,3.00,2.00,2.00,5.00,5.00,1
Papa Johns,4.00,4.00,1.00,1.00,5.00,5.00,1


In [271]:
# Restaurants DF #

#Dollars Earned vs Miles Driven is another factor I wanted to consider in the overall earnings equation

tempDF = sheets_result[sheets_result["Version"] == 3].copy()
tempDF = tempDF.groupby(["Restaurant Name"]).agg({"Total": "sum", "Distance": "sum"})
tempDF = tempDF.round({'Base': 2, 'Tip': 2})
tempDF["DollarMilesRatio"] = tempDF["Total"]/tempDF["Distance"]
tempDF2 = tempDF.index.copy() #All restaurants from v1 and v2
restaurantsDF = restaurantsDF.join(tempDF["DollarMilesRatio"])

In [272]:
restaurantsDF

Unnamed: 0_level_0,Avg Base,Sum Base,Avg Tip,Sum Tip,Avg Total,Sum Total,Deliveries,DollarMilesRatio
Restaurant Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
7-Eleven,3.96,146.37,2.32,86.00,6.42,237.37,37,2.048230
Acai Superfruits,5.25,5.25,5.00,5.00,10.25,10.25,1,
Advanced Auto Parts,6.95,34.75,0.00,0.00,6.95,34.75,5,2.151639
Albertsons,4.58,13.75,12.39,37.17,17.64,52.92,3,7.765909
Angry Chickz,6.25,6.25,5.00,5.00,11.25,11.25,1,2.848101
...,...,...,...,...,...,...,...,...
Wild Burger,5.00,5.00,4.00,4.00,9.00,9.00,1,
Wings-N-Things,4.56,18.25,3.62,14.50,8.19,32.75,4,3.072917
Wingstop,5.33,16.00,3.83,11.50,9.17,27.50,3,3.374233
Wingstop Drive,5.00,10.00,2.90,5.79,7.90,15.79,2,2.218605


In [273]:
# Select the restaurants from the complete list that do not 'yet' appear in v3
tempDF3 = tempDF1[~tempDF1.isin(tempDF2)].dropna(how = 'all')
tempDF3

Index(['Acai Superfruits', 'Beach Hut Deli', 'Bloom Haus',
       'Brooklyn Boyz Pizza', 'Chili's Grill & Bar', 'Chin's Szechwan',
       'Cream Of The Crop', 'Epic Wings', 'Erika's Mexican Food & Seafood',
       'Everbowl', 'FTD', 'Firehouse Subs', 'Greek Corner Cafe',
       'Guanatos City Mexican Restaurant', 'Handel's Homemade Ice Cream',
       'Harney Sushi', 'Jamba', 'Joe's Crab Shack', 'Krispy Kreme',
       'Mikko Sushi', 'North Park Produce Bakery and Grill',
       'Ocean Thai Cuisine', 'PETCO', 'Pit Stop Diner', 'Pollos Maria',
       'Quik Wok', 'Sizzler', 'Sonic Drive-In', 'Super Mega Dilla',
       'TZONE Oceanside', 'Tapioca Express', 'TenderFix by Noah Schnapp',
       'The Cheesecake Factory', 'The Cheesesteak Grill', 'The Meltdown',
       'The Poke Cafe', 'Tony's Fresh Mexican Food',
       'Umi Japanese Grill & Sushi', 'Village Kabob', 'Wienerschnitzel',
       'Wild Burger'],
      dtype='object', name='Restaurant Name')

In [274]:
# Day of Week DF #

# NOTE: This excludes pay adjustments

tempDF = sheets_DaysDF.copy()
tempDF = tempDF.groupby(["Day of Week"]).agg(
    {"Dash": "sum", 
     "Total": [np.mean, np.sum], 
     "Tip": "sum", 
     "Deliveries": "sum"})
tempDF.columns = ['Sum Dash', 'Avg Daily Total', 'Sum Total', 'Sum Tip', 'Sum Deliveries']
tempDF["$/hr"] = tempDF["Sum Total"]/tempDF["Sum Dash"]
tempDF["Avg Delivery Tip"] = tempDF["Sum Tip"]/tempDF["Sum Deliveries"]
tempDF["Avg Delivery Total"] = tempDF["Sum Total"]/tempDF["Sum Deliveries"]
tempDF = tempDF.round({'Sum Dash': 2, 
                       'Avg Total': 2, 
                       'Sum Total':2, 
                       'Sum Tip': 2,
                       '$/hr': 2,
                       'AvgTip': 2,
                       'AvgTotal':2})
display(tempDF)

Unnamed: 0_level_0,Sum Dash,Avg Daily Total,Sum Total,Sum Tip,Sum Deliveries,$/hr,Avg Delivery Tip,Avg Delivery Total
Day of Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Friday,142.37,188.516471,3204.78,1460.28,333,22.51,4.385225,9.623964
Monday,17.85,125.823333,377.47,158.97,41,21.15,3.877317,9.206585
Saturday,116.18,178.88,2683.2,1179.95,279,23.1,4.229211,9.617204
Sunday,49.58,147.3475,1178.78,545.53,122,23.78,4.471557,9.662131
Thursday,72.83,140.773636,1548.51,656.01,176,21.26,3.72733,8.798352
Tuesday,1.82,31.75,31.75,14.0,3,17.45,4.666667,10.583333
Wednesday,72.49,135.787,1357.87,549.87,155,18.73,3.547548,8.760452


In [275]:
# UNDER CONSTRUCTION #

In [276]:
sheets_WeeksDF.tail()

Unnamed: 0,Dates,Base,Tip,Adj pay,Other,Pre Total,Total,Active,Dash,Deliveries,Total Mileage,Active Mileage,Version,WeekID,Start Date,End Date,pretotalactiveratio,HourlyRate,ActiveMileage,Est Adj
15,7/24 - 7/30,278.25,209.79,71.96,0.0,488.04,560.0,14.82,19.52,50,285,271.8,3,17.0,7/24,7/30,32.93,28.688525,214.3,61.372
16,7/31 - 8/6,248.75,183.44,74.75,0.0,432.19,506.94,13.75,18.57,44,272,386.2,3,18.0,7/31,8/6,31.43,27.298869,188.3,62.772
17,8/7 - 8/13,149.5,109.5,28.66,0.0,259.0,287.66,7.62,10.15,26,141,120.6,3,19.0,8/7,8/13,33.99,28.340887,108.0,24.38
18,8/14 - 8/20,77.0,56.0,0.0,0.0,133.0,133.0,4.85,6.52,15,73,62.1,3,20.0,8/14,8/20,27.42,20.398773,52.7,28.218
19,TOTALS:,5817.75,4564.61,1681.17,32.75,10382.36,12096.28,330.3,473.75,1109,2932,2803.86,-,,,,31.43,25.533045,,


In [277]:
sheets_DaysDF.tail()

Unnamed: 0,Date,Total,Base,Tip,Start (24),End (24),Active,Dash,Offers,Deliveries,...,Mileage End,Total Mileage,Active Mileage,Version,Pauses,WeekID,Day of Week,HourlyRate,ActiveMileage,Est Adj
60,2023-08-06,95.0,56.5,38.5,16:37,20:50,3.32,4.22,11,11,...,71510,56,50.7,3,"(7:04pm, 7:09pm)",18,Sunday,22.511848,39.2,16.588
61,2023-08-11,172.25,97.75,74.5,13:54,20:43,5.07,6.8,17,17,...,71699,98,87.2,3,"(2:55pm, 3:11pm)(5:59pm, 6:07pm)",19,Friday,25.330882,74.6,18.874
62,2023-08-12,86.75,51.75,35.0,17:45,21:06,2.55,3.35,9,9,...,71760,43,33.4,3,0,19,Saturday,25.895522,33.4,5.506
63,2023-08-15,31.75,17.75,14.0,17:13,19:03,1.12,1.82,3,3,...,71851,16,9.9,3,0,20,Tuesday,17.445055,9.9,5.776
64,2023-08-16,101.25,59.25,42.0,16:01,20:42,3.73,4.68,12,12,...,71909,57,52.2,3,0,20,Wednesday,21.634615,42.8,22.442


In [278]:
sheets_result.tail()

Unnamed: 0,ID,Date,Restaurant Name,Total,Base,Tip,Peak Bonus,Stacked,Version,Day of Week,Start Time,Distance,End Time,Duration,Start Hour,Stack Encoding,Est Adj
1105,1107,2023-08-16,Panda Express,4.0,4.0,0.0,0.0,1104.0,3,Wednesday,7:53:00 PM,1.55,8:13:00 PM,10.0,19.0,2.0,-0.473
1106,1108,2023-08-16,Dave's Hot Chicken,8.5,4.0,4.5,0.0,,3,Wednesday,4:04:00 PM,1.9,4:19:00 PM,15.0,16.0,1.0,1.146
1107,1109,2023-08-16,Dave's Hot Chicken,10.75,7.75,3.0,0.0,,3,Wednesday,6:35:00 PM,5.7,6:55:00 PM,20.0,18.0,1.0,0.188
1108,1110,2023-08-16,Chick-fil-A,10.25,7.75,2.5,0.0,,3,Wednesday,8:19:00 PM,7.7,8:42:00 PM,23.0,20.0,1.0,1.768
1109,1111,2023-08-16,Sabai Sabai Thai Kitchen,10.75,3.75,7.0,0.0,1103.0,3,Wednesday,6:57:00 PM,3.15,7:22:00 PM,12.5,18.0,2.0,1.071


In [279]:
#THIS IS NOT A PERFECT ESTIMATION
#THIS ASSUMES AN HOURLY RATE THAT IS CALCULATED AS FOLLOWS
#EVERY DATE, ADD TOTAL FOR ORDERS THAT START AT EACH HOUR
#THIS IS HOURLY RATE BY DATE AND BY HOUR
#GET AVERAGE, FOR DAY OF WEEK AND HOUR
#THIS YIELDS AN APPROXIMATION OF HOURLY RATE BY DAY OF WEEK AND HOUR (E.G. 4PM FRIDAY)

tempDF = sheets_result.copy()
tempDF = tempDF.groupby(["Date", "Day of Week", "Start Hour"]).agg({"Total" : "sum"})
print(tempDF.to_markdown())

|                                                       |   Total |
|:------------------------------------------------------|--------:|
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 13.0)    |   19.94 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 14.0)    |    9.75 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 15.0)    |   22.25 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 16.0)    |   16.75 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 17.0)    |   25.75 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 18.0)    |   23.5  |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 19.0)    |   29.25 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 20.0)    |   44.25 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 24.0)    |   23.25 |
| (Timestamp('2023-05-27 00:00:00'), 'Saturday', 13.0)  |    7.25 |
| (Timestamp('2023-05-27 00:00:00'), 'Saturday', 14.0)  |   25.75 |
| (Timestamp('2023-05-27 00:00:00'), 'Saturday', 15.0)  |   24.5  |
| (Timestamp('2023-05-27 00:00:00'), 'Saturday',

In [280]:
tempDF1 = tempDF.copy()
tempDF1 = tempDF1.groupby(["Day of Week", "Start Hour"]).agg({"Total" : "mean"})
print(tempDF1.to_markdown())

|                     |   Total |
|:--------------------|--------:|
| ('Friday', 13.0)    | 17.4914 |
| ('Friday', 14.0)    | 26.9187 |
| ('Friday', 15.0)    | 22.9725 |
| ('Friday', 16.0)    | 20.3562 |
| ('Friday', 17.0)    | 26.3433 |
| ('Friday', 18.0)    | 26.4211 |
| ('Friday', 19.0)    | 22.88   |
| ('Friday', 20.0)    | 27.395  |
| ('Friday', 21.0)    | 16.72   |
| ('Friday', 22.0)    |  8.375  |
| ('Friday', 23.0)    | 15.5    |
| ('Friday', 24.0)    | 20.5833 |
| ('Monday', 13.0)    | 19.25   |
| ('Monday', 14.0)    | 38      |
| ('Monday', 16.0)    | 20.5    |
| ('Monday', 17.0)    | 24.75   |
| ('Monday', 18.0)    | 26.25   |
| ('Monday', 19.0)    | 26.5    |
| ('Monday', 20.0)    | 39.75   |
| ('Monday', 21.0)    | 18      |
| ('Monday', 22.0)    |  7.25   |
| ('Monday', 23.0)    | 13.5    |
| ('Monday', 24.0)    |  7.75   |
| ('Saturday', 12.0)  | 27.5    |
| ('Saturday', 13.0)  | 16.5    |
| ('Saturday', 14.0)  | 25.75   |
| ('Saturday', 15.0)  | 22.546  |
| ('Saturday',

In [281]:
tempDF_A = sheets_result.copy()
tempDF_A = tempDF_A.groupby(["Date", "Day of Week", "Start Hour"]).agg({"Total" : "sum"})
print(tempDF_A.to_markdown())


|                                                       |   Total |
|:------------------------------------------------------|--------:|
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 13.0)    |   19.94 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 14.0)    |    9.75 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 15.0)    |   22.25 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 16.0)    |   16.75 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 17.0)    |   25.75 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 18.0)    |   23.5  |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 19.0)    |   29.25 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 20.0)    |   44.25 |
| (Timestamp('2023-05-26 00:00:00'), 'Friday', 24.0)    |   23.25 |
| (Timestamp('2023-05-27 00:00:00'), 'Saturday', 13.0)  |    7.25 |
| (Timestamp('2023-05-27 00:00:00'), 'Saturday', 14.0)  |   25.75 |
| (Timestamp('2023-05-27 00:00:00'), 'Saturday', 15.0)  |   24.5  |
| (Timestamp('2023-05-27 00:00:00'), 'Saturday',

In [282]:
tempDF_B = tempDF_A.copy()
tempDF_B = tempDF_B.groupby(["Start Hour"]).agg({"Total" : "mean"})
print(tempDF_B.to_markdown())

|   Start Hour |   Total |
|-------------:|--------:|
|           11 | 14.25   |
|           12 | 27.5    |
|           13 | 16.6029 |
|           14 | 25.3882 |
|           15 | 20.8388 |
|           16 | 23.0136 |
|           17 | 21.4613 |
|           18 | 25.7434 |
|           19 | 23.3937 |
|           20 | 24.5414 |
|           21 | 19.5371 |
|           22 | 16.1522 |
|           23 | 14.3333 |
|           24 | 16.4286 |


In [283]:
#ah, right, start hour is 12 not 24
#this is a tomorrow problem
#tempDF = sheets_DelDF.copy()
#getStartHour2(tempDF)
#tempDF.tail()