# Measure The 6 Plus Bin Heuristics Accuracy

As a refresher, these are our occasions for the 3-5 bin:

1. `Family Event`
2. `Drinking`
3. `Breakfast`
4. `Lunch`
5. `Dinner`
6. `Social Gathering`
7. `After Work`

We're going to pick specific labeled tables and see how well our heuristics are doing.

In [2]:
# Maps each dataset name to the relative path of its CSV file; the evaluation
# loop looks the path up by dataset name and loads it with `pd.read_csv`.
data_map = {
    "hockey": "../data/hockey_3_text_processed.csv",
    "valentine": "../data/valentine_3_text_processed.csv",
    "silvester": "../data/silvester_3_text_processed.csv"
}

The following are taken from the *annotations/occasions_annotations_bin3to5* notebook.

In [2]:
# Hand-labeled tables per dataset: dataset name -> list of
# (order_id, true occasion label) pairs.
labeled_tables_map = {
    "hockey": [
        (514594544, "DINNER"),
        (520144566, "DINNER_PARTY"),  # problem in heuristic due to item quantity
        (512856753, "UNKNOWN"),
        (523552965, "FAMILY_EVENT"),
        (522777307, "DRINKING"),
        (520932269, "DINNER"),
        (520961529, "FAMILY_EVENT"),  # bug in extracting the kid meal
        (522822209, "DRINKING"),
        (524870243, "FAMILY_EVENT"),
        (514742701, "SOCIAL_GATHERING"),
        (521935119, "DRINKING"),
        (514531924, "DINNER"),
        (513465814, "DINNER"),
    ],
    "valentine": [
        (434693771, "FAMILY_EVENT"),  # bug in extracting the kid meal
        (446750517, "UNKNOWN"),
        (447075894, "DINNER"),
        (447589967, "AFTER_WORK"),
        (434539751, "FAMILY_EVENT"),
        (448045237, "FAMILY_EVENT"),
        (447558668, "DINNER"),
        (448730983, "DRINKING"),
        (447983682, "AFTER_WORK"),
        (447100562, "FAMILY_EVENT"),
        (434734939, "UNKNOWN"),
        (448721414, "SOCIAL_GATHERING"),
        (434681763, "FAMILY_EVENT"),
        (447350607, "LUNCH"),
        (435068108, "UNKNOWN"),
        (435006655, "LUNCH"),
    ],
    "silvester": [
        (361348245, "DINNER"),
        (360970732, "FAMILY_EVENT"),
        (360903046, "DINNER"),
        (363236528, "FAMILY_EVENT"),
        (361588010, "UNKNOWN"),
        (360052821, "FAMILY_EVENT"),
        (360026445, "AFTER_WORK"),
        (362956644, "DINNER"),
        (360848387, "DINNER"),
        (362889042, "LUNCH"),
        (358979972, "DINNER"),
        (360542786, "DINNER"),
        (362306771, "SOCIAL_GATHERING"),
    ],
}

------

In [3]:
import pandas as pd

Import the classifier class `Bin6PlusClassifier` (aliased as `Classifier`) and create an instance of it:

In [4]:
# `Classifier` is a local alias for the project's `Bin6PlusClassifier`.
from bin_6plus import Bin6PlusClassifier as Classifier
# Single classifier instance; the evaluation loop calls `classifier.classify(...)` on it.
classifier = Classifier()

-----

### Results per Table:

In [5]:
# Dataset names to evaluate; each one is used as a key into `data_map` and
# into the labeled-tables map inside the evaluation loop.
tables = ["hockey", "valentine", "silvester"]

In [6]:
# Evaluation output: dataset name -> list of
# (order_id, true_occasion, pred_occasion) tuples, filled by the evaluation loop.
results = {}
# NOTE(review): `results_new` is not referenced anywhere else in the visible
# part of this notebook — confirm whether it is still needed.
results_new = {}

In [7]:
from occasion_classifier import shrink_orders_to_table

In [8]:
# Classify every labeled order of every dataset and record
# (order_id, true occasion, predicted occasion) under results[<dataset>].
#
# NOTE: `test_labeled_tables_map` is defined in the notebook's final cell
# (executed as In [1]); that cell has to run before this one.
for table in tables:
    print("Running for", table)
    df = pd.read_csv(data_map[table])

    # Scores the test label set. To score the annotation-notebook labels
    # instead, index `labeled_tables_map[table]` here.
    labeled_tables = test_labeled_tables_map[table]

    # Register the list first so rows collected so far stay visible in
    # `results` even if a later classification raises.
    scored = []
    results[table] = scored
    for order_id, expected in labeled_tables:
        table_orders = shrink_orders_to_table(df[df["order_id"] == order_id])
        predicted = classifier.classify(table_orders)
        scored.append((order_id, expected, predicted))

Running for hockey
Running for valentine
Running for silvester


#### Show the results:

In [9]:
# Column names for the per-dataset result DataFrames; the order matches the
# (order_id, true, predicted) tuples stored in `results`.
columns = ["order_id", "true_occasion", "pred_occasion"]

In [14]:
def color(data):
    """Return per-cell CSS that shades a whole row by prediction correctness.

    Written for ``Styler.apply(color, axis=1)``, where ``data`` is one row.

    Parameters
    ----------
    data : row-like (e.g. ``pandas.Series``)
        Must provide ``"true_occasion"`` and ``"pred_occasion"`` entries and
        a ``values`` attribute that supports ``len``.

    Returns
    -------
    list of str
        One ``"background-color: ..."`` declaration per value in ``data``:
        green (``#58f200``) when the two occasions are equal, red
        (``#ee1300``) otherwise.
    """
    is_match = data["true_occasion"] == data["pred_occasion"]
    shade = "#58f200" if is_match else "#ee1300"
    css = "background-color: %s" % shade
    # Same declaration for every cell so the entire row gets highlighted.
    return [css for _ in range(len(data.values))]

In [15]:
table = "hockey"

# Tabulate this dataset's (order_id, true, predicted) tuples, numbering rows from 1.
df = pd.DataFrame(results[table], columns=columns)
df.index = df.index + 1
# Optional CSV export: df.to_csv(str(table) + "_3to5_test_results.csv", index=False)
# The Styler is the cell's last expression so the colored table is rendered.
df.style.apply(color, axis=1)

Unnamed: 0,order_id,true_occasion,pred_occasion
1,511468198,DINNER_PARTY,DINNER_PARTY
2,520017497,LUNCH,LUNCH
3,515744868,LUNCH,FAMILY_EVENT
4,524540364,LUNCH,LUNCH
5,520945506,UNKNOWN,UNKNOWN
6,521008563,DINNER,DINNER
7,520801601,FAMILY_EVENT,FAMILY_EVENT
8,523675728,DINNER,DINNER
9,521747487,DINNER,DINNER
10,523644747,UNKNOWN,UNKNOWN


In [16]:
table = "valentine"

# Tabulate this dataset's (order_id, true, predicted) tuples, numbering rows from 1.
df = pd.DataFrame(results[table], columns=columns)
df.index = df.index + 1
# Optional CSV export: df.to_csv(str(table) + "_3to5_test_results.csv", index=False)
# The Styler is the cell's last expression so the colored table is rendered.
df.style.apply(color, axis=1)

Unnamed: 0,order_id,true_occasion,pred_occasion
1,447350607,LUNCH,LUNCH
2,435125743,DINNER,DINNER
3,435153168,LUNCH,LUNCH
4,434620077,LUNCH,UNKNOWN
5,434695904,FAMILY_EVENT,FAMILY_EVENT
6,434681510,DINNER,DINNER
7,448169488,DRINKING_PARTY,DRINKING_PARTY
8,447026716,UNKNOWN,UNKNOWN
9,434539751,FAMILY_EVENT,FAMILY_EVENT
10,434693454,FAMILY_EVENT,FAMILY_EVENT


In [17]:
table = "silvester"

# Tabulate this dataset's (order_id, true, predicted) tuples, numbering rows from 1.
df = pd.DataFrame(results[table], columns=columns)
df.index = df.index + 1
# Optional CSV export: df.to_csv(str(table) + "_3to5_test_results.csv", index=False)
# The Styler is the cell's last expression so the colored table is rendered.
df.style.apply(color, axis=1)

Unnamed: 0,order_id,true_occasion,pred_occasion
1,361348633,DINNER,DINNER
2,361883307,DINNER,DINNER
3,359644273,SOCIAL_GATHERING,SOCIAL_GATHERING
4,361278114,FAMILY_EVENT,FAMILY_EVENT
5,360885273,FAMILY_EVENT,FAMILY_EVENT
6,360021556,FAMILY_EVENT,FAMILY_EVENT
7,361018772,FAMILY_EVENT,FAMILY_EVENT
8,360110682,DINNER_PARTY,DINNER_PARTY
9,364377200,DRINKING,DRINKING
10,359465093,FAMILY_EVENT,FAMILY_EVENT


In [1]:
# Test label set per dataset: dataset name -> list of
# (order_id, true occasion label) pairs. The evaluation loop indexes this map
# by dataset name, so this cell must be executed before that loop.
test_labeled_tables_map = {
    "hockey": [
        (511468198, "DINNER_PARTY"),
        (520017497, "LUNCH"),
        (515744868, "LUNCH"),
        (524540364, "LUNCH"),
        (520945506, "UNKNOWN"),
        (521008563, "DINNER"),
        (520801601, "FAMILY_EVENT"),
        (523675728, "DINNER"),
        (521747487, "DINNER"),
        (523644747, "UNKNOWN"),
    ],
    "valentine": [
        (447350607, "LUNCH"),
        (435125743, "DINNER"),
        (435153168, "LUNCH"),
        (434620077, "LUNCH"),
        (434695904, "FAMILY_EVENT"),
        (434681510, "DINNER"),
        (448169488, "DRINKING_PARTY"),
        (447026716, "UNKNOWN"),
        (434539751, "FAMILY_EVENT"),
        (434693454, "FAMILY_EVENT"),
        (434734930, "UNKNOWN"),
    ],
    "silvester": [
        (361348633, "DINNER"),
        (361883307, "DINNER"),
        (359644273, "SOCIAL_GATHERING"),
        (361278114, "FAMILY_EVENT"),
        (360885273, "FAMILY_EVENT"),
        (360021556, "FAMILY_EVENT"),
        (361018772, "FAMILY_EVENT"),
        (360110682, "DINNER_PARTY"),
        (364377200, "DRINKING"),
        (359465093, "FAMILY_EVENT"),
        (362868887, "SOCIAL_GATHERING"),
    ],
}
