In [1]:
import pandas as pd
from collections import defaultdict
import pickle

In [2]:
# Load the raw CSV exports from the local data/ directory (paths are relative
# to the notebook's working directory).
# NOTE(review): meal_statistics and meal_history are not referenced again in
# the visible part of this notebook - confirm they are still needed.
meal_statistics = pd.read_csv('data/mealstatistics.csv')
meals_dataframe = pd.read_csv('data/meals.csv')
meal_history = pd.read_csv('data/mealhistory_latest.csv')
canteens = pd.read_csv('data/canteens.csv')

In [3]:
# Lookup table canteen id -> canteen name, used for plot titles and file names.
# Built column-wise instead of with a row-by-row iterrows() loop.
canteens_dict = dict(zip(canteens['id'], canteens['name']))

In [4]:
# Plot labels keyed by the index returned by datetime.weekday() (0 = Monday).
# Only Monday-Friday are present; a weekend index would raise KeyError.
weekday_dict = {0: 'Mondays', 1: 'Tuesdays', 2: 'Wednesdays', 3: 'Thursdays', 4: 'Fridays'}

In [5]:
# Load the pre-built meal database.
# As used below it is a nested mapping {canteen_id: {day: {meal_id: samples}}},
# where each sample is indexed as [0] (object with .hour/.minute), [1] and [2]
# (numeric). NOTE(review): presumably [1] is the available count and [2] the
# cumulative served count - confirm against the code that builds the pickle.
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
with open('data/meals_db_4', 'rb') as f:
    meals_db = pickle.load(f)

In [6]:
# Result skeleton: one empty stats dict per (canteen, day, meal), restricted to
# days on or after 2022-10-01 (day keys are compared as strings).
meals_res = {
    canteen_id: {
        day: {meal_id: dict() for meal_id in day_meals}
        for day, day_meals in canteen_days.items()
        if day >= '2022-10-01'
    }
    for canteen_id, canteen_days in meals_db.items()
}


In [7]:
def check_start_and_end(l):
    """Print a warning unless the series starts at 10:30 and ends at 14:30.

    `l` is a sequence of samples whose first element exposes `.hour` and
    `.minute`. Empty sequences are accepted silently. Nothing is returned;
    problems are only reported on stdout.
    """
    if len(l) > 0:
        first_stamp = l[0][0]
        last_stamp = l[-1][0]
        starts_on_time = (first_stamp.hour, first_stamp.minute) == (10, 30)
        ends_on_time = (last_stamp.hour, last_stamp.minute) == (14, 30)
        if not (starts_on_time and ends_on_time):
            print('oh no!')
            print(first_stamp, last_stamp)

# Run the start/end sanity check on every recorded series; one 'k' is printed
# per canteen as a progress marker.
for canteen_days in meals_db.values():
    for day_meals in canteen_days.values():
        for samples in day_meals.values():
            check_start_and_end(samples)
    print('k', end='')


kkkkkkk

In [8]:
# verify that every recorded series is either empty or has all 241 samples
for canteen_id, canteen_days in meals_db.items():
    for day, day_meals in canteen_days.items():
        for meal_id, samples in day_meals.items():
            if len(samples) not in (0, 241):
                print('oh no!')
                print(canteen_id, day, meal_id, len(samples))
    print('k', end=' ')

k k k k k k k 

In [9]:
def find_total_served_meals(l):
    """Return how much the counter at sample index 2 grew over the series.

    The first sample's value is subtracted so that anything counted before
    the series started is ignored. Empty series and negative differences
    both yield 0.
    """
    if len(l) == 0:
        return 0

    growth = l[-1][2] - l[0][2]
    return 0 if growth < 0 else growth

# Store the served-meal total of every tracked (canteen, day, meal).
# meals_res only contains days on or after the cut-off date, so days that are
# present in meals_db but not tracked are skipped instead of raising KeyError.
for k, v in meals_db.items():
    for day, meals in v.items():
        if day not in meals_res[k]:
            continue
        for idx, l in meals.items():
            meals_res[k][day][idx]['served_meals'] = find_total_served_meals(l)
    print('k', end=' ')

k k k k k k k 

In [10]:
def find_wasted_meals(l):
    """Return the value at index 1 of the last sample, clamped at 0.

    This is only an approximation: the last value is sometimes 0 although
    it should not be. Empty series yield 0.
    """
    if len(l) == 0:
        return 0
    leftover = l[-1][1]
    return 0 if leftover < 0 else leftover

def find_wasted_meals_alt(l):
    """Alternative waste estimate based on the last positive value at index 1.

    The final value is sometimes 0 although it should not be, so the series
    is scanned backwards for the most recent sample whose value is not
    <= 0; that value minus one is returned. If there is no such sample
    (or the series is empty) the result is 0.
    """
    for sample in reversed(l):
        if not (sample[1] <= 0):
            return sample[1] - 1
    return 0


# Store both waste estimates for every tracked (canteen, day, meal).
# meals_res only contains days on or after the cut-off date, so days that are
# present in meals_db but not tracked are skipped instead of raising KeyError.
for k, v in meals_db.items():
    for day, meals in v.items():
        if day not in meals_res[k]:
            continue
        for idx, l in meals.items():
            stats = meals_res[k][day][idx]
            stats['wasted_meals'] = find_wasted_meals(l)
            stats['wasted_meals_alt'] = find_wasted_meals_alt(l)
    print('k', end=' ')

k k k k k k k 

In [11]:
def get_sums_for_day(attribute):
    """Sum `attribute` over all meals of each day.

    Reads the module-level `meals_res` and returns
    {canteen_id: {day: total}}; a day without meals gets 0.
    """
    return {
        canteen_id: {
            day: sum(stats[attribute] for stats in day_meals.values())
            for day, day_meals in canteen_days.items()
        }
        for canteen_id, canteen_days in meals_res.items()
    }

In [12]:
from datetime import datetime
def get_sums_for_weekday(attribute):
    """Sum `attribute` per canteen and weekday.

    Reads the module-level `meals_res` and returns
    {canteen_id: [mon, tue, wed, thu, fri]}. Day keys are parsed as
    "%Y-%m-%d"; only five weekday slots exist.
    """
    totals = dict()
    for canteen_id, canteen_days in meals_res.items():
        per_weekday = [0, 0, 0, 0, 0]
        totals[canteen_id] = per_weekday
        for day, day_meals in canteen_days.items():
            slot = datetime.strptime(day, "%Y-%m-%d").weekday()
            for stats in day_meals.values():
                per_weekday[slot] += stats[attribute]
    return totals

In [13]:
from datetime import datetime
def get_weekdays(attribute):
    """Group per-day totals of `attribute` by weekday.

    Reads the module-level `meals_res` and returns, per canteen, a list of
    five lists (Monday..Friday), each holding (day, total) tuples in the
    iteration order of the days.
    """
    grouped = dict()
    for canteen_id, canteen_days in meals_res.items():
        buckets = [[] for _ in range(5)]
        grouped[canteen_id] = buckets
        for day, day_meals in canteen_days.items():
            slot = datetime.strptime(day, "%Y-%m-%d").weekday()
            day_total = sum(stats[attribute] for stats in day_meals.values())
            buckets[slot].append((day, day_total))
    return grouped

In [14]:
def get_sums_for_canteen(attribute):
    """Sum `attribute` over every day and meal of each canteen.

    Reads the module-level `meals_res` and returns {canteen_id: total}.
    """
    return {
        canteen_id: sum(
            stats[attribute]
            for day_meals in canteen_days.values()
            for stats in day_meals.values()
        )
        for canteen_id, canteen_days in meals_res.items()
    }

In [15]:
def get_all_days():
    """Return the day keys recorded for canteen 5 (in `meals_res`) as a list."""
    return list(meals_res[5])

In [16]:
# Report days recorded for canteen 5 that are missing for canteen 27 (plain
# line) or canteen 35 (line prefixed with 'd').
for day in meals_res[5].keys():
    if day not in meals_res[27]:
        print(day)
    if day not in meals_res[35]:
        print('d', day)

In [17]:
# Served-meal totals per canteen and day.
# NOTE(review): `r` is not read again in the visible part of the notebook.
r = get_sums_for_day('served_meals')

In [18]:
# Served-meal totals per canteen.
# NOTE(review): `s` is not read again in the visible part of the notebook.
s = get_sums_for_canteen('served_meals')

In [19]:
# Served-meal totals per canteen and weekday.
# NOTE(review): `t` is not read again in the visible part of the notebook.
t = get_sums_for_weekday('served_meals')

In [20]:
# Per canteen: five lists (Monday..Friday) of (day, served-meal total) tuples;
# this is the input of the weekday plots below.
weekdays_stats = get_weekdays('served_meals')

In [21]:
import matplotlib.pyplot as plt
# Draw one scatter plot of served meals per (canteen, weekday).
# NOTE: the savefig call is commented out, so every plot is closed again
# without being written to disk.
for k, v in weekdays_stats.items():
    for idx, l in enumerate(v):
        plt.xticks(rotation=90)
        # x = parsed day, y = served-meal total for that day
        plt.scatter([datetime.strptime(x[0], "%Y-%m-%d") for x in l], [x[1] for x in l], label=f'{weekday_dict[idx]}')
        plt.ylabel("Served meals")
        plt.title(f'{canteens_dict[k]}, {weekday_dict[idx]}')
        #plt.savefig(f'plots/{canteens_dict[k]}_{weekday_dict[idx]}.png', dpi=300)
        plt.close()

In [22]:
# One figure per canteen with a line per weekday, saved under
# plots/weekday_by_canteen/.
for canteen_id, per_weekday in weekdays_stats.items():
    plt.figure(figsize=(8, 4.5))
    plt.xticks(rotation=90)
    for weekday_idx, points in enumerate(per_weekday):
        dates = [datetime.strptime(point[0], "%Y-%m-%d") for point in points]
        totals = [point[1] for point in points]
        plt.plot(dates, totals, label=f'{weekday_dict[weekday_idx]}', marker='o', markersize=3)
        plt.ylabel("Served meals")
        plt.title(f'{canteens_dict[canteen_id]}')
    plt.legend()
    plt.ylim(0)
    plt.subplots_adjust(right=0.9, bottom=0.15)
    plt.savefig(f'plots/weekday_by_canteen/{canteens_dict[canteen_id]}.png', dpi=300)
    plt.close()

In [23]:
# weekday_summed: one figure per weekday with a line per canteen
for i in range(5):
    data = {canteen_id: per_weekday[i] for canteen_id, per_weekday in weekdays_stats.items()}

    plt.figure(figsize=(8, 4.5))
    plt.xticks(rotation=90)
    for canteen_id, points in data.items():
        dates = [datetime.strptime(point[0], "%Y-%m-%d") for point in points]
        totals = [point[1] for point in points]
        plt.plot(dates, totals, label=f'{canteens_dict[canteen_id]}', marker='o', markersize=3)
        plt.ylabel("Served meals")
        plt.title(f'{weekday_dict[i]}')
    plt.ylim(0, 1000)
    # legend is placed outside the axes; right=0.7 leaves room for it
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.subplots_adjust(right=0.7, bottom=0.15)
    plt.savefig(f'plots/weekday_summed/{weekday_dict[i]}.png', dpi=300)
    plt.close()


In [24]:
#summed over canteens
def get_weekday_sums_over_canteens(attribute):
    """Sum the per-day totals of `attribute` over all canteens, per weekday.

    Returns {weekday_index: [(day, total), ...]} for weekday indices 0..4.
    Totals are matched by day key rather than by list position, so canteens
    that recorded different sets of days are still summed correctly (a
    positional sum would mix up days or raise IndexError). Every day seen in
    any canteen is included, and each list is sorted by day string (ISO dates
    sort chronologically). A weekday without data yields an empty list.
    """
    weekdays = get_weekdays(attribute)
    res = dict()
    for i in range(5):
        totals_by_day = dict()
        for per_weekday in weekdays.values():
            for day, value in per_weekday[i]:
                totals_by_day[day] = totals_by_day.get(day, 0) + value
        res[i] = sorted(totals_by_day.items())
    return res

weekdays_summed = get_weekday_sums_over_canteens('served_meals')

# One line per weekday; days whose total is not positive are left out.
plt.figure(figsize=(8, 4.5))
plt.xticks(rotation=90)
for weekday_idx, points in weekdays_summed.items():
    filtered = [point for point in points if point[1] > 0]
    dates = [datetime.strptime(point[0], "%Y-%m-%d") for point in filtered]
    totals = [point[1] for point in filtered]
    plt.plot(dates, totals, label=f'{weekday_dict[weekday_idx]}', marker='o', markersize=3)
    plt.ylabel("Served meals")
plt.ylim(ymin=1)
plt.title(f'Sums over all canteens')
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.subplots_adjust(right=0.8, bottom=0.15)
plt.savefig(f'plots/weekday_summed/sums.png', dpi=300)
plt.close()

In [25]:
# sums over all canteens without color separation
all_days = get_all_days()
sums_over_canteens = dict()
for day in all_days:
    day_total = 0
    for canteen_days in meals_res.values():
        for stats in canteen_days.get(day, {}).values():
            day_total += stats['served_meals']
    # days on which nothing was served are left out of the plot
    if day_total != 0:
        sums_over_canteens[day] = day_total

plt.figure(figsize=(8, 4.5))
plt.xticks(rotation=90)
plot_dates = [datetime.strptime(day, "%Y-%m-%d") for day in sums_over_canteens.keys()]
plot_totals = list(sums_over_canteens.values())
plt.plot(plot_dates, plot_totals, marker='o', markersize=3)
plt.ylabel("Served meals")
plt.ylim(ymin=100)
plt.title(f'Sums over all canteens')
plt.subplots_adjust(bottom=0.15)
plt.savefig(f'plots/weekday_summed/sums_2.png', dpi=300)
plt.close()

In [26]:
# statistics for wasted food, aggregated by weeks
wasted_food_days = get_sums_for_day('wasted_meals')
alt_wasted_food_days = get_sums_for_day('wasted_meals_alt')

# wasted_food_weeks[canteen_id][(iso_year, iso_week)] =
#     (datetime of the first day seen in that week, wasted total, alt wasted total)
# The key includes the ISO year so that the same week number from two
# different years is never merged into one bucket.
wasted_food_weeks = dict()
for canteen_id, per_day in wasted_food_days.items():
    weekly = dict()
    for day, wasted in per_day.items():
        day_dt = datetime.strptime(day, "%Y-%m-%d")
        week_key = tuple(day_dt.isocalendar()[:2])
        first_day, total, alt_total = weekly.get(week_key, (day_dt, 0, 0))
        weekly[week_key] = (
            first_day,
            total + wasted,
            alt_total + alt_wasted_food_days[canteen_id][day],
        )
    wasted_food_weeks[canteen_id] = weekly


In [27]:
# plot for all canteens: weekly wasted-meal totals, one line per canteen
plt.figure(figsize=(8, 4.5))
plt.xticks(rotation=90)
for idx, l in wasted_food_weeks.items():
    # each weekly entry is (first day of the week, wasted total, alt wasted total)
    plt.plot([x[0] for x in l.values()], [x[1] for x in l.values()], label=f'{canteens_dict[idx]}', marker='o', markersize=3)
# the plotted values are wasted meals, not served meals
plt.ylabel("Wasted meals")
plt.title(f'Wasted food per week')
plt.ylim(0, 400)
# legend is placed outside the axes; right=0.7 leaves room for it
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.subplots_adjust(right=0.7, bottom=0.15)
plt.savefig(f'plots/wasted_food/all_wasted_food.png', dpi=300)
plt.close()

In [43]:
# alt plot for all canteens: weekly totals of the alternative waste estimate
plt.figure(figsize=(8, 4.5))
plt.xticks(rotation=90)
for idx, l in wasted_food_weeks.items():
    # each weekly entry is (first day of the week, wasted total, alt wasted total)
    plt.plot([x[0] for x in l.values()], [x[2] for x in l.values()], label=f'{canteens_dict[idx]}', marker='o', markersize=3)
# the plotted values are wasted meals, not served meals
plt.ylabel("Wasted meals")
plt.title(f'Alt wasted food per week')
plt.ylim(0, 400)
# legend is placed outside the axes; right=0.7 leaves room for it
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.subplots_adjust(right=0.7, bottom=0.15)
plt.savefig(f'plots/wasted_food/all_wasted_food_alt.png', dpi=300)
plt.close()

In [29]:
# wasted food, different plot per canteen
for idx, l in wasted_food_weeks.items():
    plt.figure(figsize=(8, 4.5))
    plt.xticks(rotation=90)

    # each weekly entry is (first day of the week, wasted total, alt wasted total)
    plt.plot([x[0] for x in l.values()], [x[1] for x in l.values()], label=f'{canteens_dict[idx]}', marker='o', markersize=3)
    # the plotted values are wasted meals, not served meals
    plt.ylabel("Wasted meals")

    plt.title(f'Wasted food per week {canteens_dict[idx]}')
    plt.ylim(0, 400)
    plt.subplots_adjust(bottom=0.15)
    plt.savefig(f'plots/wasted_food/wasted_food_{canteens_dict[idx]}.png', dpi=300)
    plt.close()

In [30]:
# alt wasted food, different plot per canteen
for idx, l in wasted_food_weeks.items():
    plt.figure(figsize=(8, 4.5))
    plt.xticks(rotation=90)

    # each weekly entry is (first day of the week, wasted total, alt wasted total)
    plt.plot([x[0] for x in l.values()], [x[2] for x in l.values()], label=f'{canteens_dict[idx]}', marker='o', markersize=3)
    # the plotted values are wasted meals, not served meals
    plt.ylabel("Wasted meals")

    plt.title(f'Alt wasted food per week {canteens_dict[idx]}')
    plt.ylim(0, 400)
    plt.subplots_adjust(bottom=0.15)
    plt.savefig(f'plots/wasted_food/wasted_food_{canteens_dict[idx]}_alt.png', dpi=300)
    plt.close()

In [31]:
def calculate_meal_score(meal_lists):
    """Score each meal by its share of the served meals, step by step.

    `meal_lists` maps meal id -> list of samples, where sample[1] is used as
    the "still offered" indicator (non-zero = active) and sample[2] as a
    running counter. For every pair of consecutive samples the counter
    increase of each meal is divided by the increase summed over all meals
    and multiplied by the number of active meals. A meal's score is the
    mean of these values over the steps in which it was active.

    Steps in which no meal is active or the summed increase is 0 are
    skipped. Meals with an empty or shorter series simply do not take part
    in the steps they have no data for. Returns {meal_id: score}; meals
    without any scored step are omitted. Empty input returns {}.
    """
    if not meal_lists:
        return dict()

    diffs = dict()
    active = dict()
    for meal_id, meal_list in meal_lists.items():
        diffs[meal_id] = [nxt[2] - cur[2] for cur, nxt in zip(meal_list, meal_list[1:])]
        active[meal_id] = [sample[1] != 0 for sample in meal_list[:-1]]

    scores = {meal_id: [] for meal_id in meal_lists}
    # Derive the number of steps from the data itself instead of relying on a
    # loop variable leaked from an earlier loop.
    n_steps = max(len(d) for d in diffs.values())
    for i in range(n_steps):
        present = [meal_id for meal_id in meal_lists if i < len(diffs[meal_id])]
        total_diff = sum(diffs[meal_id][i] for meal_id in present)
        no_active_meals = sum(active[meal_id][i] for meal_id in present)
        if no_active_meals == 0 or total_diff == 0:
            continue
        for meal_id in present:
            if active[meal_id][i]:
                scores[meal_id].append((diffs[meal_id][i] / total_diff) * no_active_meals)

    return {
        meal_id: sum(values) / len(values)
        for meal_id, values in scores.items()
        if len(values) > 0
    }

In [32]:
def calculate_alt_meal_score(meal_lists, minutes=15, tolerance=1):
    """Score each meal by its share of served meals per `minutes`-sample window.

    `meal_lists` maps meal id -> list of samples, where sample[1] is used as
    the "still offered" indicator (non-zero = active) and sample[2] as a
    running counter. The series is cut into consecutive windows of `minutes`
    samples. A meal is active in a window when at least
    `minutes - tolerance` of the window's samples have a non-zero
    indicator. For each window the counter increase of every meal is divided
    by the increase summed over all meals and multiplied by the number of
    active meals; a meal's score is the mean of these values over the
    windows in which it was active.

    Only complete windows are used. Meals with an empty or shorter series
    do not take part in windows they have no data for (previously this
    raised IndexError or silently produced no scores, depending on dict
    order). Windows without active meals or with a summed increase of 0 are
    skipped. Returns {meal_id: score}; meals without any scored window are
    omitted. Empty input returns {}.
    """
    if len(meal_lists) == 0:
        return dict()

    diffs = dict()
    active = dict()
    for meal_id, meal_list in meal_lists.items():
        # start indices of all windows whose end sample (i + minutes) exists
        window_starts = range(0, len(meal_list) - minutes, minutes)
        diffs[meal_id] = [meal_list[i + minutes][2] - meal_list[i][2] for i in window_starts]
        active[meal_id] = [
            sum(meal_list[i + j][1] != 0 for j in range(minutes)) >= minutes - tolerance
            for i in window_starts
        ]

    scores = {meal_id: [] for meal_id in meal_lists}
    n_windows = max(len(d) for d in diffs.values())
    for i in range(n_windows):
        present = [meal_id for meal_id in meal_lists if i < len(diffs[meal_id])]
        total_diff = sum(diffs[meal_id][i] for meal_id in present)
        no_active_meals = sum(active[meal_id][i] for meal_id in present)
        if no_active_meals == 0 or total_diff == 0:
            continue
        for meal_id in present:
            if active[meal_id][i]:
                scores[meal_id].append((diffs[meal_id][i] / total_diff) * no_active_meals)

    return {
        meal_id: sum(values) / len(values)
        for meal_id, values in scores.items()
        if len(values) > 0
    }

# Store the interval-based score of every tracked (canteen, day, meal).
# meals_res only contains days on or after the cut-off date, so days that are
# present in meals_db but not tracked are skipped instead of raising KeyError.
for canteen_id, day_dicts in meals_db.items():
    for day, meal_lists in day_dicts.items():
        if day not in meals_res[canteen_id]:
            continue
        sc = calculate_alt_meal_score(meal_lists, minutes=15, tolerance=1)
        for meal_id, score in sc.items():
            meals_res[canteen_id][day][meal_id]['score'] = score
    print('done', canteen_id)


done 5
done 7
done 15
done 21
done 54
done 27
done 35


In [33]:
# NOTE(review): looks like a leftover debugging placeholder - `x` is not read
# by any later cell visible in this notebook; consider deleting the cell.
x = 7

In [34]:
# Attach price2 and name from the meals table to every tracked meal. Only rows
# with kindId == 3, a day >= '2022-10' and a matching (canteen, day, altId)
# entry in meals_res are used. One 'k' is printed per table row.
current_canteen_ids = meals_res.keys()
for row in meals_dataframe.itertuples():
    is_candidate = (
        row.canteenId in current_canteen_ids
        and row.kindId == 3
        and row.day >= '2022-10'
    )
    if is_candidate:
        meal_stats = meals_res[row.canteenId].get(row.day, {}).get(row.altId)
        if meal_stats is not None:
            meal_stats['price2'] = row.price2
            meal_stats['name'] = row.name
    print('k', end='')

kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk

In [35]:
# NOTE(review): looks like a leftover debugging placeholder - `x` is not read
# by any later cell visible in this notebook; consider deleting the cell.
x = 7

In [36]:
# plot based on price and score and colored by time
for canteen_id, day_dicts in meals_res.items():
    # meals that have both a price and a score, in day/meal iteration order
    scored_meals = [
        meal_dict
        for meal_lists in day_dicts.values()
        for meal_dict in meal_lists.values()
        if 'price2' in meal_dict.keys() and 'score' in meal_dict.keys()
    ]
    # the running position stands in for time when colouring the points
    data = [
        (meal_dict['price2']/100, meal_dict['score'], position)
        for position, meal_dict in enumerate(scored_meals)
    ]

    plt.figure(figsize=(8, 4.5))
    prices = [point[0] for point in data]
    scores = [point[1] for point in data]
    positions = [point[2] for point in data]
    plt.scatter(prices, scores, c=positions, s=20, cmap='plasma')
    plt.xlabel('Price for students')
    plt.ylabel('Score')
    plt.title(f'{canteens_dict[canteen_id]} colored by time')
    plt.savefig(f'plots/scores/orig_{canteens_dict[canteen_id]}.png', dpi=300)
    plt.close()


In [37]:
# plot based on time and score and colored by price
for canteen_id, day_dicts in meals_res.items():
    # (price, score, day) for every meal that has both a price and a score
    data = [
        (meal_dict['price2']/100, meal_dict['score'], datetime.strptime(day, "%Y-%m-%d"))
        for day, meal_lists in day_dicts.items()
        for meal_dict in meal_lists.values()
        if 'price2' in meal_dict.keys() and 'score' in meal_dict.keys()
    ]

    plt.figure(figsize=(8, 4.5))
    days = [point[2] for point in data]
    scores = [point[1] for point in data]
    prices = [point[0] for point in data]
    plt.scatter(days, scores, c=prices, s=20, cmap='YlOrRd')
    plt.xlabel('Time')
    plt.ylabel('Score')
    plt.title(f'{canteens_dict[canteen_id]} colored by price')
    plt.savefig(f'plots/scores/time_{canteens_dict[canteen_id]}.png', dpi=300)
    plt.close()

In [38]:
# plot based on time and price and colored by score
for canteen_id, day_dicts in meals_res.items():
    # (price, score, day) for every meal that has both a price and a score
    data = [
        (meal_dict['price2']/100, meal_dict['score'], datetime.strptime(day, "%Y-%m-%d"))
        for day, meal_lists in day_dicts.items()
        for meal_dict in meal_lists.values()
        if 'price2' in meal_dict.keys() and 'score' in meal_dict.keys()
    ]

    plt.figure(figsize=(8, 4.5))
    days = [point[2] for point in data]
    prices = [point[0] for point in data]
    scores = [point[1] for point in data]
    plt.scatter(days, prices, c=scores, s=20, cmap='YlGn')
    plt.xlabel('Time')
    plt.ylabel('Price')
    plt.title(f'{canteens_dict[canteen_id]} colored by score')
    plt.savefig(f'plots/scores/score_{canteens_dict[canteen_id]}.png', dpi=300)
    plt.close()

In [39]:
# calculate meals by interval by weekday
meal_list_length = 241
def calculate_meal_by_interval(meal_lists, minutes=15):
    """Sum the counter increase of all meals per `minutes`-sample interval.

    `meal_lists` maps meal id -> list of samples, where sample[0] is the
    sample's timestamp and sample[2] a running counter. Intervals start at
    indices 0, minutes, 2*minutes, ... below `meal_list_length - 1`.

    Returns {timestamp of the interval's first sample: summed increase}.
    Meals whose series is empty or too short for an interval are skipped
    for that interval (previously they raised IndexError); an interval for
    which no meal has data is omitted. The timestamp is taken from the last
    meal that has data for the interval. Empty input returns {}.
    """
    if len(meal_lists) == 0:
        return dict()

    diffs = dict()
    for i in range(0, meal_list_length - 1, minutes):
        summ = 0
        interval_start = None
        for meal_list in meal_lists.values():
            if i + minutes >= len(meal_list):
                # no complete interval available for this meal
                continue
            summ += meal_list[i + minutes][2] - meal_list[i][2]
            interval_start = meal_list[i][0]
        if interval_start is not None:
            diffs[interval_start] = summ
    return diffs

# counts_by_intervals[canteen_id][weekday_index]['HH:MM'] = meals served in the
# interval starting at that time, summed over all days with that weekday.
counts_by_intervals = dict()

for canteen_id, day_dicts in meals_db.items():
    per_weekday = dict()
    counts_by_intervals[canteen_id] = per_weekday
    for day, meal_lists in day_dicts.items():
        interv = calculate_meal_by_interval(meal_lists, minutes=15)
        if len(interv) == 0:
            # days without interval data do not create a weekday entry
            continue
        week_day_calculated = datetime.strptime(day, "%Y-%m-%d").weekday()
        if week_day_calculated not in per_weekday:
            per_weekday[week_day_calculated] = defaultdict(int)
        bucket = per_weekday[week_day_calculated]
        for time_point, val in interv.items():
            bucket[time_point.strftime('%H:%M')] += val
    print('done', canteen_id)

done 5
done 7
done 15
done 21
done 54
done 27
done 35


In [40]:
# NOTE(review): looks like a leftover debugging placeholder - `x` is not read
# by any later cell visible in this notebook; consider deleting the cell.
x = 7

In [41]:
# plot meals by interval: one figure per canteen, one line per weekday
for canteen_id in meals_res.keys():
    data = counts_by_intervals[canteen_id]

    plt.figure(figsize=(8, 4.5))
    plt.xticks(rotation=90)
    for weekday_idx, per_interval in data.items():
        plt.plot(per_interval.keys(), per_interval.values(), label=f'{weekday_dict[weekday_idx]}', marker='o', markersize=3)
    plt.ylabel("Served meals")
    plt.ylim(0)
    plt.title(f'Meals served in {canteens_dict[canteen_id]}')
    # legend is placed outside the axes; right=0.8 leaves room for it
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.subplots_adjust(right=0.8, bottom=0.15)
    plt.savefig(f'plots/by_interval/{canteens_dict[canteen_id]}_intervals.png', dpi=300)
    plt.close()


In [42]:
# notes - meals are counted without the pre-ordered ones, so only the "population of those who choose on the spot" is considered

# Wednesday 30.11. - downward peak (dip) - before 13:00 either the system or the data-collecting bot went down
# Tuesday 21.3. - downward peak (dip) - before 12:00 either the system or the data-collecting bot went down
# Tuesday in the 10th month (October), Troja - the electricity was out
# Friday 18.11. - the day after the day off on 17.11.
# Monday 2.1. - there was no school


# Právnická reported the available meal count as -1 for most of the year