# Agregace a řazení

In [1]:
import pandas as pd

# Build the input DataFrame: load every CSV first, then combine them.
food_sample_100 = pd.read_csv("food_sample_100.csv")
food_other = pd.read_csv("food_other.csv")
food_nutrient = pd.read_csv("food_nutrient.csv")
branded_food = pd.read_csv("branded_food.csv")

# Stack the two food tables row-wise and renumber the index from scratch.
food = pd.concat([food_sample_100, food_other], ignore_index=True)

# Join on "fdc_id": first attach the nutrient rows, then the brand details.
food_merged = food.merge(food_nutrient, on="fdc_id")

# Rename the "name" column to "nutrient_name" in the final merged frame.
food_merged_brands = food_merged.merge(branded_food, on="fdc_id").rename(
    columns={"name": "nutrient_name"}
)

In [65]:
# Keep only the rows whose nutrient is exactly "Protein", then show (rows, columns).
protein_rows = food_merged_brands["nutrient_name"] == "Protein"
food_merged_brands_protein = food_merged_brands[protein_rows]
food_merged_brands_protein.shape

(9477, 36)

In [57]:
# Mean of "amount" for the protein rows within each branded food category.
food_merged_brands_protein_agg = (
    food_merged_brands_protein
    .groupby("branded_food_category")["amount"]
    .mean()
)
food_merged_brands_protein_agg

branded_food_category
Alcohol                          0.684000
All Noodles                     11.107368
Baby/Infant  Foods/Beverages     1.140000
Bacon                           45.200000
Bacon, Sausages & Ribs          27.239630
                                  ...    
Weight Control                   3.080000
Wholesome Snacks                 1.485645
Yogurt                           5.596461
Yogurt (Perishable)              4.320000
Yogurt/Yogurt Substitutes        5.578333
Name: amount, Length: 193, dtype: float64

In [None]:
# Aggregation over several columns: group by food category and sub-brand,
# then average "amount" within each (category, sub-brand) pair.
# NOTE(review): food_merged_brands is not filtered by nutrient here, so this mean
# presumably mixes different nutrients (and units) — confirm that is acceptable for the demo.
food_merged_brands_multiagg = food_merged_brands.groupby(
    ["branded_food_category", "subbrand_name"]
)["amount"].mean()

# Show the resulting Series as a one-column DataFrame for a more readable table.
food_merged_brands_multiagg.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,amount
branded_food_category,subbrand_name,Unnamed: 2_level_1
Alcoholic Beverages,Grand Cru,2
All Noodles,CUP NOODLES,15
All Noodles,FUNOODLES,15
"Bacon, Sausages & Ribs",BLACK LABEL,14
Baking Decorations & Dessert Toppings,COCOWHIP,13
...,...,...
Yogurt,PLENTI,15
Yogurt,PRO,14
Yogurt,SIMPLY 100,15
Yogurt,TRIPLE ZERO,14


In [60]:
# Several aggregations at once via agg(): besides the mean, also report how many
# non-null "amount" values each (category, sub-brand) group contains.
food_merged_brands_multiagg = food_merged_brands.groupby(
    ["branded_food_category", "subbrand_name"]
)["amount"].agg(["count", "mean"])

# agg() with a list of functions already returns a DataFrame (one column per
# function), so it can be displayed directly without a pd.DataFrame() wrapper.
food_merged_brands_multiagg

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean
branded_food_category,subbrand_name,Unnamed: 2_level_1,Unnamed: 3_level_1
Alcoholic Beverages,Grand Cru,2,53.000000
All Noodles,CUP NOODLES,15,185.314667
All Noodles,FUNOODLES,15,77.074667
"Bacon, Sausages & Ribs",BLACK LABEL,14,237.918571
Baking Decorations & Dessert Toppings,COCOWHIP,13,21.768462
...,...,...,...
Yogurt,PLENTI,15,33.448000
Yogurt,PRO,14,34.210000
Yogurt,SIMPLY 100,15,26.086667
Yogurt,TRIPLE ZERO,14,20.642857


## Řazení

In [61]:
# Ten categories with the highest mean protein amount.
# The long form would be sort_values(ascending=False).head(10);
# nlargest() is the simpler alternative used here.
food_merged_brands_protein_agg.nlargest(n=10)

branded_food_category
Bacon                                                  45.200000
Green Supplements                                      44.590000
Drinks Flavoured - Ready to Drink                      32.900000
Energy, Protein & Muscle Recovery Drinks               31.831481
Bacon, Sausages & Ribs                                 27.239630
Canned Tuna                                            21.648889
Meat/Poultry/Other Animals  Prepared/Processed         21.522368
Nut & Seed Butters                                     20.651270
Cheese                                                 19.969457
Meat/Poultry/Other Animals – Unprepared/Unprocessed    19.640000
Name: amount, dtype: float64

In [62]:
# Order the whole protein DataFrame by the "amount" column, largest values first.
food_merged_brands_protein_sorted = food_merged_brands_protein.sort_values(
    by="amount",
    ascending=False,
)
food_merged_brands_protein_sorted

Unnamed: 0,fdc_id,brand_owner,brand_name,subbrand_name,gtin_upc,ingredients,not_a_significant_source_of,serving_size,serving_size_unit,household_serving_fulltext,...,data_points,derivation_id,min,max,median,loq,footnote,min_year_acquired,nutrient_name,unit_name
79146,1842352,"Supervalu, Inc.",BASICS FOR KIDS,,41163416963,"WATER, SUGAR (SUCROSE), MALTODEXTRIN (CORN), M...",,240.00,ml,,...,,70.0,,,,,,,Protein,G
61975,1593315,Abbott Laboratories Inc,PEDIASURE,,70074624853,"WATER, SUGAR, MILK PROTEIN CONCENTRATE, SOY OI...",,240.00,ml,,...,,70.0,,,,,,,Protein,G
110139,2399314,Wehah Farm Inc,LUNDBERG FAMILY FARMS,,73416401341,ORGANIC LONG GRAIN WHITE RICE.,,45.00,g,4 cup,...,,70.0,,,,,,,Protein,G
105018,2280607,"Optimum Nutrition, Inc.",OPTIMUM NUTRITION,,748927060713,"PARTIALLY HYDROLYZED WHEY PROTEIN ISOLATE, WHE...","Not a significant source of saturated fat, tra...",30.00,g,1 Scoop,...,,70.0,,,,,,,Protein,G
82115,1918665,Amav Enterprises Ltd.,OMEGA NUTRITION,,55358220006,"LACTIC BACTERIA AND BENEFICIAL YEASTS, SKIM MI...",,1.25,g,,...,,73.0,,,,,,,Protein,G
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78223,1832402,Ferrara Candy Company,BRACH'S,,41420053573,"SUGAR, CORN SYRUP, MODIFIED FOOD STARCH (CORN)...",,31.00,g,,...,,70.0,,,,,,,Protein,G
78254,1833030,The Kroger Co.,KROGER,,11110089229,"MANDARIN ORANGES, WATER, WHITE GRAPE JUICE CON...",,113.00,g,,...,,70.0,,,,,,,Protein,G
78260,1833192,GREEN COLA NORTH AMERICA LLC,GREEN COLA,,850010788104,"CARBONATED WATER, CARAMEL COLOR, TARTARIC ACID...","Not a significant source of saturated fat, tra...",360.00,ml,,...,,70.0,,,,,,,Protein,G
16226,551582,REAL COCONUT WATER,,,857862001757,"YOUNG COCONUT WATER, COCONUT PULP",,330.00,ml,11.2 OZA,...,,70.0,,,,,,,Protein,G
