In [1]:
import numpy as np
import pandas as pd

In [2]:
def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to narrower dtypes to save memory.

    Only columns whose dtype is one of int16/int32/int64/float16/float32/float64
    are inspected; every other column is left untouched.

    * Integer columns are cast to the narrowest of int8/int16/int32 whose
      inclusive range contains the column's min and max.
    * float64 columns are cast to float32 when their min and max fit in the
      float32 range. This keeps the magnitude but reduces precision.
    * A column is never widened (float16 and float32 columns are kept as is).

    Args:
        df: DataFrame to shrink. It is modified IN PLACE, column by column.
        verbose: When true, print the final size and the percentage saved.

    Returns:
        The same ``df`` object that was passed in, after downcasting.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    # Candidate integer targets, ordered from narrowest to widest.
    int_candidates = (np.int8, np.int16, np.int32)
    # memory_usage() is called without deep=True, so both measurements are shallow.
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type).startswith('int'):
            for candidate in int_candidates:
                # Stop as soon as the candidate would not be narrower than
                # the current dtype: there is nothing left to gain.
                if np.dtype(candidate).itemsize >= col_type.itemsize:
                    break
                bounds = np.iinfo(candidate)
                # iinfo limits are inclusive, so values sitting exactly on a
                # limit (e.g. 127 for int8) still fit. For an empty column
                # min/max are NaN, both comparisons are False and the column
                # is left unchanged.
                if bounds.min <= c_min and c_max <= bounds.max:
                    df[col] = df[col].astype(candidate)
                    break
        elif col_type == 'float64':
            bounds = np.finfo(np.float32)
            # Columns holding +/-inf or only NaN fail this test and stay float64.
            if bounds.min <= c_min and c_max <= bounds.max:
                df[col] = df[col].astype(np.float32)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the ratio so a frame reporting zero bytes does not yield NaN.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

def load_data(path="../input/m5-forecasting-accuracy/sales_train_evaluation.csv"):
    """Read a CSV file and shrink its numeric columns with ``reduce_mem_usage``.

    Args:
        path: Location of the CSV file. It defaults to the path this notebook
            originally hard-coded, so a bare ``load_data()`` call behaves as
            before.

    Returns:
        The DataFrame returned by ``reduce_mem_usage`` for the parsed file.
        ``reduce_mem_usage`` is called with its default ``verbose=True``, so a
        memory summary line is printed as a side effect.
    """
    train_df = reduce_mem_usage(pd.read_csv(path))
    return train_df

# Load the table once; every cell below aggregates this notebook-global frame.
train_df = load_data()

Mem. usage decreased to 96.13 Mb (78.8% reduction)


In [3]:
# Grand total per day: all products, over every store and state.
# Columns from position 6 onward are the d_* day columns; the leading six are
# skipped (presumably the identifier columns - confirm against the CSV header).
train_df.iloc[:, 6:].sum(axis=0)

d_1       32631
d_2       31749
d_3       23783
d_4       25412
d_5       19146
          ...  
d_1937    37096
d_1938    36963
d_1939    42552
d_1940    51518
d_1941    54338
Length: 1941, dtype: int64

In [4]:
# All products, aggregated for each state.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby("state_id").sum(numeric_only=True)

Unnamed: 0_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
state_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CA,14195,13805,10108,11047,9925,11322,12251,16610,14696,11822,...,18471,23037,24704,17721.0,16150.0,15678.0,16297.0,17430.0,23103,24644.0
TX,9438,9630,6778,7381,5912,9006,6226,9440,9376,7319,...,12320,12366,16249,12228.0,11370.0,10375.0,9162.0,12303.0,13681,14815.0
WI,8998,8314,6897,6984,3309,8883,9533,11882,8664,6431,...,13282,18905,18968,12413.0,11257.0,11043.0,11504.0,12819.0,14734,14879.0


In [5]:
# All products, aggregated for each store.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby("store_id").sum(numeric_only=True)

Unnamed: 0_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
store_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CA_1,4337,4155,2816,3051,2630,3276,3450,5437,4340,3157,...,4951,6245,6707,4568.0,3949.0,3995.0,4136.0,4433.0,5764,6289.0
CA_2,3494,3046,2121,2324,1942,2288,2629,3729,2957,2218,...,4839,6507,6710,4238.0,3963.0,3789.0,4037.0,4751.0,7120,6614.0
CA_3,4739,4827,3785,4232,3817,4369,4703,5456,5581,4912,...,6046,7029,7969,5891.0,5523.0,5375.0,5580.0,5542.0,7073,8144.0
CA_4,1625,1777,1386,1440,1536,1389,1469,1988,1818,1535,...,2635,3256,3318,3024.0,2715.0,2519.0,2544.0,2704.0,3146,3597.0
TX_1,2556,2687,1822,2258,1694,2734,1691,2820,2887,2174,...,3610,3787,5000,3708.0,3310.0,3147.0,2748.0,3664.0,4167,4624.0
TX_2,3852,3937,2731,2954,2492,3439,2588,3772,3657,2932,...,4415,4424,5866,4179.0,4008.0,3518.0,3126.0,4249.0,4802,5217.0
TX_3,3030,3006,2225,2169,1726,2833,1947,2848,2832,2213,...,4295,4155,5383,4341.0,4052.0,3710.0,3288.0,4390.0,4712,4974.0
WI_1,2704,2194,1562,1251,2,2049,2815,3248,1674,1355,...,3978,5527,5488,3505.0,3323.0,3242.0,3478.0,3813.0,5002,5054.0
WI_2,2256,1922,2018,2522,1175,2244,2232,2643,2140,1836,...,5317,7704,7586,5011.0,4628.0,4533.0,4628.0,4880.0,5213,5068.0
WI_3,4038,4198,3317,3211,2132,4590,4486,5991,4850,3240,...,3987,5674,5894,3897.0,3306.0,3268.0,3398.0,4126.0,4519,4757.0


In [6]:
# All products, aggregated for each category.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby("cat_id").sum(numeric_only=True)

Unnamed: 0_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
cat_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
FOODS,23178.0,22758.0,17174.0,18878.0,14603.0,22093.0,20490.0,27751.0,24862.0,18901.0,...,30107.0,36576.0,41447.0,29158.0,26148.0,24790.0,24737.0,28136.0,33599.0,35967.0
HOBBIES,3764.0,3357.0,2682.0,2669.0,1814.0,3220.0,2944.0,3986.0,2899.0,2615.0,...,3871.0,4893.0,5016.0,3528.0,3629.0,3740.0,3475.0,4143.0,5333.0,5280.0
HOUSEHOLD,5689.0,5634.0,3927.0,3865.0,2729.0,3898.0,4576.0,6195.0,4975.0,4056.0,...,10095.0,12839.0,13458.0,9676.0,9000.0,8566.0,8751.0,10273.0,12586.0,13091.0


In [7]:
# All products, aggregated for each department.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby("dept_id").sum(numeric_only=True)

Unnamed: 0_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
dept_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
FOODS_1,2343,2216,1657,1508,1209,1897,1903,2235,1925,1586,...,4130,4257,4225,3264.0,3119.0,3073.0,3251.0,3735.0,4195,4031.0
FOODS_2,4094,4209,3174,3606,2869,4375,3349,4384,4273,3684,...,5842,7563,9069,6226.0,5311.0,4648.0,4551.0,4815.0,5940,6535.0
FOODS_3,16741,16333,12343,13764,10525,15821,15238,21132,18664,13631,...,20135,24756,28153,19668.0,17718.0,17069.0,16935.0,19586.0,23464,25401.0
HOBBIES_1,3610,3172,2497,2531,1714,3133,2855,3831,2741,2514,...,3422,4414,4464,3109.0,3142.0,3291.0,3027.0,3761.0,4832,4720.0
HOBBIES_2,154,185,185,138,100,87,89,155,158,101,...,449,479,552,419.0,487.0,449.0,448.0,382.0,501,560.0
HOUSEHOLD_1,4105,3858,2827,2732,1802,2664,3161,4334,3494,2857,...,7936,10042,10572,7609.0,6999.0,6641.0,6759.0,7974.0,9668,10165.0
HOUSEHOLD_2,1584,1776,1100,1133,927,1234,1415,1861,1481,1199,...,2159,2797,2886,2067.0,2001.0,1925.0,1992.0,2299.0,2918,2926.0


In [8]:
# All products, aggregated for each (state, category) pair.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby(["state_id", "cat_id"]).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
state_id,cat_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CA,FOODS,10101,9862,6944,7864,7178,8256,9005,11870,10977,8637,...,12151,14447,16197,11859.0,10759.0,10465.0,10650.0,11354.0,14489,15970.0
CA,HOBBIES,1802,1561,1472,1405,1181,1459,1314,1986,1482,1508,...,1831,2638,2401,1554.0,1649.0,1606.0,1735.0,1975.0,2701,2514.0
CA,HOUSEHOLD,2292,2382,1692,1778,1566,1607,1932,2754,2237,1677,...,4489,5952,6106,4308.0,3742.0,3607.0,3912.0,4101.0,5913,6160.0
TX,FOODS,6853,7030,5124,5470,4602,7067,4671,7055,6920,5505,...,8369,8095,10663,7905.0,7164.0,6596.0,6083.0,7965.0,8834,9500.0
TX,HOBBIES,879,870,526,809,501,831,390,785,794,524,...,1028,933,1461,1205.0,1199.0,1056.0,830.0,1132.0,1440,1543.0
TX,HOUSEHOLD,1706,1730,1128,1102,809,1108,1165,1600,1662,1290,...,2923,3338,4125,3118.0,3007.0,2723.0,2249.0,3206.0,3407,3772.0
WI,FOODS,6224,5866,5106,5544,2823,6770,6814,8826,6965,4759,...,9587,14034,14587,9394.0,8225.0,7729.0,8004.0,8817.0,10276,10497.0
WI,HOBBIES,1083,926,684,455,132,930,1240,1215,623,583,...,1012,1322,1154,769.0,781.0,1078.0,910.0,1036.0,1192,1223.0
WI,HOUSEHOLD,1691,1522,1107,985,354,1183,1479,1841,1076,1089,...,2683,3549,3227,2250.0,2251.0,2236.0,2590.0,2966.0,3266,3159.0


In [9]:
# All products, aggregated for each (state, department) pair.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby(["state_id", "dept_id"]).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
state_id,dept_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CA,FOODS_1,1157,1142,787,743,729,796,913,1036,1049,804,...,1931,1854,1820,1408.0,1316.0,1374.0,1364.0,1631.0,2095,2041.0
CA,FOODS_2,1864,1987,1315,1452,1239,1427,1554,1962,1928,1787,...,2108,2396,3130,2203.0,1955.0,1736.0,1795.0,1805.0,2421,2888.0
CA,FOODS_3,7080,6733,4842,5669,5210,6033,6538,8872,8000,6046,...,8112,10197,11247,8248.0,7488.0,7355.0,7491.0,7918.0,9973,11041.0
CA,HOBBIES_1,1739,1502,1422,1352,1139,1427,1281,1920,1419,1471,...,1643,2386,2195,1415.0,1495.0,1460.0,1545.0,1816.0,2469,2248.0
CA,HOBBIES_2,63,59,50,53,42,32,33,66,63,37,...,188,252,206,139.0,154.0,146.0,190.0,159.0,232,266.0
CA,HOUSEHOLD_1,1527,1515,1116,1148,935,1014,1224,1834,1496,1089,...,3378,4398,4598,3267.0,2799.0,2663.0,2928.0,3045.0,4342,4638.0
CA,HOUSEHOLD_2,765,867,576,630,631,593,708,920,741,588,...,1111,1554,1508,1041.0,943.0,944.0,984.0,1056.0,1571,1522.0
TX,FOODS_1,500,446,381,317,297,445,337,466,432,368,...,1022,924,1025,824.0,797.0,749.0,839.0,998.0,980,962.0
TX,FOODS_2,1384,1426,1008,1316,1143,1826,917,1319,1280,1111,...,1320,1339,1914,1521.0,1254.0,1019.0,1024.0,1127.0,1398,1428.0
TX,FOODS_3,4969,5158,3735,3837,3162,4796,3417,5270,5208,4026,...,6027,5832,7724,5560.0,5113.0,4828.0,4220.0,5840.0,6456,7110.0


In [10]:
# All products, aggregated for each (store, category) pair.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby(["store_id", "cat_id"]).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
store_id,cat_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CA_1,FOODS,3239,3137,2008,2258,2032,2407,2693,3962,3418,2446,...,3365,4046,4717,3198.0,2794.0,2816.0,2854.0,2993.0,3783,4327.0
CA_1,HOBBIES,556,498,415,392,268,467,346,769,432,384,...,588,806,687,417.0,393.0,381.0,406.0,539.0,719,646.0
CA_1,HOUSEHOLD,542,520,393,401,330,402,411,706,490,327,...,998,1393,1303,953.0,762.0,798.0,876.0,901.0,1262,1316.0
CA_2,FOODS,2193,1921,1289,1540,1278,1494,1785,2385,1827,1355,...,3255,3972,4285,2902.0,2649.0,2561.0,2725.0,3179.0,4502,4198.0
CA_2,HOBBIES,538,397,368,350,296,391,316,413,452,403,...,370,687,588,326.0,385.0,334.0,357.0,434.0,762,660.0
CA_2,HOUSEHOLD,763,728,464,434,368,403,528,931,678,460,...,1214,1848,1837,1010.0,929.0,894.0,955.0,1138.0,1856,1756.0
CA_3,FOODS,3446,3535,2701,3064,2761,3340,3455,4068,4324,3718,...,3864,4311,4980,3685.0,3491.0,3425.0,3456.0,3497.0,4315,5069.0
CA_3,HOBBIES,550,430,438,424,364,390,493,525,375,478,...,502,665,713,482.0,532.0,565.0,596.0,571.0,675,754.0
CA_3,HOUSEHOLD,743,862,646,744,692,639,755,863,882,716,...,1680,2053,2276,1724.0,1500.0,1385.0,1528.0,1474.0,2083,2321.0
CA_4,FOODS,1223,1269,946,1002,1107,1015,1072,1455,1408,1118,...,1667,2118,2215,2074.0,1825.0,1663.0,1615.0,1685.0,1889,2376.0


In [11]:
# All products, aggregated for each (store, department) pair.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby(["store_id", "dept_id"]).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
store_id,dept_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CA_1,FOODS_1,297,284,214,175,182,191,224,263,245,176,...,412,435,415,277.0,302.0,397.0,330.0,310.0,427,334.0
CA_1,FOODS_2,674,655,396,476,354,486,554,747,581,559,...,545,611,781,555.0,476.0,427.0,411.0,434.0,627,719.0
CA_1,FOODS_3,2268,2198,1398,1607,1496,1730,1915,2952,2592,1711,...,2408,3000,3521,2366.0,2016.0,1992.0,2113.0,2249.0,2729,3274.0
CA_1,HOBBIES_1,528,489,409,383,263,453,339,750,425,375,...,546,732,631,389.0,347.0,353.0,354.0,494.0,656,588.0
CA_1,HOBBIES_2,28,9,6,9,5,14,7,19,7,9,...,42,74,56,28.0,46.0,28.0,52.0,45.0,63,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WI_3,FOODS_3,2293,2383,1841,1965,1427,2760,2727,3786,3179,1924,...,2061,2815,3162,1952.0,1602.0,1594.0,1627.0,2011.0,2360,2632.0
WI_3,HOBBIES_1,256,342,228,183,70,285,357,399,216,222,...,202,345,294,187.0,174.0,287.0,220.0,242.0,273,305.0
WI_3,HOBBIES_2,22,14,20,11,4,8,3,13,4,21,...,21,37,38,11.0,45.0,45.0,46.0,27.0,31,37.0
WI_3,HOUSEHOLD_1,584,541,420,327,151,392,492,647,418,354,...,608,937,874,626.0,597.0,486.0,595.0,764.0,723,723.0


In [12]:
# Each product, aggregated over all stores and states.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby(["item_id"]).sum(numeric_only=True)

Unnamed: 0_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
FOODS_1_001,6,6,4,6,7,18,10,4,11,10,...,7,8,8,4.0,7.0,7.0,5.0,7.0,5,9.0
FOODS_1_002,4,5,7,4,3,4,1,7,2,4,...,5,8,7,2.0,6.0,5.0,0.0,6.0,6,4.0
FOODS_1_003,14,8,3,6,3,8,13,10,11,6,...,7,8,6,11.0,9.0,7.0,7.0,10.0,6,5.0
FOODS_1_004,0,0,0,0,0,0,0,0,0,0,...,115,104,107,81.0,73.0,62.0,71.0,75.0,83,93.0
FOODS_1_005,34,32,13,20,10,21,18,20,25,41,...,15,16,25,9.0,7.0,25.0,19.0,25.0,17,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HOUSEHOLD_2_512,5,4,1,3,2,4,2,8,7,5,...,5,7,11,4.0,8.0,3.0,4.0,5.0,4,7.0
HOUSEHOLD_2_513,0,0,0,0,0,0,0,0,0,0,...,4,0,5,3.0,6.0,3.0,0.0,9.0,3,4.0
HOUSEHOLD_2_514,4,8,2,1,1,2,3,8,2,1,...,2,2,2,6.0,1.0,0.0,1.0,4.0,3,3.0
HOUSEHOLD_2_515,0,0,0,0,0,0,0,0,0,0,...,1,1,5,5.0,1.0,2.0,3.0,0.0,3,1.0


In [13]:
# Each product, aggregated for each state.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby(["state_id", "item_id"]).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
state_id,item_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CA,FOODS_1_001,6,3,2,3,7,5,8,3,5,2,...,5,5,3,1.0,1.0,2.0,2.0,4.0,4,2.0
CA,FOODS_1_002,3,3,4,4,3,3,0,2,1,1,...,3,4,6,2.0,1.0,2.0,0.0,4.0,4,4.0
CA,FOODS_1_003,9,4,3,4,2,5,7,3,4,2,...,4,3,4,8.0,8.0,5.0,3.0,3.0,4,4.0
CA,FOODS_1_004,0,0,0,0,0,0,0,0,0,0,...,30,35,15,20.0,15.0,24.0,13.0,17.0,26,37.0
CA,FOODS_1_005,23,13,6,15,7,8,7,9,19,30,...,10,8,13,4.0,2.0,5.0,11.0,21.0,7,14.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WI,HOUSEHOLD_2_512,2,3,0,1,0,0,0,4,2,1,...,3,1,4,3.0,0.0,2.0,0.0,0.0,0,3.0
WI,HOUSEHOLD_2_513,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0
WI,HOUSEHOLD_2_514,1,1,1,0,0,0,0,3,0,0,...,0,0,0,1.0,0.0,0.0,0.0,3.0,0,0.0
WI,HOUSEHOLD_2_515,0,0,0,0,0,0,0,0,0,0,...,0,0,1,4.0,1.0,1.0,0.0,0.0,0,1.0


In [14]:
# Each product, aggregated for each store.
# numeric_only=True restricts the sum to the numeric d_* columns. Since pandas 2.0
# the default (numeric_only=False) no longer drops the string identifier columns.
train_df.groupby(["store_id", "item_id"]).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1932,d_1933,d_1934,d_1935,d_1936,d_1937,d_1938,d_1939,d_1940,d_1941
store_id,item_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CA_1,FOODS_1_001,3,0,0,1,4,2,0,2,0,0,...,2,3,1,0,0,0,1,0,0,0
CA_1,FOODS_1_002,0,1,0,1,1,1,0,0,0,0,...,2,1,0,1,1,1,0,1,1,2
CA_1,FOODS_1_003,0,0,0,0,1,3,1,1,1,0,...,2,0,0,0,0,0,2,2,0,1
CA_1,FOODS_1_004,0,0,0,0,0,0,0,0,0,0,...,5,6,2,5,1,2,1,1,3,4
CA_1,FOODS_1_005,3,9,3,3,0,2,1,2,1,7,...,2,1,11,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WI_3,HOUSEHOLD_2_512,0,2,0,0,0,0,0,3,0,0,...,2,0,0,3,0,1,0,0,0,0
WI_3,HOUSEHOLD_2_513,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
WI_3,HOUSEHOLD_2_514,0,1,1,0,0,0,0,2,0,0,...,0,0,0,0,0,0,0,1,0,0
WI_3,HOUSEHOLD_2_515,0,0,0,0,0,0,0,0,0,0,...,0,0,1,4,1,0,0,0,0,0
