In [1]:
import pandas as pd
import numpy as np

In [2]:
from datetime import datetime

In [3]:
users = pd.read_csv('data/Users.csv')
orders = pd.read_csv('data/Orders_with_time.csv')
products = pd.read_csv('data/Products.csv')

In [13]:
orders_final = orders[:1000]

In [22]:
orders_final['time'] = pd.to_datetime(orders_final['time'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  orders_final['time'] = pd.to_datetime(orders_final['time'])


In [23]:
products.head()

Unnamed: 0,Name,BarCode,Price,LoaltyCoeff,Point_id
0,Chocolate Sandwich Cookies,903304753514,21.6,0.99,6191
1,All-Seasons Salt,401309075522,25.9,0.58,6870
2,Robust Golden Unsweetened Oolong Tea,851892036179,8.47,0.66,406
3,Smart Ones Classic Favorites Mini Rigatoni Wit...,445454311328,13.53,0.77,5459
4,Green Chile Anytime Sauce,304753955703,22.73,0.24,4954


In [24]:
orders_products = orders_final.merge(products, left_on='product_id', left_index=False, right_index=True)

In [25]:
orders_products.head()

Unnamed: 0,order_id,user_id,order_number,order_dow,order_hour_of_day,days_since_last_order,product_id,reordered,time,Name,BarCode,Price,LoaltyCoeff,Point_id
0,2539329.0,1.0,1.0,2.0,8.0,,196.0,0.0,2012-10-01 00:00:00,Cold Brew Coffee Tahitian Vanilla,284125606973,16.19,0.39,7309
5,2398795.0,1.0,2.0,3.0,7.0,15.0,196.0,1.0,2012-10-01 00:05:00,Cold Brew Coffee Tahitian Vanilla,284125606973,16.19,0.39,7309
11,473747.0,1.0,3.0,3.0,12.0,21.0,196.0,1.0,2012-10-01 00:11:00,Cold Brew Coffee Tahitian Vanilla,284125606973,16.19,0.39,7309
16,2254736.0,1.0,4.0,4.0,7.0,29.0,196.0,1.0,2012-10-01 00:16:00,Cold Brew Coffee Tahitian Vanilla,284125606973,16.19,0.39,7309
21,431534.0,1.0,5.0,4.0,15.0,28.0,196.0,1.0,2012-10-01 00:21:00,Cold Brew Coffee Tahitian Vanilla,284125606973,16.19,0.39,7309


In [56]:
orders_final.iloc[15].time.weekday()

0

In [75]:
def get_point_statistics(point_id=2, n=10):
    product_profits = np.zeros(len(products.index.values))

    day_demand = np.zeros(3)
    week_demand = np.zeros(7)
    month_demand = np.zeros(12)

    day_profit = np.zeros(3)
    week_profit = np.zeros(7)
    month_profit = np.zeros(12)

    for _, order_product in orders_products.iterrows():
        product_profits[int(order_product.product_id)] += order_product.Price

        if  order_product.time.hour < 12:
            day_demand[0] += 1
            day_profit[0] += order_product.Price
        elif order_product.time.hour < 18:
            day_demand[1] += 1
            day_profit[1] += order_product.Price
        else:
            day_demand[2] += 1
            day_profit[2] += order_product.Price

        week_demand[int(order_product.time.weekday())] += 1
        week_profit[int(order_product.time.weekday())] += order_product.Price

        month_demand[int(order_product.time.month)] += 1
        month_profit[int(order_product.time.month)] += order_product.Price


    args = np.argsort(product_profits)

    most_popular_profitable = args[:n]
    least_popular_profitable= args[:-n-1:-1]

    return {
        'day_demand_mean': np.mean(day_demand),
        'week_demand_mean': np.mean(week_demand),
        'year_demand_mean': np.mean(month_demand),

        'day_profit_mean': np.mean(day_profit),
        'week_profit_mean': np.mean(week_profit),
        'year_profit_mean': np.mean(month_profit),

        'day_demand': day_demand / sum(day_demand),
        'week_demand': week_demand / sum(week_demand),
        'year_demand': month_demand / sum(month_demand),

        'day_profit': day_profit / sum(day_profit),
        'week_profit': week_profit / sum(week_profit),
        'year_profit': month_profit / sum(month_profit),

        'most_profitable': most_popular_profitable,
        'least_profitable': least_popular_profitable,
    }

In [76]:
get_point_statistics()

{'day_demand_mean': 333.3333333333333,
 'week_demand_mean': 142.85714285714286,
 'year_demand_mean': 83.33333333333333,
 'day_profit_mean': 8449.523333333333,
 'week_profit_mean': 3621.2242857142846,
 'year_profit_mean': 2112.3808333333327,
 'day_demand': array([0.72, 0.28, 0.  ]),
 'week_demand': array([1., 0., 0., 0., 0., 0., 0.]),
 'year_demand': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]),
 'day_profit': array([0.72847068, 0.27152932, 0.        ]),
 'week_profit': array([1., 0., 0., 0., 0., 0., 0.]),
 'year_profit': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]),
 'most_profitable': array([    0, 33046, 33047, 33048, 33049, 33050, 33051, 33052, 33053,
        33054]),
 'least_profitable': array([37602, 47766, 39190, 17638, 25133, 12427, 16797, 21137, 42803])}

In [106]:
user_orders_products = orders_products[orders_products.user_id == 2]
points_popularity = user_orders_products.Point_id.value_counts()
points_popularity.index[:-10:-1]

Int64Index([785, 2930, 8522, 1014, 6214, 5792, 1528, 8288, 292], dtype='int64')

In [128]:
def get_user_statistics(user_id=2, n=10):
    user_orders_products = orders_products[orders_products.user_id == user_id]
    points_popularity = user_orders_products.Point_id.value_counts()
    product_popularity = user_orders_products.product_id.value_counts()

    money_spent_on_popular = user_orders_products[user_orders_products.Point_id.isin(points_popularity.index[:n])].Price.sum()
    money_spent = user_orders_products.Price.sum()

    return {
            'most_popular_point': points_popularity.index[:n].to_list(),
            'least_popular_point': points_popularity.index[:-n-1:-1].to_list(),
            'most_popular_product': product_popularity.index[:n].to_list(),
            'least_popular_product': product_popularity.index[:-n-1:-1].to_list(),
            'money_spent_on_popular': money_spent_on_popular,
            'money_spent': money_spent,
    }

In [132]:
get_user_statistics(user_id=2, n=5)

{'most_popular_point': [7566, 2086, 6333, 1630, 6513],
 'least_popular_point': [785, 2930, 8522, 1014, 6214],
 'most_popular_product': [32792.0, 47209.0, 24852.0, 1559.0, 19156.0],
 'least_popular_product': [7963.0, 15841.0, 18961.0, 13742.0, 14306.0],
 'money_spent_on_popular': 427.6399999999999,
 'money_spent': 4535.99}

: 