# 2. 複数のカラムで全ての組み合わせを持ったマスタを作成したい
すべての組み合わせを持ったマスタを作成する場合を検証します。  
このようなニーズは、例えばゲーム内でユーザー毎に各アイテムの所持数の推移を日次で見たい場合に発生します。このとき、ユーザー、日付、アイテムの全ての組み合わせを持ったマスタを作る必要があります。マスタは以下のカラムを持ちます。  
- ユーザーID (user_id)
- 日付 (date)
- アイテムID (item_id)
- アイテム名 (item_name)

In [2]:
from itertools import product
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from utilities.process_time import PandasProcessTimeMeasure

# Notebook-wide matplotlib defaults: font size and figure size for all plots.
plt.rcParams.update({
    'font.size': 12,
    'figure.figsize': (10, 6),
})

In [24]:
def create_values(date_periods):
    """Return the value lists whose combinations make up the master table.

    Args:
        date_periods: Number of consecutive daily dates to generate,
            starting at 2018-01-01.

    Returns:
        A 4-tuple ``(user_ids, dates, item_ids, item_names)``:
        user ids 1..100, dates as ``'YYYY-MM-DD'`` strings, and five item
        ids with their item names in the same order.
    """
    users = list(range(1, 101))
    # Format the whole index in one call rather than one Timestamp at a time.
    days = pd.date_range('2018-01-01', periods=date_periods, freq='D')
    dates = days.strftime('%Y-%m-%d').tolist()
    item_ids = [1, 2, 3, 4, 5]
    item_names = ['item01', 'item02', 'item03', 'item04', 'item05']
    return users, dates, item_ids, item_names

## 方法1　1行毎にDataFrame化して`.append()`で積み上げる

In [40]:
def method1(sample_size):
    """Build the user x date x item master by stacking one-row DataFrames.

    This is the naive baseline of the comparison: every combination is
    turned into its own single-row DataFrame and stacked onto the running
    result, one row per iteration.

    ``DataFrame.append`` was deprecated in pandas 1.4 and removed in 2.0,
    so the stacking is done with ``pd.concat``. The row-at-a-time
    accumulation is kept on purpose; only the removed API is replaced.

    Args:
        sample_size: Number of consecutive dates, forwarded to
            ``create_values``.

    Returns:
        DataFrame with columns ``user_id``, ``date``, ``item_id`` and
        ``item_name`` and a fresh 0..n-1 index. When there are no
        combinations at all, an empty DataFrame with those four columns.
    """
    columns = ['user_id', 'date', 'item_id', 'item_name']
    user_id_list, date_list, item_id_list, item_name_list = create_values(sample_size)
    # Start from None instead of an empty DataFrame so the first concat never
    # involves an empty frame (which could influence the result dtypes).
    master = None
    for user_id in user_id_list:
        for date in date_list:
            for item_id, item_name in zip(item_id_list, item_name_list):
                _master = pd.DataFrame(
                    data={
                        'user_id': [user_id],
                        'date': [date],
                        'item_id': [item_id],
                        'item_name': [item_name],
                    },
                    columns=columns,
                )
                if master is None:
                    master = _master
                else:
                    master = pd.concat([master, _master])
    if master is None:
        return pd.DataFrame(columns=columns)
    # Every stacked row carried index 0; renumber to 0..n-1.
    return master.reset_index(drop=True)


# Preview the first 10 rows of the master returned by method1(10).
method1(10).head(10)

Unnamed: 0,user_id,date,item_id,item_name
0,1,2018-01-01,1,item01
1,1,2018-01-01,2,item02
2,1,2018-01-01,3,item03
3,1,2018-01-01,4,item04
4,1,2018-01-01,5,item05
5,1,2018-01-02,1,item01
6,1,2018-01-02,2,item02
7,1,2018-01-02,3,item03
8,1,2018-01-02,4,item04
9,1,2018-01-02,5,item05


## 方法2　listで全組み合わせ作成後にDataFrame化する　`.append()`バージョン

In [41]:
def method2(sample_size):
    """Build the master from plain lists filled with ``list.append``.

    One Python list per output column is grown element by element over
    every (user, date, item) combination; the DataFrame is constructed
    once at the end.

    Args:
        sample_size: Number of consecutive dates, forwarded to
            ``create_values``.

    Returns:
        DataFrame with columns ``user_id``, ``date``, ``item_id`` and
        ``item_name``, ordered by user, then date, then item.
    """
    users, dates, item_ids, item_names = create_values(sample_size)
    column_order = ['user_id', 'date', 'item_id', 'item_name']
    collected = {name: [] for name in column_order}
    for uid in users:
        for day in dates:
            for iid, iname in zip(item_ids, item_names):
                collected['user_id'].append(uid)
                collected['date'].append(day)
                collected['item_id'].append(iid)
                collected['item_name'].append(iname)
    return pd.DataFrame(data=collected, columns=column_order)


# Preview the first 10 rows of the master returned by method2(10).
method2(10).head(10)

Unnamed: 0,user_id,date,item_id,item_name
0,1,2018-01-01,1,item01
1,1,2018-01-01,2,item02
2,1,2018-01-01,3,item03
3,1,2018-01-01,4,item04
4,1,2018-01-01,5,item05
5,1,2018-01-02,1,item01
6,1,2018-01-02,2,item02
7,1,2018-01-02,3,item03
8,1,2018-01-02,4,item04
9,1,2018-01-02,5,item05


## 方法3　listで全組み合わせ作成後にDataFrame化する　`.extend()`バージョン

In [42]:
def method3(sample_size):
    """Build the master from plain lists filled with ``list.extend``.

    For each (user, date) pair the whole block of item rows is added in
    one ``extend`` call per column, instead of one ``append`` per row; the
    DataFrame is constructed once at the end.

    Args:
        sample_size: Number of consecutive dates, forwarded to
            ``create_values``.

    Returns:
        DataFrame with columns ``user_id``, ``date``, ``item_id`` and
        ``item_name``, ordered by user, then date, then item.
    """
    users, dates, item_ids, item_names = create_values(sample_size)
    items_per_pair = len(item_ids)
    user_col, date_col, item_id_col, item_name_col = [], [], [], []
    for uid in users:
        for day in dates:
            # User and date repeat once per item; the item columns are
            # copied through as-is.
            user_col.extend([uid] * items_per_pair)
            date_col.extend([day] * items_per_pair)
            item_id_col.extend(item_ids)
            item_name_col.extend(item_names)
    frame_data = {
        'user_id': user_col,
        'date': date_col,
        'item_id': item_id_col,
        'item_name': item_name_col,
    }
    return pd.DataFrame(
        data=frame_data,
        columns=['user_id', 'date', 'item_id', 'item_name'],
    )


# Preview the first 10 rows of the master returned by method3(10).
method3(10).head(10)

Unnamed: 0,user_id,date,item_id,item_name
0,1,2018-01-01,1,item01
1,1,2018-01-01,2,item02
2,1,2018-01-01,3,item03
3,1,2018-01-01,4,item04
4,1,2018-01-01,5,item05
5,1,2018-01-02,1,item01
6,1,2018-01-02,2,item02
7,1,2018-01-02,3,item03
8,1,2018-01-02,4,item04
9,1,2018-01-02,5,item05


In [43]:
# Register the three master-building functions and run the timing comparison
# with sample sizes 10, 20 and 30.
# NOTE(review): PandasProcessTimeMeasure is a project helper not shown here;
# presumably it calls each registered method once per sample size and plots
# the elapsed times -- confirm against utilities.process_time.
process_time_measure = PandasProcessTimeMeasure(sample_sizes=[10, 20, 30])
for _name, _method in (
    ('method01', method1),
    ('method02', method2),
    ('method03', method3),
):
    process_time_measure.set_method(name=_name, method=_method)
process_time_measure.measure_process_time_for_each_sample_sizes()
process_time_measure.plot_process_time()

KeyboardInterrupt: 