In [1]:
import pandas as pd
import numpy as np

# 可視化
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the raw temperature readings.
df_temp = pd.read_csv('../data/temperature.csv')

# Parse the timestamps and derive the calendar date of every reading.
df_temp['datetime'] = pd.to_datetime(df_temp['datetime'])
df_temp['date'] = df_temp['datetime'].dt.date

# Daily median temperature: one row per date.
df_daily_temp = (
  df_temp
  .groupby('date')['temperature']
  .median()
  .reset_index()
)

# Attach the daily median to each reading; the merged column is named
# `temperature_daily` because of the right-hand suffix.
df_temp = df_temp.merge(
  df_daily_temp,
  on='date',
  how='left',
  suffixes=('', '_daily'),
)

In [3]:
class VendingMachine:
  """A vending machine whose per-drink sales are simulated as Poisson counts.

  Attributes:
    id: Machine identifier; written to the `machine_id` output column.
    drinks: Iterable of drink objects exposing `id`, `price`, `warm`,
      `popularity` and an hour-indexed `rate` array.
    location: Multiplicative factor applied to every drink's rate.
    datetime, count, sales: Results of the latest `simulate_sales()` call.
      All three are `None` until a simulation has been run.
  """

  # Coefficient of the log-linear temperature effect on the sales rate.
  TEMP_COEF = 0.02

  def __init__(self, id, drinks, location):
    self.id = id
    self.drinks = drinks
    self.location = location

    # Explicit "not simulated yet" state, so get_sales() can raise its own
    # error message instead of failing with AttributeError.
    self.datetime = None
    self.count = None
    self.sales = None

  def simulate_sales(self, datetime, temp, rng=None):
    """Draw a Poisson sales count for every drink at every timestamp.

    Args:
      datetime: pandas Series of timestamps (its `.dt.hour` selects the
        entry of each drink's `rate` array).
      temp: Temperature per timestamp (Series/array aligned with
        `datetime`, or a scalar).
      rng: Optional object with a `poisson(lam)` method, e.g.
        `np.random.default_rng(seed)`. Defaults to the global `np.random`
        state, which is what the method always used before.

    Side effects:
      Stores `datetime` and fills `self.count` / `self.sales`, both keyed by
      `drink.id`. The attributes are only replaced once every drink has been
      simulated successfully.
    """
    sampler = np.random if rng is None else rng

    hour = np.asarray(datetime.dt.hour)
    temperature = np.asarray(temp, dtype=float)

    count = dict()
    sales = dict()
    for drink in self.drinks:
      # Base rate: hourly profile scaled by the drink's popularity.
      base_rate = np.asarray(drink.rate)[hour] * drink.popularity

      # Temperature effect. This is exp(log(rate) +/- coef * temp) written
      # multiplicatively, which avoids log(0) warnings for zero rates.
      # NOTE(review): as in the original code, drinks flagged `warm` sell
      # MORE as the temperature rises and the others sell less. If `warm`
      # means "served hot" the sign looks inverted - confirm the intent.
      sign = 1.0 if drink.warm else -1.0
      rate = base_rate * np.exp(sign * self.TEMP_COEF * temperature)

      # Location effect.
      rate = rate * self.location

      # Simulation.
      count[drink.id] = sampler.poisson(rate)
      sales[drink.id] = count[drink.id] * drink.price

    self.datetime = datetime
    self.count = count
    self.sales = sales

  def get_sales(self):
    """Return the latest simulation as one long DataFrame.

    Returns:
      DataFrame with columns `machine_id`, `product_id`, `datetime`, `count`
      and `sales`; the per-drink frames are stacked vertically and keep
      their own index. Empty (same columns) when the machine has no drinks.

    Raises:
      Exception: If `simulate_sales()` has not been run yet.
    """
    if self.sales is None:
      raise Exception('Please run simulate_sales() first.')

    columns = ['machine_id', 'product_id', 'datetime', 'count', 'sales']
    dfs = [
      pd.DataFrame({
        'machine_id': self.id,
        'product_id': drink.id,
        'datetime': self.datetime,
        'count': self.count[drink.id],
        'sales': self.sales[drink.id]
      })
      for drink in self.drinks
    ]

    # pd.concat() rejects an empty list; return an empty, well-formed frame.
    if not dfs:
      return pd.DataFrame(columns=columns)

    return pd.concat(dfs, axis=0)

class Drink:
  """A drink sold by the vending machines, with its hourly sales profile.

  Args:
    id: Product identifier; also the stem of the Excel file to load.
    price: Unit price (yen).
    warm: Drink type flag (True = warm, False = cold).
    popularity: Popularity, described by the author as the expected number
      of units sold per day.
    data_dir: Directory containing `<id>.xlsx`. Defaults to `'../data'`,
      the location that was previously hard-coded.

  Attributes:
    rate: Values of the `prop` column of `<data_dir>/<id>.xlsx`.
      NOTE(review): presumably one proportion per hour of the day (the
      array is indexed by hour elsewhere in this file) - confirm that the
      sheet has 24 rows.
  """

  def __init__(self, id, price, warm, popularity, data_dir='../data'):
    self.id = id                 # drink identifier
    self.price = price           # price (yen)
    self.warm = warm             # type (warm / cold)
    self.popularity = popularity # popularity = expected daily unit sales

    # Tolerate a trailing slash so '../data' and '../data/' behave the same.
    path = str(data_dir).rstrip('/') + '/' + id + '.xlsx'
    df = pd.read_excel(path)
    self.rate = df['prop'].to_numpy()

In [4]:
# Load the drink specification sheet.
# NOTE(review): `df_drinks` is not referenced by any other cell visible here;
# confirm whether it is still needed.
df_drinks = pd.read_excel('../data/drink_specs.xlsx')

In [29]:
# Drink line-up: (product id, price, warm flag, popularity).
drinks = [
  Drink(product_id, price, warm, popularity)
  for product_id, price, warm, popularity in (
    ('pid001', 120, False, 50),
    ('pid002', 100, True, 30),
    ('pid003', 150, False, 70),
  )
]

# Vending machines: (machine id, location factor). Every machine sells the
# same `drinks` list.
vms = [
  VendingMachine(machine_id, drinks, location_factor)
  for machine_id, location_factor in (
    ('mid001', 1.0),
    ('mid002', 0.8),
    ('mid003', 0.6),
  )
]

In [33]:
# Fix the global NumPy seed so that re-running the notebook regenerates the
# same CSV files (the simulation draws from np.random). The value itself is
# arbitrary.
np.random.seed(42)

# Inputs shared by every machine: the reading timestamps and the daily
# median temperature attached to each reading.
t = df_temp['datetime']
c = df_temp['temperature_daily']

# Simulate each machine and write one CSV per machine.
for vm in vms:
  vm.simulate_sales(t, c)
  df_sales = vm.get_sales()
  df_sales.to_csv('../outputs/' + vm.id + '.csv', index=False)

In [8]:
# Drink master: one row per product.
drink_master = pd.DataFrame(
  [
    ('pid001', 'soda', 120),
    ('pid002', 'coffee', 100),
    ('pid003', 'tea', 150),
  ],
  columns=['product_id', 'product_name', 'price'],
)
drink_master.to_csv('../outputs/drink_master.csv', index=False)

# Vending machine master: one row per machine.
vm_master = pd.DataFrame(
  [
    ('mid001', 'Yurakucho'),
    ('mid002', 'Jinbocho'),
    ('mid003', 'Ochanomizu'),
  ],
  columns=['machine_id', 'location'],
)
vm_master.to_csv('../outputs/vm_master.csv', index=False)