<a href="https://colab.research.google.com/github/ConSeanway/BusinessGameRL/blob/master/BusRL_Demand_based_on_Price.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%tensorflow_version 1.x
!pip install stable-baselines[mpi]==2.10.0



TensorFlow 1.x selected.
Collecting stable-baselines[mpi]==2.10.0
[?25l  Downloading https://files.pythonhosted.org/packages/e5/fe/db8159d4d79109c6c8942abe77c7ba6b6e008c32ae55870a35e73fa10db3/stable_baselines-2.10.0-py3-none-any.whl (248kB)
[K     |████████████████████████████████| 256kB 4.8MB/s 
Installing collected packages: stable-baselines
  Found existing installation: stable-baselines 2.2.1
    Uninstalling stable-baselines-2.2.1:
      Successfully uninstalled stable-baselines-2.2.1
Successfully installed stable-baselines-2.10.0


In [0]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
from math import exp
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
import gym
from gym import spaces
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
from stable_baselines import TRPO

In [0]:
'''
Sean Conway and Yanzhe Ma
Business Game Reinforcement Learning PyProject
Updated 28MAY2020
'''

# Number of periods main() plays before stopping
MAX_PERIODS = 2

# Assets every team holds at the start of the game
INITIAL_ASSETS = 100000

# Competing teams; one Agent is created per name
agentNames = [
  'Rackhouse LLC',
  'Phantom Thieves',
  'Team Rocket',
  'The Liberal Artists',
  'The Consultants',
]

# Cities in play; one Market is created per name
cityNames = [
  'Atlanta',
  'Shanghai',
  'Vancouver',
  'Tokyo',
  'Bethlehem',
]

# Environment is akin to a "world" class
class Environment():
  """Game clock: holds the number of the period currently being played."""

  def __init__(self):
    # Periods are counted starting from 1
    self.period = 1

  def timeskip(self):
    """Move the game forward by exactly one period."""
    self.period = self.period + 1


In [0]:
# Leaderboard tracks every team's total assets so the teams can be ranked and displayed
class Leaderboard():
  """Mapping of team name to total assets, used to rank the teams."""

  def __init__(self):
    # Before any period is played, every team holds the same starting assets
    self.leaderboard = {name: INITIAL_ASSETS for name in agentNames}

  def displayLeaderboard(self):
    """Print the standings, richest team first, framed by blank lines."""
    print()
    print("Current Leaderboard")
    ranked = sorted(self.leaderboard.items(), key=lambda entry: entry[1], reverse=True)
    for name, assets in ranked:
      print(name, " :: $", assets)
    print()

  def updateLeaderboard(self, agentList):
    """Replace the standings with the current totalAssets of every agent in agentList."""
    self.leaderboard = {team.teamName: team.totalAssets for team in agentList}

In [0]:
# Agent class defines a player of the business game (or a team)
class Agent():
  """One team playing the business game: a price per city plus an asset total."""

  def __init__(self, teamName):
    # No price has been chosen yet, so every city starts at 0
    self.cityPrices = dict.fromkeys(cityNames, 0)
    self.teamName = teamName
    self.totalAssets = INITIAL_ASSETS

  def setPriceInCity(self, newPrice, city):
    """Record newPrice as this team's price in city."""
    self.cityPrices[city] = newPrice

  def setPrices(self):
    """Choose this period's price for every city (currently a flat 10 everywhere)."""
    flatPrice = 10
    for city in self.cityPrices.keys():
      self.setPriceInCity(flatPrice, city)


In [0]:
# Market collects every team's price for one city and generates that city's demand
class Market():
  """A single city's market: each team's posted price plus the demand drawn for the city."""

  def __init__(self, cityName):
    # Every team starts with a price of 0 until updateTeamPrice is called
    self.teamPrices = dict.fromkeys(agentNames, 0)
    self.cityName = cityName
    self.totalDemand = 0

  def updateTeamPrice(self, team, newPrice):
    """Store newPrice as the price team charges in this city."""
    self.teamPrices[team] = newPrice

  def computeDemand(self, period):
    """Draw this period's demand from a normal distribution, store it, and print it.

    The distribution is centred on computeMeanDemand(period), with a standard
    deviation of one eighth of that mean.
    """
    centre = self.computeMeanDemand(period)
    self.totalDemand = np.random.normal(loc=centre, scale=centre / 8)
    self.printDemand()

  def computeMeanDemand(self, period):
    """Return the expected demand for period, driven by the lowest team price.

    The mean grows by a factor of 1.03 per period and decays exponentially
    with the cheapest price currently posted in this city.
    """
    growth = (1.03) ** period
    cheapest = min(self.teamPrices.values())
    return growth * 10840 * exp(-0.2216 * cheapest / 10)

  def printDemand(self):
    """Print the city name followed by its demand truncated to an integer."""
    print(f"{self.cityName!s}: {int(self.totalDemand)}")

In [0]:
# Initialization method for the business game
def initializeBusinessGame():
  """Build a fresh game and return (environment, leaderboard, markets, agents).

  One Market is created per entry in cityNames and one Agent per entry in
  agentNames, both in list order.
  """
  gameClock = Environment()
  standings = Leaderboard()
  markets = [Market(name) for name in cityNames]
  players = [Agent(name) for name in agentNames]
  return gameClock, standings, markets, players

In [0]:
def computeAllCityDemands(cityList, agentList, period=None):
  """Push every agent's price into every market, then draw each market's demand.

  Args:
    cityList: markets to update; each must provide cityName, updateTeamPrice()
      and computeDemand().
    agentList: agents whose teamName and cityPrices are read.
    period: period number handed to each market's computeDemand(). Pass it
      explicitly so the function does not depend on kernel state. When
      omitted, the module-level ``myEnv.period`` is looked up instead, which
      is what this function read before the parameter existed.

  Raises:
    NameError: if period is omitted, cityList is non-empty, and no
      module-level ``myEnv`` exists.
  """
  for city in cityList:

    # Record each team's price for this city so the market can find the minimum
    for agent in agentList:
      city.updateTeamPrice(agent.teamName, agent.cityPrices[city.cityName])

    # Prefer the explicit period; fall back to the global environment only for
    # callers that still rely on the old implicit lookup
    city.computeDemand(myEnv.period if period is None else period)

In [0]:
def computeMarketShare():
  """Placeholder for the market-share calculation; currently does nothing."""
  return None

In [117]:
# Main method of the business game, as abstracted as possible
def main():
  """Play MAX_PERIODS periods of the business game, printing demand and standings.

  Each period: every agent sets prices, every market draws its demand, the
  period counter advances, and the leaderboard is refreshed and printed.
  """
  # computeAllCityDemands() looks up the current period on a module-level
  # `myEnv`. Binding the environment globally here lets that lookup succeed on
  # a fresh kernel instead of raising NameError.
  global myEnv

  myEnv, myLeaderboard, cityList, agentList = initializeBusinessGame()

  # The loop index is unused: the period shown and used for demand comes from myEnv
  for _ in range(MAX_PERIODS):

    # Print the header for this period
    print("Period " + str(myEnv.period))
    print("-------------------")

    # Have each agent set its prices for this period
    for agent in agentList:
      agent.setPrices()

    # Once all agents have set prices, compute the demand in every city
    computeAllCityDemands(cityList, agentList)

    # TODO: implement computeMarketShare, and add a computeProfit step
    computeMarketShare()

    # Advance the period, then refresh and display the leaderboard
    myEnv.timeskip()

    myLeaderboard.updateLeaderboard(agentList)
    myLeaderboard.displayLeaderboard()


main()


Period 1
-------------------
Atlanta: 8731
Shanghai: 9114
Toronto: 9977
Shibuya: 8879

Current Leaderboard
Rackhouse LLC  :: $ 9000
Phantom Thieves  :: $ 9000
Team Rocket  :: $ 9000
The Liberal Artists  :: $ 9000
The Consultants  :: $ 9000

Period 2
-------------------
Atlanta: 9525
Shanghai: 11298
Toronto: 9358
Shibuya: 10111

Current Leaderboard
Rackhouse LLC  :: $ 9000
Phantom Thieves  :: $ 9000
Team Rocket  :: $ 9000
The Liberal Artists  :: $ 9000
The Consultants  :: $ 9000



In [0]:
class CustomEnv(gym.Env):

  def __init__(self, INIT_BALANCE=0, discRate=0.02, maxPeriods=12):
    """Declare the action and observation spaces, then call self.reset().

    INIT_BALANCE, discRate and maxPeriods are accepted but not used yet:
    no balance or discount rate is stored, and self.maxPeriods is fixed at 1.
    """
    super().__init__()

    # Action: one continuous value in [0, 100] -- the price for the current period
    priceFloor = np.array([0])
    priceCeiling = np.array([100])
    self.action_space = spaces.Box(low=priceFloor, high=priceCeiling, shape=(1,), dtype=np.float32)

    # Observation: one continuous value in [0, 10]
    self.observation_space = spaces.Box(low=0, high=10, shape=(1,), dtype=np.float32)

    # Hard-coded for now; the maxPeriods argument is ignored
    self.maxPeriods = 1

    # NOTE(review): reset() is not defined in the visible part of this class;
    # presumably it initialises self.time, which step() reads -- confirm
    self.reset()

    # TODO: rewards will need to be a function --
    # multiply the price times the quantity to get the reward (profit)


  # DATA STORAGE (columns):
  # 0 = time
  # 1 = current Price
  # 2 = current period reward
  # 3 = total reward
  # 4 = previous period price
  # 5 = previous period reward

  def step(self, action):
    """Advance one period and score the chosen price against a random target.

    The target price is drawn from a normal distribution with mean
    ``2 * time + 20`` and standard deviation 2; the reward is the negative
    squared gap between that target and the action.

    Args:
      action: the agent's price, either a scalar or a 1-element array as
        described by action_space.

    Returns:
      (observation, reward, done, info): a float32 array holding the current
      time, a plain float reward, a flag that is True once time exceeds 4,
      and an empty info dict.
    """
    self.time += 1

    # Draw a single target price; its mean rises by 2 every period
    targetPrice = np.random.normal(self.time * 2 + 20, 2)

    # Collapse a 1-element action array to a scalar so the reward is a plain
    # float, as the Gym step API expects, rather than a shape-(1,) ndarray
    chosenPrice = float(np.asarray(action, dtype=np.float64).reshape(-1)[0])
    reward = -((targetPrice - chosenPrice) ** 2)

    # Match the float32 dtype declared by observation_space
    self.state = np.array([self.time], dtype=np.float32)

    done = self.time > 4
    return self.state, reward, done, {}

