In [82]:
from abc import ABC, abstractmethod
import numpy as np
import matplotlib.pyplot as plt

In [83]:
class InterfaceEnvironment(ABC):
    """Abstract contract that every simulation environment must fulfil.

    Concrete environments have to provide a constructor, a ``states``
    property, a ``step`` method and a ``reset`` method.
    """

    @abstractmethod
    def __init__(self):
        """Set up the environment (parameters and initial state)."""

    @property
    @abstractmethod
    def states(self):
        """The representation of the state space."""

    @abstractmethod
    def step(self, action):
        """Advance the environment by one period using ``action``.

        The return value is defined by the concrete environment.
        """

    @abstractmethod
    def reset(self):
        """Reset the environment back to its starting period.

        Implementations may additionally accept an explicit state to reset
        to (used for solving the model).
        """
    
    

In [165]:
class EnvironmentModel1(InterfaceEnvironment):
    """Simulated environment with state (age, assets, kids, wage).

    The action passed to ``step`` is the number of hours worked (H).

    Ordering of items
    states: Q (age), M (assets), K (kids), W (wage)
    shocks: epsilon (wage shock), rho (asset shock), psi (kids shock)

    Parameters
    ----------
    kappa : share of (assets + salary) that is consumed each period.
    mu_rho, sigma_rho : mean and standard deviation of the normal draw
        used for the asset shock ``rho``.
    p_psi : success probability of the Bernoulli draw for the kids shock.
    sigma_epsilon : sigma passed to the log-normal draw behind ``epsilon``.
    zeta : multiplier applied to the wage in the wage transition.
    a, b, c, d : coefficients of the logistic function ``f_B`` that gives
        the leisure exponent of the utility function.
    """

    DEFAULT_AGE = 18
    DEFAULT_ASSETS = 500
    DEFAULT_KIDS = 0
    DEFAULT_WAGE = 120

    # Hours available per period; leisure is what is left after working.
    HOURS_PER_WEEK = 7 * 24
    # Kids stop accumulating once this count is reached.
    MAX_KIDS = 5
    # The episode is flagged as done once age exceeds this value.
    MAX_AGE = 85

    def __init__(self, kappa, mu_rho, sigma_rho, p_psi, sigma_epsilon, zeta, a, b, c, d):

        # parameters
        self.kappa = kappa
        self.mu_rho = mu_rho
        self.sigma_rho = sigma_rho
        self.p_psi = p_psi
        self.sigma_epsilon = sigma_epsilon
        self.zeta = zeta

        self.a = a
        self.b = b
        self.c = c
        self.d = d

        # states
        self.age = self.DEFAULT_AGE
        self.assets = self.DEFAULT_ASSETS
        self.kids = self.DEFAULT_KIDS
        self.wage = self.DEFAULT_WAGE

    def __repr__(self):
        return f"(Q: {self.age}, A: {self.assets}, K: {self.kids}, Z: {self.wage})"

    @property
    def states(self):
        """Current state as the tuple (age, assets, kids, wage)."""
        return self.age, self.assets, self.kids, self.wage

    def reset(self, states=None):
        """Reset the state.

        Parameters
        ----------
        states : optional indexable with (age, assets, kids, wage) in
            positions 0..3. When omitted, the class defaults are restored.
        """
        if states is not None:
            self.age = states[0]
            self.assets = states[1]
            self.kids = states[2]
            self.wage = states[3]
        else:
            self.age = self.DEFAULT_AGE
            self.assets = self.DEFAULT_ASSETS
            self.kids = self.DEFAULT_KIDS
            self.wage = self.DEFAULT_WAGE

    def step(self, action, shocks=None):
        """Advance one period.

        Parameters
        ----------
        action : hours worked (H) in this period.
        shocks : optional (epsilon, rho, psi) triple. When omitted the
            shocks are drawn with ``draw_shocks``.

        Returns
        -------
        (states, reward, done, info) where ``states`` is the state *after*
        the transition, ``reward`` is the period utility (0 in the terminal
        period), ``done`` is the termination flag and ``info`` is a string
        reporting consumption.
        """
        if shocks is None:
            shocks = self.draw_shocks()
        # Unpack in both cases so externally supplied shocks are honoured.
        epsilon, rho, psi = shocks

        # remember action: hours (H)

        ### model dynamic
        leisure = self.calc_leisure(action)
        B = self.calc_B()
        total_wage = self.calc_total_wage()

        salary = self.calc_salary(total_wage, action)

        consumption = self.calc_consumption(salary)

        utility = self.calc_utility(consumption, leisure, B)

        ### transition
        self.calc_assets(salary, consumption, rho)
        self.calc_kids(psi)
        self.calc_wage(epsilon)
        self.calc_age()

        # this might be changed
        done = self.calc_dies()
        reward = 0 if done else utility

        return self.states, reward, done, f'consumption: {consumption}'

    # model dynamic
    def calc_utility(self, consumption, leisure, B):
        """Cobb-Douglas utility: consumption**(1 - B) * leisure**B."""
        # NOTE(review): consumption is negative whenever assets + salary is
        # negative, and a negative base with a fractional exponent yields NaN
        # (NumPy floats) -- confirm how negative consumption should be treated.
        return consumption**(1 - B) * leisure**(B)

    def calc_B(self):
        """Leisure exponent of the utility function (see ``f_B``)."""
        return self.f_B()

    def calc_salary(self, total_wage, hours):
        """Salary earned for working ``hours`` at ``total_wage``."""
        return hours * total_wage

    def calc_total_wage(self):
        """Wage per hour: ``f_w()`` plus the stochastic wage state."""
        return self.f_w() + self.wage

    def calc_consumption(self, salary):
        """Consumption: the share ``kappa`` of assets plus salary."""
        return self.kappa * (self.assets + salary)

    def calc_leisure(self, hours):
        """Leisure: hours in the week not spent working."""
        return self.HOURS_PER_WEEK - hours

    def calc_dies(self):
        """Return True once age exceeds ``MAX_AGE`` (the done flag)."""
        # bool() keeps the flag a plain Python bool even when age is a NumPy
        # scalar (e.g. after reset() with an array).
        return bool(self.age > self.MAX_AGE)

    # transitions (L.O.M)
    def calc_assets(self, salary, consumption, rho):
        """Update assets: savings (assets + salary - consumption) times rho."""
        self.assets = rho * (self.assets + salary - consumption)

    def calc_kids(self, psi):
        """Add ``psi`` kids unless ``MAX_KIDS`` has already been reached."""
        if self.kids < self.MAX_KIDS:
            self.kids = self.kids + psi

    def calc_wage(self, epsilon):
        """Update the wage state: zeta * wage * epsilon."""
        self.wage = self.zeta * self.wage * epsilon

    def calc_age(self):
        """Increase age by one period."""
        self.age = self.age + 1

    # def shocks
    def draw_shocks(self):
        """Draw one (epsilon, rho, psi) triple."""
        return (self.draw_epsilon(), self.draw_rho(), self.draw_psi())

    def draw_epsilon(self):
        """Draw the multiplicative wage shock."""
        # NOTE(review): np.random.lognormal already returns exp(normal), so
        # this is exp(exp(N(0, sigma))) -- confirm whether the outer np.exp
        # is intended before changing it.
        return np.exp(np.random.lognormal(0, self.sigma_epsilon))

    def draw_rho(self):
        """Draw the asset shock from N(mu_rho, sigma_rho)."""
        return np.random.normal(self.mu_rho, self.sigma_rho)

    def draw_psi(self):
        """Draw the kids shock: a single Bernoulli(p_psi) trial."""
        return np.random.binomial(1, self.p_psi)

    # function
    def f_B(self):
        """Logistic function of kids: a + b * exp(c + d*K) / (1 + exp(c + d*K))."""
        _exp = np.exp(self.c + self.d * self.kids)
        return self.a + self.b * (_exp / (1 + _exp))

    def f_w(self):
        """Deterministic wage component (currently the constant 1)."""
        # should be changed when data is available for the permanent income
        # structure of the income dynamic
        return 1

In [166]:
# Baseline parameterisation of the environment.
parameters = dict(
    kappa=0.5,
    mu_rho=0.06,
    sigma_rho=0.12,
    p_psi=0.3,
    sigma_epsilon=0.1,
    zeta=0.3,
    a=0.3,
    b=0.3,
    c=-2,
    d=1.5,
)
env = EnvironmentModel1(**parameters)

In [167]:
def random_action(actions=(0, 15, 25, 37, 45)):
    """Pick one action uniformly at random.

    Parameters
    ----------
    actions : 1-D sequence of candidate actions (hours worked). Defaults to
        the grid (0, 15, 25, 37, 45).

    Returns
    -------
    One element of ``actions``, drawn with ``np.random.choice``.
    """
    return np.random.choice(actions)

In [175]:
# Parameterisation used for the single-episode rollout below.
parameters = dict(
    kappa=0.5,
    mu_rho=0.06,
    sigma_rho=0.12,
    p_psi=0.3,
    sigma_epsilon=0.5,
    zeta=0.3,
    a=0.3,
    b=0.3,
    c=-2,
    d=1.5,
)

env = EnvironmentModel1(**parameters)

# Roll out one episode with random actions, printing the utility and the
# resulting state after every step, until the environment reports done.
while True:
    action = random_action()
    _states, reward, done, _info = env.step(action)
    print('utility', reward)
    print(env, '\n')
    if done:
        break

utility 1020.2152104174124
(Q: 19, A: 192.10773108800527, K: 0, Z: 158.4476574104846) 

utility 1176.435486207378
(Q: 20, A: 489.56904335233975, K: 0, Z: 74.91097147498827) 

utility 704.5709719447486
(Q: 21, A: 32.364071747407706, K: 0, Z: 67.38698808023722) 

utility 35.50249878644469
(Q: 22, A: 1.3862043907461477, K: 0, Z: 40.65928763511369) 

utility 246.20622579037402
(Q: 23, A: 31.273055299943927, K: 0, Z: 74.41964922436176) 

utility 371.3203624506555
(Q: 24, A: -28.329608775359794, K: 0, Z: 54.02323968454026) 

utility 506.63868065029027
(Q: 25, A: -25.298121235257653, K: 0, Z: 25.737074138233282) 

utility 311.2245052574876
(Q: 26, A: -46.68028003853941, K: 0, Z: 26.485864778233335) 

utility nan
(Q: 27, A: 0.7666726485920702, K: 0, Z: 14.388137220040514) 

utility 219.56621264586994
(Q: 28, A: 37.78937723047059, K: 0, Z: 15.877529740813447) 

utility 242.53677543171298
(Q: 29, A: 31.812583107438403, K: 1, Z: 13.018190860650314) 

utility 133.3488420302952
(Q: 30, A: -7.042415



# Random Values

In [None]:
# Number of episodes to simulate with the random policy.
N_EPISODES = 100

# One entry per episode: visited states, cumulative reward, actions taken.
states_history = list()
rewards_history = list()
actions_history = list()

for episode in range(N_EPISODES):

    # Start every episode from the default initial state.
    env.reset()
    states, actions = list(), list()

    done = False
    G = 0.0  # cumulative (undiscounted) reward of this episode

    while not done:
        action = random_action()
        _states, reward, done, _info = env.step(action)
        # No per-step printing here: it would emit thousands of lines.
        G += reward
        states.append(_states)
        actions.append(action)

    states_history.append(states)
    rewards_history.append(G)
    # Store the whole action sequence of the episode, not just the last action.
    actions_history.append(actions)
    
        