In [None]:
import gymnasium as gym
import random
import numpy as np
from dataclasses import dataclass
from skfuzzy.membership import trapmf
from typing import List


np.random.seed(42)
random.seed(42)

In [None]:
env = gym.make("MountainCar-v0", render_mode="human", goal_velocity=0.1)

observation, info = env.reset(seed=42)
for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    
    if terminated or truncated:
        observation, info = env.reset()
env.close()

In [None]:
@dataclass
class Trapezium:
    left: float
    left_top: float
    right_top: float
    right: float

    def membership_value(self, input_value: float) -> float:
        x = np.array([input_value])
        params = [self.left, self.left_top, self.right_top, self.right]
        return float(trapmf(x, params)[0])
    
    
class InputStateVariable:
    def __init__(self, *fuzzy_sets: Trapezium):
        self.fuzzy_set_list = fuzzy_sets

    def get_fuzzy_sets(self):
        return self.fuzzy_set_list
    
    def get_memberships(self, value: float) -> List[float]:
        return [fs.membership_value(value) for fs in self.fuzzy_set_list]
    
    
class Build:
    def __init__(self, *input_vars: InputStateVariable):
        self.input_vars = input_vars

    def get_input(self):
        return self.input_vars

    def get_number_of_fuzzy_sets(self, input_variable: InputStateVariable) -> int:
        return len(input_variable.get_fuzzy_sets())

    def get_number_of_rules(self) -> int:
        num_rules = 1
        for var in self.input_vars:
            num_rules *= self.get_number_of_fuzzy_sets(var)
        return num_rules

In [None]:
class FQLModel:
    def __init__(self, gamma: float, alpha: float, epsilon: float, action_set_length: int, fis: Build):
        self.gamma = gamma              # Discount factor
        self.alpha = alpha              # Learning rate
        self.epsilon = epsilon          # Exploration rate
        self.action_set_length = action_set_length
        self.fis = fis                  # Fuzzy inference system

        self.q_table = np.zeros((self.fis.get_number_of_rules(), action_set_length))

        # Internal state
        self.R: List[float] = []        # Degree of truth values for current state
        self.R_: List[float] = []       # Degree of truth for previous state
        self.M: List[int] = []          # Selected action index per rule
        self.V: List[float] = []        # State value history
        self.Q: List[float] = []        # Q-value history
        self.Error: float = 0.0         # TD error

In [None]:
# Example fuzzy sets for Mountain Car environment
position_sets = [
    Trapezium(-1.5, -1.2, -0.9, -0.3),  
    Trapezium(-0.9, -0.3, 0.3, 0.6),     
    Trapezium(-0.3, 0.3, 0.6, 0.9)       
]
velocity_sets = [
    Trapezium(-0.1, -0.07, -0.035, 0),  
    Trapezium(-0.035, 0, 0, 0.035),      
    Trapezium(0, 0.035, 0.07, 0.1)       
]

In [15]:
position_var = InputStateVariable(*position_sets)
position_var

<__main__.InputStateVariable at 0x138bf743730>